In [3]:
import math
import pickle

import geopandas as gpd
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error


def getPathLength(lat1,lng1,lat2,lng2):
    '''Return the haversine (great-circle) distance in metres between two points.

    Parameters
    ----------
    lat1, lng1 : latitude / longitude of the first point, in decimal degrees.
    lat2, lng2 : latitude / longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along a sphere of radius 6,371,000 m.
    '''
    R = 6371000 # radius of earth in m
    lat1rads = math.radians(lat1)
    lat2rads = math.radians(lat2)
    deltaLat = math.radians((lat2-lat1))
    deltaLng = math.radians((lng2-lng1))
    sinHalfLat = math.sin(deltaLat/2)
    sinHalfLng = math.sin(deltaLng/2)
    a = sinHalfLat * sinHalfLat + math.cos(lat1rads) * math.cos(lat2rads) * sinHalfLng * sinHalfLng
    # Mathematically 0 <= a <= 1, but rounding can push it an ulp outside that
    # range (e.g. near-antipodal points), which would make math.sqrt raise
    # ValueError. Explicit comparisons are used (not min/max) so that a NaN
    # still propagates instead of being silently turned into a distance.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    return R * c

def getDestinationLatLong(lat,lng,azimuth,distance):
    '''Return the [lat, lng] of the destination point given a start point,
    an azimuth and a distance.

    Parameters
    ----------
    lat, lng : start point in decimal degrees.
    azimuth  : initial bearing in degrees.
    distance : distance to travel, in metres.

    Returns
    -------
    list
        [latitude, longitude] in decimal degrees; the longitude is wrapped
        into [-180, 180].

    Note: this function uses a radius of 6378.1 km while getPathLength uses
    6,371,000 m. The value is kept as-is so existing results do not change.
    '''
    R = 6378.1 #Radius of the Earth in km
    brng = math.radians(azimuth) #Bearing is degrees converted to radians.
    d = distance/1000 #Distance m converted to km
    lat1 = math.radians(lat) #Current dd lat point converted to radians
    lon1 = math.radians(lng) #Current dd long point converted to radians
    angular = d/R #Angular distance travelled, in radians
    sinLat2 = math.sin(lat1) * math.cos(angular) + math.cos(lat1)* math.sin(angular)* math.cos(brng)
    # Rounding can leave the sine an ulp outside [-1, 1] (start point at a
    # pole), which would make math.asin raise ValueError. NaN is passed through.
    if sinLat2 > 1.0:
        sinLat2 = 1.0
    elif sinLat2 < -1.0:
        sinLat2 = -1.0
    lat2 = math.asin(sinLat2)
    lon2 = lon1 + math.atan2(math.sin(brng) * math.sin(angular)* math.cos(lat1), math.cos(angular)- math.sin(lat1)* math.sin(lat2))
    #convert back to degrees
    lat2 = math.degrees(lat2)
    lon2 = math.degrees(lon2)
    # Crossing the antimeridian yields values such as 180.3; wrap them back.
    # In-range values are left untouched so ordinary results stay bit-identical.
    if lon2 > 180.0 or lon2 < -180.0:
        lon2 = (lon2 + 180.0) % 360.0 - 180.0
    return [lat2, lon2]

def calculateBearing(lat1,lng1,lat2,lng2):
    '''Return the azimuth, in degrees within [0, 360), from the start point
    (lat1, lng1) to the end point (lat2, lng2).

    All inputs are decimal degrees. The latitude difference is taken on the
    Mercator-stretched scale, log(tan(lat/2 + pi/4)), i.e. this is the
    constant-heading (rhumb-line) bearing.
    '''
    phi_start = math.radians(lat1)
    lam_start = math.radians(lng1)
    phi_end = math.radians(lat2)
    lam_end = math.radians(lng2)

    delta_lam = lam_end - lam_start
    stretched_dphi = math.log(
        math.tan(phi_end/2.0+math.pi/4.0)/math.tan(phi_start/2.0+math.pi/4.0)
    )

    # Take the shorter way around the globe when the raw longitude difference
    # spans more than half a turn.
    if abs(delta_lam) > math.pi:
        full_turn = 2.0 * math.pi
        delta_lam = -(full_turn - delta_lam) if delta_lam > 0.0 else (full_turn + delta_lam)

    signed_degrees = math.degrees(math.atan2(delta_lam, stretched_dphi))
    return (signed_degrees + 360.0) % 360.0

def coords_src_dest(interval,azimuth,lat1,lng1,lat2,lng2):
    '''Return the coordinate pairs between two points, sampled every
    `interval` metres.

    The result is a list of [lat, lng] pairs: the start point first, then one
    point per whole `interval` that fits into the path length (each obtained by
    travelling from the start along `azimuth`), and the end point last.
    '''
    path_length = getPathLength(lat1,lng1,lat2,lng2)
    _fraction, whole_steps = math.modf(path_length / interval)

    step = float(interval)
    points = [[lat1,lng1]]

    # The offset is accumulated by repeated addition rather than computed as
    # k * step, so the sampled distances are exactly those of a running sum.
    offset = step
    for _ in range(int(whole_steps)):
        points.append(getDestinationLatLong(lat1,lng1,azimuth,offset))
        offset += step

    points.append([lat2,lng2])
    return points

def locs_to_avgspeed(lat1, lng1, lat2, lng2, model, interval=25):
    '''Return the mean predicted speed along the path between two points.

    The path from (lat1, lng1) to (lat2, lng2) is sampled every `interval`
    metres with coords_src_dest, each sample is converted to radians and the
    whole batch is passed to `model.predict` in a single call.

    Parameters
    ----------
    lat1, lng1, lat2, lng2 : end points in decimal degrees.
    model    : object whose `predict` accepts a list of [lat_rad, lng_rad] rows
               and returns one value per row (presumably a fitted scikit-learn
               regressor -- confirm against the pickled model).
    interval : sampling distance in metres (default 25, the previously
               hard-coded value).

    Returns
    -------
    float
        Arithmetic mean of the per-sample predictions.

    Raises
    ------
    TypeError
        If the model does not return exactly one value per sample.
    '''
    azimuth = calculateBearing(lat1, lng1, lat2, lng2)
    coords = coords_src_dest(interval, azimuth, lat1, lng1, lat2, lng2)
    features = [[math.radians(lat), math.radians(lng)] for lat, lng in coords]

    # One batched call instead of one call per sample. Flatten so that both
    # (n,) and (n, 1) outputs are handled without relying on float(ndarray),
    # which NumPy >= 1.25 deprecates for arrays with ndim > 0.
    predictions = np.asarray(model.predict(features), dtype=float).ravel()
    if predictions.size != len(features):
        raise TypeError(
            'model.predict must return exactly one value per sample '
            '(got %d values for %d samples)' % (predictions.size, len(features))
        )

    # Plain left-to-right accumulation keeps the same summation order as
    # adding the predictions one at a time.
    total = 0.0
    for value in predictions.tolist():
        total += value
    # coords always holds at least the two end points, so this never divides by zero.
    return total / len(features)

def prediction(dfInput, model1, model2):
    '''Estimate the travel time for every row of `dfInput`.

    The first four columns of each row are read positionally as
    lat1, lng1, lat2, lng2. For each row:

    * `model1` is handed to locs_to_avgspeed to get an average speed,
    * the straight-line distance is taken from getPathLength and converted
      from metres to kilometres,
    * a first time estimate is computed as distance * 60 / speed
      (minutes if the speed is in km/h -- TODO confirm the model's unit),
    * `model2.predict([[speed, distance, time]])[0]` gives the final estimate.

    Returns three parallel lists: (ETT, distances, speeds).
    '''
    estimates, path_lengths_km, avg_speeds = [], [], []

    for row in dfInput.to_numpy():
        src_lat, src_lng, dst_lat, dst_lng = row[0], row[1], row[2], row[3]

        avg_speed = locs_to_avgspeed(src_lat, src_lng, dst_lat, dst_lng, model1)
        length_km = getPathLength(src_lat, src_lng, dst_lat, dst_lng) / 1000
        naive_time = length_km * 60 / avg_speed
        refined_time = model2.predict([[avg_speed, length_km, naive_time]])[0]

        estimates.append(refined_time)
        path_lengths_km.append(length_km)
        avg_speeds.append(avg_speed)

    return estimates, path_lengths_km, avg_speeds

# Load the three data files; paths are relative to the working directory.
# `df` is only handed to EstimatedTravelTime, `dfInput` holds the rows to
# predict for, and `dfGroundTruth` holds the reference values used for scoring.
df = pd.read_parquet(path='./../data/BMTC.parquet.gzip', engine='pyarrow')
dfInput = pd.read_csv(filepath_or_buffer='./../data/Input.csv')
dfGroundTruth = pd.read_csv(filepath_or_buffer='./../data/GroundTruth.csv')
    

def EstimatedTravelTime(df, dfInput): # The output of this function will be evaluated
    '''Return a DataFrame with one column, 'ETT', holding the estimated travel
    time for each row of `dfInput`.

    Two pickled models are read from the working directory: 'Loc_Speed.sav'
    and 'Speeds_Dist_ETT.sav'. They are passed, in that order, to
    prediction(). `df` is accepted for interface compatibility but is not
    read by this function.
    '''
    # Function body - Begins
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only use model files from a trusted source.
    # Context managers close the files even if unpickling fails.
    with open('Loc_Speed.sav', 'rb') as speed_model_file:
        model1 = pickle.load(speed_model_file)
    with open('Speeds_Dist_ETT.sav', 'rb') as ett_model_file:
        model2 = pickle.load(ett_model_file)
    # Only the travel-time estimates are needed here.
    ETT, _distances, _speeds = prediction(dfInput, model1, model2)
    dfOutput = pd.DataFrame(ETT, columns =['ETT'])
    # Function body - Ends
    return dfOutput

In [4]:
# Drop every column whose name starts with "Unnamed" before predicting:
# prediction() reads the coordinates by column position, so a stray leading
# column (presumably an index written out with the CSV -- confirm) would
# shift them.
is_unnamed = dfInput.columns.str.contains('^Unnamed')
dfInput = dfInput.loc[:, ~is_unnamed]
dfOutput = EstimatedTravelTime(df, dfInput)

In [5]:
# Compare the predictions with the ground truth. The MSE is computed once and
# reused for the RMSE.
actual_tt = dfGroundTruth['TT']
predicted_tt = dfOutput['ETT']
print("MAE:", mean_absolute_error(actual_tt, predicted_tt))
mse = mean_squared_error(actual_tt, predicted_tt)
print("MSE:", mse)
print("RMSE:", math.sqrt(mse))

MAE: 6.605919842226036
MSE: 78.25485732734043
RMSE: 8.846177554590481


In [9]:
# Show the ground-truth 'TT' column, the reference the error metrics are computed against.
dfGroundTruth['TT']

0        2.833333
1        1.500000
2       21.250000
3        2.000000
4       35.733333
          ...    
1200    27.233333
1201    26.750000
1202    53.600000
1203    59.616667
1204    59.816667
Name: TT, Length: 1205, dtype: float64

In [8]:
# Show the 'ETT' column returned by EstimatedTravelTime, for side-by-side comparison with 'TT'.
dfOutput['ETT']

0        4.124706
1        1.419898
2       10.402947
3        1.546715
4       34.661196
          ...    
1200    24.179794
1201    30.715755
1202    42.972197
1203    48.590186
1204    61.210662
Name: ETT, Length: 1205, dtype: float64