In [1]:
import os
import glob
import pickle
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from geopy.distance import geodesic
from library.preprocessing import distance
%matplotlib inline

# Looping for all test CSVs

In [2]:
# Every trip folder on disk: ./Trails/<trail>/<trip>/
all_folders=glob.glob("./Trails/*/*/")

test_dates=['2019-06-28','2019-06-30','2019-07-01','2019-07-02','2019-07-03','2019-07-04','2019-07-05']
formatted_test_dates=['-'.join(d.split("-")[::-1]) for d in test_dates] #reversing order


def get_date(fname):
    """Return the date prefix (text before the first "_") of the folder containing ``fname``."""
    # os.path understands the platform's separators, unlike a literal "\\" split.
    return os.path.basename(os.path.dirname(fname)).split("_")[0]


def get_format_date(fname):
    """Return ``get_date(fname)`` with its "-"-separated fields in reverse order."""
    return '-'.join(get_date(fname).split("-")[::-1])


# Prediction CSVs whose file name ends with one of the test dates.
test_pred_files=[]
for date in test_dates:
    test_pred_files.extend(glob.glob("./Trails/*/*/pred_time_from_prev*{}.csv".format(date)))

# The GPS log sits next to each prediction file, in the same trip folder.
test_gps_files=[os.path.join(os.path.dirname(pred_file),"GPS.csv") for pred_file in test_pred_files]

# First mu_serv*.csv found in the same trip folder.
test_mu_files=[]
for pred_file in test_pred_files:
    trip_dir=os.path.dirname(pred_file)
    mu_matches=glob.glob(os.path.join(trip_dir,"mu_serv*.csv"))
    if not mu_matches:
        # Fail with the offending folder instead of a bare IndexError.
        raise FileNotFoundError("No mu_serv*.csv found in {}".format(trip_dir))
    test_mu_files.append(mu_matches[0])

# (GPS csv, prediction csv, mu csv) per test trip.
test_file_pairs=list(zip(test_gps_files,test_pred_files,test_mu_files))

In [3]:
def read_necessary_stop_info(file_pair):
    """Merge the prediction CSV into the stop CSV and keep the columns used downstream.

    ``file_pair[1]`` is the prediction CSV and ``file_pair[2]`` the stop (mu) CSV.
    The trail is "up" when the prediction path contains the substring "up",
    otherwise "down". Returns a copy restricted to the selected columns.
    """
    pred_path=file_pair[1]
    direction="down"
    if "up" in pred_path:
        direction="up"

    predictions=pd.read_csv(pred_path)
    stops=pd.read_csv(file_pair[2])

    # Prediction columns are attached to the stop rows by index alignment.
    stops=stops.assign(
        ETA=predictions.predicted_start_time,
        Err=predictions.error_in_min,
        trail=direction,
        speed=predictions.speed,
    )

    wanted=[
        'trail','lat', 'long','speed','start_time','ETA','Err','stay_duration',
        'estimated_stay_duration','Is_Bus_stop','Is_Turn','Is_Signal','Is_Congestion','Is_Adhoc',
    ]
    return stops[wanted].copy()

def filter_out_bs(lat,long,df_stop):
    """Return the first row of ``df_stop`` (as a dict) lying closer than 30 to (lat, long).

    Closeness is measured by ``distance`` on the row's "lat"/"long" values.
    When no row qualifies, a dict with every column mapped to NaN is returned.
    """
    column_names=df_stop.columns
    for row in df_stop.values:
        record=dict(zip(column_names,row))
        close_enough=distance(lat,long,record["lat"],record["long"])<30
        if close_enough:
            return record
    # No stop matched: same keys, all-NaN placeholder.
    return dict.fromkeys(column_names,np.nan)


def geodistance(pointA, pointB):
    """Return the ``geodesic`` separation of ``pointA`` and ``pointB`` in metres."""
    separation=geodesic(pointA, pointB)
    return separation.meters

def get_Sec_from_datetime(dt):
    """Parse a "%m/%d/%Y %H:%M:%S" string and return its ``Timestamp.timestamp()`` value in seconds."""
    return pd.to_datetime(dt,format="%m/%d/%Y %H:%M:%S").timestamp()

def get_time_plus_delta(TIME,DELTA):
    """Shift ``TIME`` by ``DELTA`` seconds and return the time-of-day part as a string."""
    offset=datetime.timedelta(seconds=DELTA)
    shifted=pd.to_datetime(TIME)+offset
    # Only the clock time is kept; the date part is discarded.
    return str(shifted.time())

def process_GPS(f_name):
    """Read a GPS CSV and attach per-row hop distance and elapsed time.

    The CSV must provide the columns ``#lat``, ``long`` and ``time``. For each
    row the distance (``geodistance``) and the time difference in seconds to the
    previous row are computed; the first row is compared with itself. A constant
    1e-9 is added to every time difference.

    Returns a copy with the columns ``start_time`` (the part of ``time`` after
    the first space), ``lat``, ``long``, ``next_hop_distance`` and ``time_elapsed``.
    """
    track=pd.read_csv(f_name)
    fixes=track[['#lat','long','time']]

    prev_lat,prev_long,prev_time=fixes.iloc[0]
    hop_lengths=[]
    hop_durations=[]
    for cur_lat,cur_long,cur_time in fixes.values:
        hop_lengths.append(geodistance((prev_lat,prev_long),(cur_lat,cur_long)))
        hop_durations.append(get_Sec_from_datetime(cur_time)-get_Sec_from_datetime(prev_time)+1e-9)
        # The current fix becomes the reference for the next row.
        prev_lat,prev_long,prev_time=cur_lat,cur_long,cur_time

    enriched=track.assign(
        next_hop_distance=hop_lengths,
        time_elapsed=hop_durations,
        start_time=track['time'].map(lambda stamp:stamp.split(" ")[1]),
        lat=track['#lat'],
    )
    return enriched[['start_time','lat','long','next_hop_distance','time_elapsed']].copy()

def time_zone_cal(s):
    """Map a clock string to its time-of-day bucket.

    Parameters
    ----------
    s : str
        Time whose text before the first ":" is the hour, e.g. "17:03:44".

    Returns
    -------
    str
        'Early_Morning' for hours 7-9, 'Morning' for 10-12, 'Afternoon' for
        13-17 and 'Evening' for 18-23.

    Raises
    ------
    ValueError
        If the hour is outside 7-23. Previously this fell through every branch
        and surfaced as an UnboundLocalError.
    """
    hour=int(s.split(':')[0])
    if 6<hour<=9:
        return 'Early_Morning'
    if 9<hour<=12:
        return 'Morning'
    if 12<hour<=17:
        return 'Afternoon'
    if 17<hour<=23:
        return 'Evening'
    raise ValueError("No time zone defined for hour {} (from {!r}); expected 7-23".format(hour,s))

# Loading the stay-duration lookup from disk (this cell reads the pickle, it does not write it).
# NOTE: pickle.load can execute arbitrary code - only load a file produced by a trusted source.
with open('./logs/stay_duration.pickle', 'rb') as handle:
    stay_dic=pickle.load(handle)

In [4]:
#Main code
def get_BusStops(pair):
    """Build the per-bus-stop table for one trip.

    Parameters
    ----------
    pair : sequence
        ``(gps_csv, prediction_csv, mu_csv)`` paths. ``pair[0]`` is read by
        ``process_GPS``; the whole sequence is passed to
        ``read_necessary_stop_info``.

    Returns
    -------
    pandas.DataFrame
        One row per labelled bus stop with the columns ``b_lat, b_long, BS,
        speed, start_time, ETA, Err, stay_duration, estimated_stay_duration,
        distance_from_prev``.

    Raises
    ------
    ValueError
        If the trail is neither "up" nor "down", or if a labelled bus stop has
        no GPS point within 30 m. The latter used to surface as a cryptic
        length-mismatch/IndexError, or silently misalign the filled-in values.
    """
    df_stop=read_necessary_stop_info(pair)

    trail=df_stop.trail[0]
    if trail=='down':
        df_bs=pd.read_csv("./Labels/down/Bus_stop_down.csv",header=None)
    elif trail=='up':
        df_bs=pd.read_csv("./Labels/up/Bus_stop_up.csv",header=None)
    else:
        raise ValueError("Unknown trail {!r}; expected 'up' or 'down'".format(trail))
    df_bs.columns=['b_lat','b_long','BS']

    # Detected stop (if any) for every labelled bus stop; unmatched stops give all-NaN rows.
    matched_stops=pd.DataFrame([filter_out_bs(b_lat,b_long,df_stop)
                                for b_lat,b_long in zip(df_bs['b_lat'],df_bs['b_long'])])
    BusStops=\
    pd.concat([df_bs,matched_stops],
             axis=1).drop(columns=['trail','lat','long','Is_Bus_stop',
                                  'Is_Turn','Is_Signal','Is_Congestion','Is_Adhoc'])
    # Assign back instead of a chained inplace replace, which does not reach the
    # frame when pandas Copy-on-Write is active.
    BusStops['stay_duration']=BusStops['stay_duration'].fillna(0)
    BusStops['start_time']=BusStops['start_time'].fillna("--")

    gps_df=process_GPS(pair[0])

    #Recalculating.........
    cal_speed_on_past_min=20
    match_radius_m=30
    # Speed used for the first bus stop, keyed by time_zone_cal() bucket.
    # NOTE(review): presumably metres/second like the computed speeds - confirm.
    first_stop_speed={'Afternoon': 3.208857802457898,
                      'Early_Morning': 2.4823871880135058,
                      'Evening': 1.9810927189917138,
                      'Morning': 3.0719420700235105}
    total_stay=0
    START_TIME=[];SPEED=[];DIS=[];ESTIMATED_STAY=[]
    prev_dis=0
    # Loop-invariant: materialise the GPS rows once instead of once per bus stop.
    gps_points=gps_df[['start_time','lat','long','next_hop_distance']].values
    for index,(bs_name,start_lat,start_long,start_stay_dur) in enumerate(
            BusStops[['BS','b_lat','b_long','stay_duration']].values):
        dis=0
        for stime,lat,long,d in gps_points:
            dis+=d
            if geodistance((lat,long),(start_lat,start_long))<=match_radius_m:
                total_stay+=start_stay_dur
                zone=time_zone_cal(stime)
                if index==0:
                    speed=first_stop_speed[zone]
                else:
                    #past 20 min speed is calculated
                    prev_time=get_time_plus_delta(stime,(-cal_speed_on_past_min*60))
                    # NOTE(review): times are compared as strings, which is only
                    # correct if GPS times are zero-padded and do not cross midnight.
                    df_past_patch=gps_df[(gps_df.start_time<=stime)&(gps_df.start_time>=prev_time)]
                    # NOTE(review): total_stay accumulates over the whole trip while
                    # the window covers only the last 20 minutes - confirm intended.
                    speed=df_past_patch.next_hop_distance.sum()/(df_past_patch.time_elapsed.sum()-total_stay)
                START_TIME.append(stime)
                SPEED.append(speed)
                ESTIMATED_STAY.append(stay_dic['Is_Bus_stop'][zone])
                DIS.append(dis-prev_dis)
                prev_dis=dis
                break
        else:
            # Without a GPS match the per-stop lists fall out of step with BusStops.
            raise ValueError("No GPS point within {} m of bus stop {!r} in {}".format(
                match_radius_m,bs_name,pair[0]))

    #Replacing NaN values
    # BusStops has a default 0..n-1 index, so the position doubles as the list index.
    BusStops['speed']=[SPEED[i] if np.isnan(s) else s for i,s in enumerate(BusStops['speed'])]

    BusStops['start_time']=[START_TIME[i] if t=='--' else t for i,t in enumerate(BusStops['start_time'])]

    BusStops['estimated_stay_duration']=ESTIMATED_STAY

    BusStops['distance_from_prev']=DIS
    return BusStops

In [5]:
def get_upper_triangular_matrix_df(dataframe):
    """Build stop-to-stop ETA and error matrices from a bus-stop table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs the columns ``BS``, ``stay_duration``, ``start_time``, ``speed``,
        ``estimated_stay_duration`` and ``distance_from_prev``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(ETA, error)``. Row i / column j holds the estimate for reaching stop
        j when starting from stop i; cells with j <= i are "--" (ETA) or NaN
        (error). The error is estimated minus observed arrival, in minutes.
        Labels are the ``BS`` names, suffixed with "(s)" where ``stay_duration``
        is 0. An empty input yields two empty frames (it used to raise
        UnboundLocalError because the labels were only built inside the loop).
    """
    df=dataframe.copy()

    # Labels are loop-invariant; build them once, by column name rather than position.
    BS_names=[name+"(s)" if stay==0 else name
              for name,stay in zip(df['BS'],df['stay_duration'])]

    # Per-stop values needed for every hop, materialised once.
    hops=df[['speed',
             'start_time',
             'estimated_stay_duration',
             'distance_from_prev']].values.tolist()

    error_matrix=[]
    ETA_matrix=[]
    for start_bs in range(1,df.shape[0]+1): #1st bus stop
        origin=df.iloc[start_bs-1]
        start_time=origin.start_time
        stay_duration=origin.estimated_stay_duration

        # Cells up to and including the origin stop stay empty.
        error=[np.nan]*start_bs
        eta=['--']*start_bs

        for speed,next_start_time,estimated_stay_duration,prevDist in hops[start_bs:]:
            travel_time=round(prevDist/speed) #sec
            estimated_reach_time=get_time_plus_delta(start_time,round(stay_duration+travel_time)) #timestamp

            c_error=round((pd.to_datetime(estimated_reach_time).timestamp()-pd.to_datetime(next_start_time).timestamp())/60,2)
            error.append(c_error)
            eta.append(estimated_reach_time)

            start_time=estimated_reach_time # the next hop starts from the estimated arrival
            stay_duration=estimated_stay_duration # estimated stay is added when computing the next hop

        error_matrix.append(error)
        ETA_matrix.append(eta)

    return (pd.DataFrame(ETA_matrix,index=BS_names,columns=BS_names),
            pd.DataFrame(error_matrix,index=BS_names,columns=BS_names))

In [6]:
def get_ETA_and_ERROR(pair):
    """Return the ``(ETA, error)`` matrices for one (GPS, prediction, mu) file triple."""
    stops_table=get_BusStops(pair)
    eta_matrix,error_matrix=get_upper_triangular_matrix_df(stops_table)
    return eta_matrix,error_matrix

# Preview a single trip. ETA/error are computed here explicitly so that this cell
# runs on a fresh kernel instead of relying on values left behind by another cell.
ETA,error=get_ETA_and_ERROR(test_file_pairs[0])

plt.figure(figsize=(16,7))
sns.heatmap(error,annot=True)
plt.title("ETA error in minutes (row: origin stop, column: destination stop)")
plt.show()

# Column-wise view: stop name -> ETAs for that stop from every origin stop.
BS_ETA_DICT=dict(zip(ETA.columns,ETA.T.values.tolist()))
BS_ETA_DICT

In [7]:
# Compute and save the ETA / error matrices for every test trip.
# NOTE(review): the ETA matrix is written under .../ERR/ and the error matrix under
# .../ETA/. This looks swapped, but the paths are kept because existing outputs
# and readers may rely on them - confirm before changing.
for i,pair in enumerate(test_file_pairs):
    # <trail>_<trip folder>_<index>, derived with os.path so it works with any separator.
    trip_dir=os.path.dirname(pair[0])
    base_name=os.path.basename(os.path.dirname(trip_dir))+"_"+os.path.basename(trip_dir)+f"_{i}"
    eta_path=f"./report/BS_Matrices/ERR/{base_name}_ETA.csv"
    err_path=f"./report/BS_Matrices/ETA/{base_name}_ERR.csv"
    if os.path.exists(eta_path):
        print("Already processed",base_name)
        continue
    try:
        ETA,error=get_ETA_and_ERROR(pair)
        for out_path in (eta_path,err_path):
            os.makedirs(os.path.dirname(out_path),exist_ok=True)
        ETA.to_csv(eta_path)
        error.to_csv(err_path)
        print("Saved for ",base_name)
    except Exception as exc:
        # Keep going with the next trip, but report why this one failed.
        # KeyboardInterrupt is no longer swallowed, so the loop can be stopped.
        print("Error in processing file", base_name, "->", type(exc).__name__, exc)

Saved for  up_28-06-2019_0
Saved for  down_30-06-2019_1
Saved for  down_01-07-2019_2
Saved for  down_01-07-2019_DATA_17_03_44_3
Saved for  up_01-07-2019_4
Saved for  up_01-07-2019_DATA_15_59_01_5
Error in processing file down_02-07-2019_6
Saved for  down_02-07-2019_DATA_09_52_39_7
Error in processing file down_02-07-2019_DATA_11_11_23_8
Saved for  down_02-07-2019_DATA_12_06_05_9
Saved for  up_02-07-2019_10
Saved for  up_02-07-2019_DATA_08_59_55_11
Saved for  up_02-07-2019_DATA_10_58_35_12
Error in processing file down_03-07-2019_13
Error in processing file down_03-07-2019_DATA_09_56_40_14
Error in processing file down_03-07-2019_DATA_11_52_11_15
Error in processing file down_03-07-2019_DATA_18_04_00_16
Saved for  up_03-07-2019_17
Error in processing file up_03-07-2019_DATA_08_46_57_18
Error in processing file up_03-07-2019_DATA_10_58_15_19
Saved for  up_03-07-2019_DATA_17_00_17_20
Error in processing file down_04-07-2019_21
Saved for  down_04-07-2019_DATA_19_23_45_22
Saved for  up_04-0

In [6]:
#NICE