In [1]:
import glob
import datetime
import numpy as np
import pandas as pd
from library.preprocessing import distance
from geopy.distance import geodesic

def geodistance(pointA, pointB):
    """Return the geodesic distance between two points, in metres.

    Both arguments are passed unchanged to ``geodesic``; callers in this
    notebook supply ``(lat, long)`` tuples.
    """
    separation = geodesic(pointA, pointB)
    return separation.meters

In [2]:
# Every trail folder, two levels below ./Trails (each path ends with a separator).
all_folders=glob.glob("./Trails/*/*/")

test_dates=['2019-06-28','2019-06-30','2019-07-01','2019-07-02','2019-07-03','2019-07-04','2019-07-05']
# yyyy-mm-dd -> dd-mm-yyyy, the order used at the start of the trail folder names
formatted_test_dates=['-'.join(d.split("-")[::-1]) for d in test_dates]


def get_date(fname):
    """Return the date prefix (text before the first ``_``) of the trail folder in ``fname``.

    ``fname`` is a folder path ending with a separator, as produced by the
    glob above. glob yields ``\\`` separators on Windows and ``/`` elsewhere,
    so both are normalised before the folder component is picked out.
    """
    folder_name=fname.replace("\\","/").split("/")[-2]
    return folder_name.split("_")[0]


def get_format_date(fname):
    """Return the folder date of ``fname`` with its ``-`` separated fields reversed."""
    return '-'.join(get_date(fname).split("-")[::-1])


#test folders: (GPS csv, mu_serv csv) for every trail recorded on a test date
test_file_pairs=[
    (f+"GPS.csv", f+"mu_serv_{}.csv".format(get_format_date(f)))
    for f in all_folders
    if get_date(f) in formatted_test_dates
]

In [3]:
def get_Sec_from_datetime(dt):
    """Convert a ``MM/DD/YYYY HH:MM:SS`` string to a timestamp in seconds (float)."""
    return pd.to_datetime(dt,format="%m/%d/%Y %H:%M:%S").timestamp()

In [4]:
def process_GPS(f_name):
    """Load a GPS trail CSV and derive per-row hop distance and elapsed time.

    The CSV must provide the columns ``#lat``, ``long`` and ``time``; ``time``
    is parsed by ``get_Sec_from_datetime`` and its part after the first space
    becomes ``start_time``.

    Returns a new DataFrame with the columns ``start_time``, ``#lat``,
    ``long``, ``next_hop_distance`` (metres from the previous row, via
    ``geodistance``) and ``time_elapsed`` (seconds since the previous row plus
    1e-9). The first row is measured against itself.
    """
    gps=pd.read_csv(f_name)
    fix_columns=['#lat','long','time']
    prev_lat,prev_long,prev_time=gps[fix_columns].iloc[0]

    hop_metres=[]
    hop_seconds=[]
    for cur_lat,cur_long,cur_time in gps[fix_columns].values:
        hop_metres.append(geodistance((prev_lat,prev_long),(cur_lat,cur_long)))
        # the 1e-9 offset keeps every elapsed value strictly positive
        # (presumably to avoid zero denominators downstream -- confirm)
        hop_seconds.append(get_Sec_from_datetime(cur_time)-get_Sec_from_datetime(prev_time)+1e-9)
        prev_lat,prev_long,prev_time=cur_lat,cur_long,cur_time

    gps['next_hop_distance']=hop_metres
    gps['time_elapsed']=hop_seconds
    gps['start_time']=gps.time.apply(lambda e:e.split(" ")[1])

    wanted=['start_time','#lat','long','next_hop_distance','time_elapsed']
    return gps[wanted].copy()

# Training trails: every folder whose date is NOT one of the test dates.
train_files=[
    (folder+"GPS.csv", folder+"mu_serv_{}.csv".format(get_format_date(folder)))
    for folder in all_folders
    if get_date(folder) not in formatted_test_dates
]

# One (time zone, average speed) observation per training trail.
train_initial_speed=[]
tzones=[]
for gps_f,mu_f in train_files:
    tz=pd.read_csv(mu_f).time_zone[0]
    df=process_GPS(gps_f).iloc[:5*60] # first 300 GPS rows (original note: "5 min data")
    tzones.append(tz)
    # average speed over those rows = total hop distance / total elapsed time
    train_initial_speed.append(df.next_hop_distance.sum()/df.time_elapsed.sum())

# Mean initial speed per time zone.
speed_table=pd.DataFrame({'time_zone':tzones,'speed':train_initial_speed})
init_speed_dict=speed_table.groupby("time_zone")['speed'].mean()

dict(init_speed_dict)

#{'Afternoon': 3.208857802457898,
#'Early_Morning': 2.4823871880135058,
#'Evening': 1.9810927189917138,
#'Morning': 3.0719420700235105}

In [5]:
def process_MU(f_name):
    """Load a ``mu_serv`` CSV and split it into stop data, labels and an initial speed.

    Parameters
    ----------
    f_name : str
        Path of the CSV. It must contain the data and label columns selected
        below, including ``time_zone``.

    Returns
    -------
    data : pandas.DataFrame
        Columns ``start_time, lat, long, estimated_stay_duration, stay_duration``.
    label : pandas.DataFrame
        Columns ``Is_Bus_stop, Is_Turn, Is_Signal, Is_Congestion, Is_Adhoc, time_zone``.
    initial_speed : float
        Hard-coded speed for the ``time_zone`` value of the first row.

    Raises
    ------
    KeyError
        If the first row's ``time_zone`` is not one of the four known zones.
    """
    df_mu=pd.read_csv(f_name)
    # Speeds calculated from past data (values recorded from the training cell's output).
    init_speed_dict={'Afternoon': 3.208857802457898,
                     'Early_Morning': 2.4823871880135058,
                     'Evening': 1.9810927189917138,
                     'Morning': 3.0719420700235105}
    initial_speed=init_speed_dict[df_mu.time_zone.iloc[0]]
    # Return independent copies: callers attach new columns to `data`, and writing
    # into a column subset of df_mu raises SettingWithCopyWarning.
    data=df_mu[['start_time','lat','long','estimated_stay_duration','stay_duration']].copy()
    label=df_mu[['Is_Bus_stop', 'Is_Turn', 'Is_Signal', 'Is_Congestion', 'Is_Adhoc','time_zone']].copy()
    return data,label,initial_speed

def get_time_plus_delta(TIME,DELTA):
    """Shift ``TIME`` by ``DELTA`` seconds and return the time of day as a string.

    ``TIME`` is anything ``pd.to_datetime`` accepts; ``DELTA`` may be negative.
    Only the time-of-day part of the shifted value is returned.
    """
    offset=datetime.timedelta(seconds=DELTA)
    shifted=pd.to_datetime(TIME)+offset
    return str(shifted.time())

In [6]:
def get_immediate_predicted_arrival_time(file):
    """Predict the arrival time at each stop from the previous stop and score the error.

    The stops of the MU file are walked in order. For each consecutive pair
    the prediction is::

        previous start_time + previous estimated_stay_duration
            + (GPS distance between the two start times) / current speed

    After each stop the speed is re-estimated from the GPS rows of the
    preceding ``cal_speed_on_past_min`` minutes.

    Parameters
    ----------
    file : sequence
        ``(gps_csv_path, mu_csv_path)``; element 0 is passed to ``process_GPS``
        and element 1 to ``process_MU``.

    Returns
    -------
    pandas.DataFrame
        Columns ``lat, long, speed, start_time, predicted_start_time,
        error_in_min`` followed by the label columns from ``process_MU``.
        The first row carries no prediction (``'NA'`` / NaN) and the initial
        speed. ``error_in_min`` is predicted minus actual start time, in
        minutes; ``speed`` on later rows is the value estimated on reaching
        that stop.
    """
    cal_speed_on_past_min=20 ##speed is calculated from past 20 min trail

    df_gps=process_GPS(file[0])
    df_mu,mu_labels,speed=process_MU(file[1])
    # Columns are attached to df_mu below; work on our own copy so we never
    # write into a column subset of the frame process_MU read.
    df_mu=df_mu.copy()

    start_time,start_lat,start_long,start_estimated_stay_duration,start_stay_duration=df_mu.iloc[0]

    estimated_start_time=['NA']
    error_in_mins=[np.nan]
    SPEED=[speed]
    total_stay=0

    for next_start_time,next_lat,next_long,next_estimated_stay_duration,next_stay_duration in df_mu.iloc[1:].values:

        #immediate patch distance: GPS hops between the two stops' start times (both ends inclusive)
        # NOTE(review): start_time values are compared as strings -- this presumes
        # zero-padded HH:MM:SS within a single day; confirm for the input files.
        patch_distance=df_gps[(df_gps.start_time<=next_start_time)&(df_gps.start_time>=start_time)].next_hop_distance.sum()
        travel_time=patch_distance/speed

        #added time and others
        time_delta=round(start_estimated_stay_duration+travel_time)
        pred_next_start_time=get_time_plus_delta(start_time,time_delta)
        estimated_start_time.append(pred_next_start_time)

        #error cal in Minutes (predicted - actual)
        error_in_min=(pd.to_datetime(pred_next_start_time).timestamp()-pd.to_datetime(next_start_time).timestamp())/60
        error_in_mins.append(error_in_min)

        #past stay ground_truth
        # NOTE(review): total_stay accumulates over the whole trail, while the
        # elapsed time below only covers the last cal_speed_on_past_min minutes --
        # confirm this is the intended definition of moving time.
        total_stay+=start_stay_duration

        #speed update
        prev_time=get_time_plus_delta(next_start_time,(-cal_speed_on_past_min*60)) #past 20 min speed is calculated
        df_past_patch=df_gps[(df_gps.start_time<=next_start_time)&(df_gps.start_time>=prev_time)]

        past_distance=df_past_patch.next_hop_distance.sum()
        moving_time=df_past_patch.time_elapsed.sum()-total_stay
        # Only accept a physically meaningful estimate. An empty or motionless
        # window, or a stay total exceeding the window's elapsed time, would give
        # a zero, negative or NaN speed; the next prediction would then run
        # backwards in time or fail in round(). Keep the last valid speed instead.
        if past_distance>0 and moving_time>0:
            speed=past_distance/moving_time

        start_time,start_lat,start_long,start_estimated_stay_duration,start_stay_duration=\
        next_start_time,next_lat,next_long,next_estimated_stay_duration,next_stay_duration
        SPEED.append(speed)

    df_mu['predicted_start_time']=estimated_start_time
    df_mu['error_in_min']=error_in_mins
    df_mu['speed']=SPEED

    df_processed=pd.concat([df_mu[['lat','long','speed','start_time','predicted_start_time','error_in_min']],mu_labels],axis=1)
    return df_processed

In [7]:
# Run the predictor on the first test trail and display the resulting frame.
get_immediate_predicted_arrival_time(test_file_pairs[0])

Unnamed: 0,lat,long,speed,start_time,predicted_start_time,error_in_min,Is_Bus_stop,Is_Turn,Is_Signal,Is_Congestion,Is_Adhoc,time_zone
0,23.493971,87.316881,2.482387,08:57:28,,,0,0,0,1,1,Early_Morning
1,23.494337,87.315492,17.802477,09:05:59,09:00:46,-5.216667,0,0,0,0,1,Early_Morning
2,23.495549,87.31479,12.300911,09:06:27,09:06:41,0.233333,0,0,0,0,1,Early_Morning
3,23.496365,87.313693,10.121922,09:06:58,09:07:13,0.25,1,0,0,0,0,Early_Morning
4,23.500254,87.305777,8.244998,09:09:22,09:09:03,-0.316667,1,0,0,0,0,Early_Morning
5,23.502531,87.306906,8.149839,09:10:27,09:10:32,0.083333,0,0,0,1,1,Early_Morning
6,23.503252,87.307421,8.049575,09:10:50,09:11:13,0.383333,1,0,1,1,0,Early_Morning
7,23.510668,87.310522,8.309192,09:12:58,09:13:18,0.333333,0,0,0,0,1,Early_Morning
8,23.526002,87.311084,8.628183,09:16:16,09:17:01,0.75,0,1,0,1,1,Early_Morning
9,23.526425,87.311034,8.603854,09:16:35,09:16:55,0.333333,0,1,0,1,0,Early_Morning


# Get Prev Edge speed (mean) in meter/sec

In [8]:
# Predict every test trail and save the result twice: next to the trail's own
# files and, with direction and time zone in the name, under ./report/test_files/.
for i,(gps_file,mu_file) in enumerate(test_file_pairs):

    pred_time_from_prev=get_immediate_predicted_arrival_time((gps_file,mu_file))

    tz=pred_time_from_prev.time_zone[0] #Time Zone

    # glob returns "\\"-separated paths on Windows and "/"-separated ones elsewhere.
    # Normalise the separators and index from the end so both layouts work:
    # .../<down|up>/<dd-mm-yyyy[_...]>/GPS.csv
    path_parts=gps_file.replace("\\","/").split("/")
    down_or_up=path_parts[-3] #down / up
    date='-'.join(path_parts[-2].split("_")[0].split("-")[::-1]) #date

    file_to_save_in_struct=f"pred_time_from_prev_{i}_{date}.csv"
    file_to_save_in_folder=f"{down_or_up}_{tz}_pred_time_from_prev_{i}_{date}.csv"

    # gps_file is always "<trail folder>GPS.csv"; strip exactly that suffix so a
    # "GPS" occurring earlier in the path cannot truncate the folder.
    trail_folder=gps_file[:-len("GPS.csv")]

    print("Saving",i,"trail")
    pred_time_from_prev.to_csv(trail_folder+file_to_save_in_struct,index=False)
    pred_time_from_prev.to_csv("./report/test_files/"+file_to_save_in_folder,index=False)

Saving 0 trail
Saving 1 trail
Saving 2 trail
Saving 3 trail
Saving 4 trail
Saving 5 trail
Saving 6 trail
Saving 7 trail
Saving 8 trail
Saving 9 trail
Saving 10 trail
Saving 11 trail
Saving 12 trail
Saving 13 trail
Saving 14 trail
Saving 15 trail
Saving 16 trail
Saving 17 trail
Saving 18 trail
Saving 19 trail
Saving 20 trail
Saving 21 trail
Saving 22 trail
Saving 23 trail
Saving 24 trail
Saving 25 trail
Saving 26 trail
Saving 27 trail
Saving 28 trail
Saving 29 trail
Saving 30 trail
Saving 31 trail
Saving 32 trail
Saving 33 trail
Saving 34 trail
Saving 35 trail
Saving 36 trail


import os

# Clean-up: delete every per-trail prediction CSV found two levels below ./Trails.
saved_predictions=glob.glob("./Trails/*/*/pred_time_from_prev*.csv")
for f in saved_predictions:
    os.remove(f)

In [9]:
# List the per-trail prediction CSVs currently present two levels below ./Trails.
glob.glob("./Trails/*/*/pred_time_from_prev*.csv")

['./Trails\\down\\01-07-2019\\pred_time_from_prev_0_2019-07-01.csv',
 './Trails\\down\\01-07-2019_DATA_17_03_44\\pred_time_from_prev_1_2019-07-01.csv',
 './Trails\\down\\02-07-2019\\pred_time_from_prev_2_2019-07-02.csv',
 './Trails\\down\\02-07-2019_DATA_09_52_39\\pred_time_from_prev_3_2019-07-02.csv',
 './Trails\\down\\02-07-2019_DATA_11_11_23\\pred_time_from_prev_4_2019-07-02.csv',
 './Trails\\down\\02-07-2019_DATA_12_06_05\\pred_time_from_prev_5_2019-07-02.csv',
 './Trails\\down\\03-07-2019\\pred_time_from_prev_6_2019-07-03.csv',
 './Trails\\down\\03-07-2019_DATA_09_56_40\\pred_time_from_prev_7_2019-07-03.csv',
 './Trails\\down\\03-07-2019_DATA_11_52_11\\pred_time_from_prev_8_2019-07-03.csv',
 './Trails\\down\\03-07-2019_DATA_18_04_00\\pred_time_from_prev_9_2019-07-03.csv',
 './Trails\\down\\04-07-2019\\pred_time_from_prev_10_2019-07-04.csv',
 './Trails\\down\\04-07-2019_DATA_19_23_45\\pred_time_from_prev_11_2019-07-04.csv',
 './Trails\\down\\05-07-2019\\pred_time_from_prev_12_2019-

In [9]:
#NICE