In [1]:
import datetime
import glob
import os

import numpy as np
import pandas as pd

from library.preprocessing import distance

In [2]:
# Every trail folder; the pattern's trailing "/" makes each entry end with a path separator.
all_folders=glob.glob("./Trails/*/*/")

# Days whose trails are used for testing, written year-first.
test_dates=['2019-06-28','2019-06-30','2019-07-01','2019-07-02','2019-07-03','2019-07-04','2019-07-05']
# Same days with the three fields reversed, to match the date prefix of the folder names.
formatted_test_dates=['-'.join(d.split("-")[::-1]) for d in test_dates] #reversing order


def get_date(fname):
    """Return the date prefix of the trail folder in ``fname``.

    The folder is the second-to-last path component (``fname`` is either a
    folder path ending with a separator or a file path inside the folder);
    anything after the first ``_`` in the folder name is dropped.
    Both ``\\`` and ``/`` are accepted as separators, so the result is the same
    on Windows and POSIX systems.
    """
    return fname.replace("\\","/").split("/")[-2].split("_")[0]


def get_format_date(fname):
    """Return the folder date of ``fname`` with its ``-`` separated fields reversed."""
    return '-'.join(get_date(fname).split("-")[::-1])


#test folders: (GPS csv, matching mu_serv csv) for every trail recorded on a test day
test_file_pairs=\
[(f+"GPS.csv",
 f+"mu_serv_{}.csv".format(get_format_date(f))) for f in all_folders if get_date(f) in formatted_test_dates]

# Get previous edge speed (mean) in meters/sec

In [3]:
def get_edge_speeds(df_gps,df_mu):
    """Add a ``speed_in_prev`` column to a copy of ``df_mu``: the mean GPS speed on the edge leading to each stop.

    An edge is the time window between the previous stop's ``start_time`` (or the
    first retained GPS sample, for the first stop) and the current stop's
    ``start_time``; both ends are inclusive.

    Parameters
    ----------
    df_gps : DataFrame with a ``time`` column ("<date> <time>" strings) and a
        numeric ``speed`` column.
    df_mu : DataFrame with a ``start_time`` column holding time strings that
        compare correctly as text against the GPS time part
        (presumably zero-padded HH:MM:SS -- confirm against the data).

    Returns
    -------
    DataFrame
        Copy of ``df_mu`` with ``speed_in_prev`` added. Edges without any GPS
        sample receive the mean of the other edges. If no GPS sample passes
        the speed filter, the whole column is NaN.
    """
    gps=df_gps.copy()
    mu=df_mu.copy()

    gps['ts']=gps['time'].apply(lambda stamp: stamp.split()[1]) #time part of the "<date> <time>" string
    gps=gps[gps['speed']>3] #filtering low speed regions

    if gps.empty:
        # No usable sample: there is no first timestamp to start from and no mean to fall back on.
        mu['speed_in_prev']=np.nan
        return mu

    beg_time=gps['ts'].iloc[0] #start time from gps

    edge_means=[]
    for end_time in mu['start_time'].values: #end time is the arrival at the next stop
        on_edge=(beg_time<=gps['ts'])&(gps['ts']<=end_time)
        edge_means.append(gps.loc[on_edge,'speed'].mean())
        beg_time=end_time #the next edge begins where this one ended

    ser=pd.Series(edge_means,dtype=float)

    # Assign by position (plain array), not by label: ``ser`` carries a fresh 0..n-1 index,
    # so a label-aligned assignment would yield NaN whenever ``mu`` has a different index.
    mu['speed_in_prev']=ser.fillna(ser.mean()).to_numpy()

    return mu

# Get distance from the previous Stop in meter

In [4]:
def get_distance_from_prev(df_gps,df_mu):
    """Add a ``distance_from_prev`` column to a copy of ``df_mu``.

    For the first stop the reference point is the first GPS row
    (columns ``#lat`` / ``long``); for every later stop it is the stop before it
    (columns ``lat`` / ``long`` of ``df_mu``). Each value is whatever the imported
    ``distance`` helper returns for the two coordinate pairs
    (presumably meters, per the notebook heading -- confirm in library.preprocessing).
    """
    mu=df_mu.copy()

    origin=df_gps.iloc[0][['#lat','long']].values.tolist()
    stops=mu[['lat','long']].values.tolist()

    # Pair every stop with the point the vehicle came from: the GPS origin, then each earlier stop.
    came_from=[origin]+stops[:-1]

    mu['distance_from_prev']=[
        distance(prev_lat,prev_long,stop_lat,stop_long)
        for (prev_lat,prev_long),(stop_lat,stop_long) in zip(came_from,stops)
    ]

    return mu

# Add seconds to a time string

In [5]:
def get_time_plus_delta(TIME,DELTA):
    """Return, as a string, the time of day ``DELTA`` seconds after ``TIME``.

    ``TIME`` is anything ``pd.to_datetime`` can parse. Only the time part of
    the shifted moment is returned, so the date (and any day rollover) is dropped.
    """
    shifted=pd.to_datetime(TIME)+datetime.timedelta(seconds=DELTA)
    time_of_day=shifted.time()
    return str(time_of_day)

# Predicting stop times from the immediately previous stop

# Algo

In [6]:
def Algo_to_pred_reach_time_from_prev_stop(df_MU,df_GPS,speed_dict=None):
    """Predict the arrival time at every stop from the stop immediately before it.

    For each stop: predicted arrival = previous stop's ``start_time``
    + previous stop's ``estimated_stay_duration`` + distance / speed, where the
    speed is looked up by the row's ``time_zone``. For the first stop the
    reference is the first GPS timestamp with a stay of 0 seconds.

    Parameters
    ----------
    df_MU : DataFrame of stops. Must contain the columns selected for the output below.
    df_GPS : DataFrame of GPS samples for the same trail (``time``, ``speed``,
        ``#lat``, ``long`` are read here or by the helpers).
    speed_dict : optional mapping ``time_zone`` -> speed used for every edge in
        that time zone. Defaults to the table below
        (presumably meters per second, matching the distance unit -- confirm).

    Returns
    -------
    DataFrame
        One row per stop with the prediction (``pred_time_from_n_1``) and its
        error against the real ``start_time`` in minutes
        (``pred_minus_start_time(min)``, prediction minus actual).

    Raises
    ------
    KeyError
        If a row's ``time_zone`` is missing from ``speed_dict``.
    """
    if speed_dict is None:
        speed_dict={'Afternoon': 7.8311055934586244,'Early_Morning': 7.939567834568437,
                    'Evening': 7.526067233283224,'Morning': 7.979809998838039}

    df_mu=df_MU.copy()
    df_gps=df_GPS.copy()
    # NOTE: 'speed_in_prev' is computed but not used below -- the per-time-zone
    # speed replaces it -- and it is left out of the returned columns.
    df_mu=get_edge_speeds(df_gps,df_mu)
    df_mu=get_distance_from_prev(df_gps,df_mu)

    start_time=df_gps.iloc[0].time.split()[1] #gps start time
    stay_duration=0 #starting delay is 0

    pred_time_from_n_1=[]

    stop_rows=df_mu[['time_zone','start_time',
                     'estimated_stay_duration',
                     'distance_from_prev']].values.tolist()
    for (tz,next_start_time,estimated_stay_duration,distance_from_prev) in stop_rows:
        travel_time=round(distance_from_prev/speed_dict[tz]) #sec, at the fixed speed of this time zone
        pred_time_from_n_1.append(get_time_plus_delta(start_time,round(stay_duration+travel_time)))

        start_time=next_start_time #the next prediction starts from this stop's real arrival
        stay_duration=estimated_stay_duration #and includes the stay at this stop

    df_mu['pred_time_from_n_1']=pred_time_from_n_1 #predictions made from the immediately previous stop

    # Difference between predicted and real arrival, in minutes.
    # Built from the two columns directly: integer keys on a string-labelled row
    # (row[0], row[1]) are deprecated positional lookups in recent pandas.
    df_mu['pred_minus_start_time(min)']=[
        round((pd.to_datetime(predicted).timestamp()-pd.to_datetime(actual).timestamp())/60,2)
        for predicted,actual in zip(df_mu['pred_time_from_n_1'],df_mu['start_time'])
    ]

    # reformatting
    df_pred_from_prev=\
    df_mu[[
    'time_zone',
    'start_date',
    'lat', 'long',
    #'speed_in_prev', 
    'distance_from_prev',
    'Is_Bus_stop', 'Is_Turn', 'Is_Signal', 'Is_Congestion', 'Is_Adhoc',
    'estimated_stay_duration',
    'start_time','pred_time_from_n_1',
    'pred_minus_start_time(min)']]
    
    return df_pred_from_prev.copy()

# Running in the file Structure

In [7]:
# Flat folder that collects one prediction file per trail.
REPORT_DIR="./report/test_files/"
os.makedirs(REPORT_DIR,exist_ok=True) # to_csv does not create missing folders

for i,(gps_file,mu_file) in enumerate(test_file_pairs):
    df_microServ=pd.read_csv(mu_file)
    df_trail_GPS=pd.read_csv(gps_file)
    pred_time_from_prev=Algo_to_pred_reach_time_from_prev_stop(df_microServ,df_trail_GPS)

    tz=pred_time_from_prev['time_zone'].iloc[0] #Time Zone of the first stop (by position, not by label)

    # Accept both "\" and "/" so the path is parsed the same way on every platform:
    # .../<down|up>/<DD-MM-YYYY[_suffix]>/GPS.csv
    path_parts=gps_file.replace("\\","/").split("/")
    down_or_up=path_parts[-3] #down / up
    date='-'.join(path_parts[-2].split("_")[0].split("-")[::-1]) #date, fields reversed

    file_to_save_in_struct=f"pred_time_from_prev_{i}_{date}.csv"
    file_to_save_in_folder=f"{down_or_up}_{tz}_pred_time_from_prev_{i}_{date}.csv"

    # gps_file is "<trail folder>GPS.csv"; strip only that suffix so a "GPS"
    # elsewhere in the path cannot truncate the folder.
    trail_folder=gps_file[:-len("GPS.csv")]

    print("Saving",i,"trail")
    pred_time_from_prev.to_csv(trail_folder+file_to_save_in_struct,index=False)
    pred_time_from_prev.to_csv(REPORT_DIR+file_to_save_in_folder,index=False)

Saving 0 trail
Saving 1 trail
Saving 2 trail
Saving 3 trail
Saving 4 trail
Saving 5 trail
Saving 6 trail
Saving 7 trail
Saving 8 trail
Saving 9 trail
Saving 10 trail
Saving 11 trail
Saving 12 trail
Saving 13 trail
Saving 14 trail
Saving 15 trail
Saving 16 trail
Saving 17 trail
Saving 18 trail
Saving 19 trail
Saving 20 trail
Saving 21 trail
Saving 22 trail
Saving 23 trail
Saving 24 trail
Saving 25 trail
Saving 26 trail
Saving 27 trail
Saving 28 trail
Saving 29 trail
Saving 30 trail
Saving 31 trail
Saving 32 trail
Saving 33 trail
Saving 34 trail
Saving 35 trail
Saving 36 trail


import os

# Destructive clean-up: deletes every prediction CSV previously written into the trail folders.
stale_predictions=glob.glob("./Trails/*/*/pred_time_from_prev*.csv")
for stale_file in stale_predictions:
    os.remove(stale_file)

In [8]:
# List the prediction CSVs currently present in the trail folders.
glob.glob("./Trails/*/*/pred_time_from_prev*.csv")

['./Trails\\down\\01-07-2019\\pred_time_from_prev_0_2019-07-01.csv',
 './Trails\\down\\01-07-2019_DATA_17_03_44\\pred_time_from_prev_1_2019-07-01.csv',
 './Trails\\down\\02-07-2019\\pred_time_from_prev_2_2019-07-02.csv',
 './Trails\\down\\02-07-2019_DATA_09_52_39\\pred_time_from_prev_3_2019-07-02.csv',
 './Trails\\down\\02-07-2019_DATA_11_11_23\\pred_time_from_prev_4_2019-07-02.csv',
 './Trails\\down\\02-07-2019_DATA_12_06_05\\pred_time_from_prev_5_2019-07-02.csv',
 './Trails\\down\\03-07-2019\\pred_time_from_prev_6_2019-07-03.csv',
 './Trails\\down\\03-07-2019_DATA_09_56_40\\pred_time_from_prev_7_2019-07-03.csv',
 './Trails\\down\\03-07-2019_DATA_11_52_11\\pred_time_from_prev_8_2019-07-03.csv',
 './Trails\\down\\03-07-2019_DATA_18_04_00\\pred_time_from_prev_9_2019-07-03.csv',
 './Trails\\down\\04-07-2019\\pred_time_from_prev_10_2019-07-04.csv',
 './Trails\\down\\04-07-2019_DATA_19_23_45\\pred_time_from_prev_11_2019-07-04.csv',
 './Trails\\down\\05-07-2019\\pred_time_from_prev_12_2019-

In [9]:
#NICE