In [1]:
import os
import glob
import numpy as np
import pandas as pd
import datetime
from library.preprocessing import distance
import matplotlib.pyplot as plt
%matplotlib inline

# Reading the File

In [2]:
def read_test_file_on_BS(fname):
    """Load a test CSV and keep only the rows that fall on a labelled bus stop.

    The travel direction is inferred from ``fname``: a path containing
    ``'down'`` uses ``./Labels/down/Bus_stop_down.csv``; otherwise a path
    containing ``'up'`` uses ``./Labels/up/Bus_stop_up.csv``.  Each row of the
    (header-less) label file is unpacked as ``(lat, long, stop_name)``.

    A ``BS_name`` column is added holding the first labelled stop for which
    ``distance(lat, long, stop_lat, stop_long) < 30``; rows matching no stop
    get NaN.  Rows containing NaN in *any* column are then dropped and the
    index is renumbered from 0.

    Parameters
    ----------
    fname : str
        Path of the test CSV; it must provide ``lat`` and ``long`` columns.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with the extra ``BS_name`` column.

    Raises
    ------
    ValueError
        If ``fname`` contains neither ``'down'`` nor ``'up'``.
    """
    # Resolve the label file first so an unrecognised path fails immediately
    # with a clear message.  'down' is tested before 'up', as before.
    if 'down' in fname:
        label_path = "./Labels/down/Bus_stop_down.csv"
    elif 'up' in fname:
        label_path = "./Labels/up/Bus_stop_up.csv"
    else:
        raise ValueError(
            f"Cannot infer direction from {fname!r}: expected 'down' or 'up' in the path"
        )

    df = pd.read_csv(fname)
    bs = pd.read_csv(label_path, header=None).values.tolist()

    def find_by_stop_name(lat, long):
        # First stop within the threshold wins.
        # NOTE(review): 30 is presumably metres — confirm against
        # library.preprocessing.distance.
        for blat, blong, bsname in bs:
            if distance(lat, long, blat, blong) < 30:
                return bsname
        return np.nan

    # Iterate the two columns directly: avoids positional indexing on a
    # label-indexed row Series and also works when the frame is empty.
    df['BS_name'] = [find_by_stop_name(lat, long)
                     for lat, long in zip(df['lat'], df['long'])]

    # dropna() removes rows with NaN in any column, not only BS_name.
    df = df.dropna().reset_index(drop=True)

    return df

# Helper functions

In [3]:
def get_time_plus_delta(TIME,DELTA):
    """Return the time of day reached DELTA seconds after TIME, as a string.

    TIME is parsed with ``pd.to_datetime``; DELTA is a number of seconds.
    Only the time-of-day part of the result is kept, so any date carried by
    TIME (or gained by passing midnight) is not part of the returned string.
    """
    shifted = pd.to_datetime(TIME) + datetime.timedelta(seconds=DELTA)
    time_of_day = shifted.time()
    return str(time_of_day)

def get_upper_triangular_matrix_df(dataframe, speed_dict=None):
    """Build the stop-to-stop arrival-time error matrix for one trip.

    For every origin stop ``i`` the arrival time at each later stop ``j`` is
    estimated by chaining hops: starting from the observed ``start_time`` at
    ``i``, each hop adds the stay duration at the stop being left plus the
    travel time ``round(distance_from_prev / speed)`` of the stop being
    reached, where ``speed`` is looked up by that stop's ``time_zone``.
    Cell ``(i, j)`` holds ``estimated - observed`` arrival at ``j`` in
    minutes, rounded to two decimals.  The diagonal and everything below it
    are NaN.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per stop, in travel order, with columns ``BS_name``,
        ``time_zone``, ``start_time``, ``estimated_stay_duration`` (seconds)
        and ``distance_from_prev``.  The frame is not modified.
    speed_dict : dict, optional
        Maps ``time_zone`` labels to travel speed (m/sec).  Defaults to the
        four-label table this function has always used.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by ``BS_name``.

    Raises
    ------
    KeyError
        If a ``time_zone`` value of a destination row is missing from
        ``speed_dict``.

    Notes
    -----
    The running estimate is kept as a full timestamp rather than a
    time-of-day string, so an estimate that passes midnight (or input that
    carries real dates) is compared correctly.  If ``start_time`` holds no
    date, pandas supplies one when parsing, so *observed* times that wrap
    past midnight still cannot be ordered — provide full datetimes for such
    trips.
    """
    if speed_dict is None:
        speed_dict={'Afternoon': 7.8311055934586244,'Early_Morning': 7.939567834568437,
                    'Evening': 7.526067233283224,'Morning': 7.979809998838039}

    stop_names=dataframe['BS_name'].values
    time_zones=dataframe['time_zone'].tolist()
    stay_durations=dataframe['estimated_stay_duration'].tolist()
    prev_distances=dataframe['distance_from_prev'].tolist()
    # Parse each observed time once instead of inside the O(n^2) loop.
    observed_times=[pd.to_datetime(t) for t in dataframe['start_time']]
    n_stops=len(stop_names)

    error_matrix=[]
    for origin in range(n_stops):
        # Diagonal and lower triangle carry no estimate.
        errors=[np.nan]*(origin+1)

        current_time=observed_times[origin]
        stay_duration=stay_durations[origin]

        for target in range(origin+1,n_stops):
            travel_speed=speed_dict[time_zones[target]] #m/sec
            travel_time=round(prev_distances[target]/travel_speed) #sec
            # Estimated arrival at `target`; also the departure base for the next hop.
            current_time=current_time+datetime.timedelta(seconds=round(stay_duration+travel_time))

            error_minutes=(current_time-observed_times[target]).total_seconds()/60
            errors.append(round(error_minutes,2))

            # The stay estimated at the stop just reached feeds the next hop.
            stay_duration=stay_durations[target]

        error_matrix.append(errors)

    return pd.DataFrame(error_matrix,index=stop_names,columns=stop_names)

In [4]:
def get_upper_triangular_matrix_from_fname(fname):
    """Read the test CSV at ``fname`` and return its stop-to-stop error matrix.

    Chains ``read_test_file_on_BS`` (load and keep bus-stop rows) into
    ``get_upper_triangular_matrix_df`` (build the matrix).
    """
    return get_upper_triangular_matrix_df(read_test_file_on_BS(fname))

# Looping for all test CSVs

In [5]:
# Input folder with the per-trip test CSVs and output folder for the matrices.
TEST_FILES_DIR="./report/test_files"
MATRIX_DIR="./report/BS_Matrices"

# Make sure the output folder exists so to_csv cannot fail on a missing directory.
os.makedirs(MATRIX_DIR,exist_ok=True)

# os.listdir also returns non-CSV entries (e.g. .ipynb_checkpoints); keep only
# CSV files and sort them so the processing order is deterministic.
test_files=sorted(name for name in os.listdir(TEST_FILES_DIR)
                  if name.lower().endswith(".csv"))

for tf in test_files:
    # Each matrix is saved under the same file name as the test CSV it came from.
    df=get_upper_triangular_matrix_from_fname(f'{TEST_FILES_DIR}/{tf}')
    df.to_csv(f"{MATRIX_DIR}/{tf}")
    print("Saved for ",tf)

Saved for  down_Afternoon_pred_time_from_prev_10_2019-07-04.csv
Saved for  down_Afternoon_pred_time_from_prev_16_2019-07-05.csv
Saved for  down_Afternoon_pred_time_from_prev_1_2019-07-01.csv
Saved for  down_Early_Morning_pred_time_from_prev_0_2019-07-01.csv
Saved for  down_Early_Morning_pred_time_from_prev_12_2019-07-05.csv
Saved for  down_Early_Morning_pred_time_from_prev_13_2019-07-05.csv
Saved for  down_Early_Morning_pred_time_from_prev_2_2019-07-02.csv
Saved for  down_Early_Morning_pred_time_from_prev_3_2019-07-02.csv
Saved for  down_Early_Morning_pred_time_from_prev_6_2019-07-03.csv
Saved for  down_Early_Morning_pred_time_from_prev_7_2019-07-03.csv
Saved for  down_Evening_pred_time_from_prev_11_2019-07-04.csv
Saved for  down_Evening_pred_time_from_prev_17_2019-07-05.csv
Saved for  down_Evening_pred_time_from_prev_9_2019-07-03.csv
Saved for  down_Morning_pred_time_from_prev_14_2019-07-05.csv
Saved for  down_Morning_pred_time_from_prev_15_2019-07-05.csv
Saved for  down_Morning_pred_

In [6]:
#NICE