In [1]:
import glob
import os

import numpy as np
import pandas as pd
from geopy.distance import geodesic

# Input file discovery

In [2]:
# Trail folders are laid out as ./Trails/<trail>/<date>_<suffix>/ (see get_date below).
all_folders=glob.glob("./Trails/*/*/")

# Dates whose prediction files are selected as test trails.
test_dates=['2019-06-28','2019-06-30','2019-07-01','2019-07-02','2019-07-03','2019-07-04','2019-07-05']
formatted_test_dates=['-'.join(d.split("-")[::-1]) for d in test_dates] #reversing order


def _parent_folder_name(path):
    """Return the name of the directory that directly contains `path`.

    Works for file paths and for folder paths that end with a separator.
    os.path is used instead of splitting on a hard-coded "\\" so the lookup
    does not depend on the platform's path separator.
    """
    return os.path.basename(os.path.dirname(path))


def get_date(fname):
    """Return the part before the first "_" of the folder containing `fname`."""
    return _parent_folder_name(fname).split("_")[0]


def get_format_date(fname):
    """Return get_date(fname) with its "-"-separated fields in reverse order."""
    return '-'.join(get_date(fname).split("-")[::-1])


# Every prediction file whose name ends with one of the test dates.
test_pred_files=[]
for date in test_dates:
    test_pred_files.extend(glob.glob("./Trails/*/*/pred_time_from_prev*{}.csv".format(date)))

valid_test_pred_files=[]
BS_files=[]
ETA_files=[]
for base in test_pred_files:
    trail_type="down" if "down" in base else "up"
    date=_parent_folder_name(base)
    bs_matches=glob.glob(os.path.join('./report/BS_Matrices/BS',f'{trail_type}_{date}*.csv'))
    eta_matches=glob.glob(os.path.join('./report/BS_Matrices/ETA',f'{trail_type}_{date}*.csv'))
    # Look up both matrices BEFORE appending anything: the three lists are
    # zipped together below, so a trail that has only one of the two files
    # must not leave a stray entry in just one list.
    if not (bs_matches and eta_matches):
        continue
    BS_files.append(bs_matches[0])
    ETA_files.append(eta_matches[0])
    valid_test_pred_files.append(base)


test_gps_files=[]
test_mu_files=[]
for pred_file in valid_test_pred_files:
    # GPS.csv and mu_serv*.csv live in the same folder as the prediction file.
    trail_dir=os.path.dirname(pred_file)
    test_gps_files.append(os.path.join(trail_dir,"GPS.csv"))
    # Raises IndexError if the folder has no mu_serv*.csv file.
    test_mu_files.append(glob.glob(os.path.join(trail_dir,"mu_serv*.csv"))[0])

# Tuple layout: (GPS csv, prediction csv, mu_serv csv, BS matrix csv, ETA matrix csv).
test_file_pairs=list(zip(test_gps_files,valid_test_pred_files,test_mu_files,BS_files,ETA_files))

# ROUTE DATA CALCULATION

In [3]:
def geodistance(pointA, pointB):
    """Return the geodesic distance between `pointA` and `pointB` in meters.

    Both points are handed unchanged to geopy's `geodesic`; callers in this
    notebook pass (lat, long) tuples.
    """
    separation = geodesic(pointA, pointB)
    return separation.meters

def get_Sec_from_datetime(dt):
    """Parse a "%m/%d/%Y %H:%M:%S" date-time and return it as epoch seconds.

    Parameters
    ----------
    dt : value accepted by `pd.to_datetime` with that explicit format.

    Returns
    -------
    The result of `Timestamp.timestamp()` on the parsed value.
    """
    parsed = pd.to_datetime(dt, format="%m/%d/%Y %H:%M:%S")
    seconds = parsed.timestamp()
    return seconds

def get_time_plus_delta(TIME,DELTA):
    """Return the time of day of `TIME` shifted by `DELTA` seconds, as a string.

    Parameters
    ----------
    TIME : anything `pd.to_datetime` can parse into a single timestamp.
    DELTA : number of seconds to add (may be negative or fractional).

    Returns
    -------
    str
        `str()` of the shifted timestamp's `.time()`; the date part is dropped,
        so a shift past midnight wraps around.
    """
    # pd.to_timedelta is used because this notebook never imports `datetime`;
    # the previous `datetime.timedelta(...)` call raised NameError.
    shifted=pd.to_datetime(TIME)+pd.to_timedelta(DELTA,unit="s")
    return str(shifted.time())

def process_GPS(f_name):
    """Load a GPS trail and compute the seconds elapsed between consecutive rows.

    Parameters
    ----------
    f_name : path of a CSV file with at least the columns '#lat', 'long' and
        'time', where 'time' is formatted as "%m/%d/%Y %H:%M:%S".

    Returns
    -------
    pandas.DataFrame
        A new frame with columns ['lat', 'long', 'time_elapsed'] that keeps the
        CSV's row index. 'lat' is the '#lat' column. 'time_elapsed' is the gap
        in seconds to the previous row (0 for the first row) plus 1e-9, so it
        is never exactly zero. An empty CSV yields an empty frame.
    """
    df=pd.read_csv(f_name)

    # Parse the whole column once and take row-to-row differences, instead of
    # parsing every timestamp twice inside a Python loop.
    stamps=pd.to_datetime(df['time'],format="%m/%d/%Y %H:%M:%S")
    gaps=stamps.diff().dt.total_seconds()
    # The first row has no predecessor; it is measured against itself.
    gaps.iloc[:1]=0.0

    # Only these three columns are returned, so the per-row hop distance and
    # start-time columns are not computed.
    return pd.DataFrame({'lat':df['#lat'],'long':df['long'],'time_elapsed':gaps+1e-9})

def _first_gps_index_near(gps_points,target,radius_m):
    """Return (row position, elapsed time) of the first GPS row within `radius_m` meters of `target`, or None."""
    for ind,(lat,long,elps_time) in enumerate(gps_points):
        if geodistance((lat,long),target)<=radius_m:
            return ind,elps_time
    return None


def get_route_data(pair):
    """Build the per-GPS-point route description for one trail.

    Parameters
    ----------
    pair : sequence of file paths; pair[0] is the GPS csv, pair[2] the mu_serv
        csv and pair[3] the bus-stop (BS) matrix csv.

    Returns
    -------
    list of dict
        One dict per GPS row, in order, with keys 'lat', 'long', 'type' and
        'duration' (np.ceil of the row's duration). Rows tagged "BUS" also
        carry 'id' (the stop name) and 'skip' (True when the stop's
        stay_duration is 0).

    Tagging rules
    -------------
    * Every row starts as type "TRAIL" with its own time_elapsed as duration.
    * Each bus stop tags the first GPS row within 30 m of it as "BUS".
    * Each mu_serv point then tags the first GPS row within 30 m of it, unless
      that row is already tagged, as "ADH" or as a "_"-joined combination of
      "TUR"/"SIG"/"CONG"; with no flag set the row stays "TRAIL".
    * A stay_duration of 0 is stored as a duration of 1.
    """
    match_radius_m=30  # maximum distance (meters) for a GPS row to count as "at" a point

    df_gps=process_GPS(pair[0])
    df_bs=pd.read_csv(pair[3])
    df_bs['skip']=df_bs.stay_duration.apply(lambda e: True if e==0 else False)
    df_mu=pd.read_csv(pair[2])[['lat','long','Is_Turn','Is_Signal','Is_Congestion','Is_Adhoc','stay_duration']]

    # Materialize the GPS rows once; the frame is not modified until both
    # matching loops below have finished.
    gps_points=df_gps[['lat','long','time_elapsed']].values

    # Parallel lists: one entry per tagged GPS row, applied in order further down.
    index=[]
    ID=[]
    TYPE=[]
    DURATION=[]
    SKIP=[]
    claimed=set()  # same content as `index`, kept for O(1) membership tests

    for start_lat,start_long,bs_name,stay_duration,skip in df_bs[['b_lat','b_long','BS','stay_duration','skip']].values:
        hit=_first_gps_index_near(gps_points,(start_lat,start_long),match_radius_m)
        if hit is None:
            continue
        ind,_=hit
        # Bus stops are not de-duplicated: if two stops hit the same row, the
        # later one overwrites the earlier when the tags are applied.
        index.append(ind)
        claimed.add(ind)
        TYPE.append("BUS")
        ID.append(bs_name)
        DURATION.append(1 if stay_duration==0 else stay_duration)
        SKIP.append(skip)

    for start_lat,start_long,tur,sig,cong,adh,stay_duration in df_mu.values:
        hit=_first_gps_index_near(gps_points,(start_lat,start_long),match_radius_m)
        if hit is None:
            continue
        ind,elps_time=hit
        # A row that is already tagged keeps its tag; this point is dropped.
        if ind in claimed:
            continue
        index.append(ind)
        claimed.add(ind)

        typ=[]
        if adh==1:
            # Ad-hoc takes precedence over the other flags.
            typ.append("ADH")
        else:
            if tur==1:
                typ.append("TUR")
            if sig==1:
                typ.append("SIG")
            if cong==1:
                typ.append("CONG")

        if len(typ)==0:
            TYPE.append("TRAIL")
            DURATION.append(1 if elps_time==0 else elps_time)
        else:
            TYPE.append('_'.join(typ))
            DURATION.append(1 if stay_duration==0 else stay_duration)
        ID.append("null")
        SKIP.append("null")

    # Defaults for every row, then overwrite the tagged rows.
    df_gps['type']='TRAIL'
    df_gps['Duration']=df_gps.time_elapsed
    df_gps["ID"]="null"
    df_gps["skip"]="null"

    for idx,Id,Type,Dur,Skip in zip(index,ID,TYPE,DURATION,SKIP):
        df_gps.loc[idx,"ID"]=Id
        df_gps.loc[idx,"type"]=Type
        df_gps.loc[idx,"Duration"]=Dur
        df_gps.loc[idx,"skip"]=Skip

    route_data=[]
    for lat,long,Type,Id,Skip,Dur in df_gps[['lat','long','type','ID','skip','Duration']].values:
        obj={"lat":lat,"long":long,"type":Type}
        if Type=="BUS":
            obj["id"]=Id
            # Normalize to a plain Python bool.
            obj["skip"]=True if Skip==True else False
        obj["duration"]=np.ceil(Dur)
        route_data.append(obj)

    return route_data

# GT_DATA CALCULATION

In [4]:
def get_gt_data(pair):
    """Build the per-bus-stop records for one trail.

    Parameters
    ----------
    pair : sequence of file paths; pair[3] is the bus-stop (BS) matrix csv with
        columns 'b_lat', 'b_long' and 'BS', and pair[4] is the ETA matrix csv
        whose first column (read as "Unnamed: 0") is the row index.

    Returns
    -------
    list of dict
        One dict per BS row, in file order, with keys 'id', 'lat', 'long' and
        'ETA', where 'ETA' is the i-th row of the ETA matrix as a list.

    Raises
    ------
    IndexError
        If the ETA matrix has fewer rows than the BS file.
    """
    # Paths are only handed to pandas, so str and os.PathLike both work.
    ETA=pd.read_csv(pair[4],index_col="Unnamed: 0").values
    stops=pd.read_csv(pair[3])[['b_lat','b_long','BS']]

    gt_data=[]
    for i,(lat,long,Id) in enumerate(stops.values):
        # Row i of the ETA matrix belongs to stop i of the BS file.
        gt_data.append({"id":Id,"lat":lat,"long":long,"ETA":ETA[i].tolist()})

    return gt_data

In [5]:
# Use the first matched trail as the sample to export. Fail with an explicit
# message when file discovery found nothing, instead of a bare IndexError.
if not test_file_pairs:
    raise FileNotFoundError("test_file_pairs is empty: no test trail has prediction, BS and ETA files")
pair=test_file_pairs[0]

In [6]:
# Build the two payload sections from the selected sample trail:
# `route` has one record per GPS point, `gt` one record per bus stop.
route=get_route_data(pair)
gt=get_gt_data(pair)

In [7]:
import json

In [8]:
# Serialize both sections into a single JSON document with keys 'route' and 'gt'.
payload=dict(route=route,gt=gt)
myobj=json.dumps(payload)

In [9]:
# Write the serialized sample to sample.json in the working directory.
with open("sample.json", mode="w") as outfile:
    print(myobj, end="", file=outfile)

In [None]:
#NICE