In [None]:
import collections
import math
import os
import pickle
import time
from statistics import mean
from typing import Union

import matplotlib.pyplot as plt
import pandas as pd
import tqdm
import tqdm.notebook
%matplotlib inline

In [None]:
# Home directory used as the root of every data path in this notebook.
# On POSIX expanduser('~') returns $HOME when it is set and otherwise falls
# back to the user database, so this no longer raises KeyError on systems
# where the HOME environment variable is undefined (e.g. Windows).
HOME = os.path.expanduser('~')

## Creating Dataframe

In [None]:
def create_df(dpath:str)->pd.DataFrame:
    '''
    Creates a pandas dataframe from all the pickled files in `dpath`.

    Every file is expected to contain six objects pickled back-to-back in
    this order: Time, Name, Model, GPS, Speed, BS. Each file becomes one row.

    Parameters
    ----------
    dpath : str
        Directory holding the pickled sample files. Sub-directories are
        ignored.

    Returns
    -------
    pd.DataFrame
        One row per file with the columns Time, Name, Model, GPS, Speed, BS
        and a 0..n-1 index. Files are read in sorted filename order so the
        row order is reproducible (os.listdir order is arbitrary). An empty
        directory yields an empty frame with the same columns.

    Notes
    -----
    pickle.load can execute arbitrary code embedded in the file; only use
    this on files produced by a trusted source.
    '''
    columns = ['Time','Name','Model','GPS','Speed','BS']
    # Rows are collected in a plain list and the frame is built once at the
    # end; enlarging a DataFrame one cell at a time re-allocates repeatedly.
    records = []

    for filename in sorted(os.listdir(dpath)):
        fpath = os.path.join(dpath, filename)
        if not os.path.isfile(fpath):
            continue
        with open(fpath,'rb') as a:
            # The six fields are stored as consecutive pickles in one file.
            records.append({col: pickle.load(a) for col in columns})

        if len(records)%1000 == 0:
            print(f'{len(records)} samples has been added to dataframe')

    return pd.DataFrame(records, columns=columns)


In [None]:
# Load the preprocessed dataframe if it has already been built.
# Only a missing file is reported as "does not exist"; any other failure
# (corrupt pickle, permission error, ...) propagates instead of being
# mislabelled, which the previous bare `except:` did.
# NOTE: pd.read_pickle unpickles the file, which can execute arbitrary code;
# only load files produced by a trusted source.
try:
    data = pd.read_pickle(f'{HOME}/webots_code/data/final/gps.pkl')
    print('Preprocessed dataframe already present')
except FileNotFoundError:
    # data = create_df(dpath)
    print('DataFrame does not exist')

In [None]:
# Print a concise summary of the loaded dataframe.
data.info()

In [None]:
# Number of distinct vehicle names present in the dataset
ncars = data['Name'].unique().size

## Distribution of samples 

In [None]:
plt.rcParams.update({'figure.figsize': [20, 8], 'figure.dpi': 100})

# Sample count for every vehicle name
car_dist = data['Name'].value_counts().to_dict()
# Counts arranged by vehicle number, so entry k belongs to 'SUMO vehicle k'
per_car = [
    car_dist[f'SUMO vehicle {veh_id}']
    for veh_id in range(0,len(data['Name'].unique()))
]

plt.bar(range(0,len(data['Name'].unique())),per_car)

## Average Speed and speed distribution

In [None]:
# Samples faster than this are reported and left out of the statistics.
MAX_VALID_SPEED = 100.00

# speed      : vehicle name -> list of its accepted speed samples (row order)
# speed_dist : speed_dist[k] = number of accepted samples whose speed rounds to k
speed = dict()

# The histogram is sized from exactly the samples the loop accepts
# (Speed <= MAX_VALID_SPEED). Sizing it from `Speed < 100` while skipping only
# `Speed > 100` could leave a sample of exactly 100 without a bin (IndexError).
valid_speed = data['Speed'][data['Speed'] <= MAX_VALID_SPEED]
speed_dist = [0]*(round(valid_speed.max())+1) if len(valid_speed) else []

# Walk the two columns directly instead of doing repeated per-row .at lookups.
for name, value in zip(data['Name'], data['Speed']):
    if value > MAX_VALID_SPEED:
        print('Unusually high speed:',value,'Skipping')
        continue
    speed.setdefault(name, []).append(value)
    speed_dist[round(value)]+=1

### Speed Distribution

In [None]:
plt.rcParams['figure.figsize'] = [15, 8]
plt.rcParams['figure.dpi'] = 100
# Show at most the first 20 speed bins. The x positions are derived from the
# slice itself so the chart also works when fewer than 20 bins exist
# (plt.bar needs x and height of matching length).
shown_bins = speed_dist[:20]
plt.bar(range(len(shown_bins)),shown_bins)
# Only samples above 100 are skipped as outliers when speed_dist is built,
# so the maximum is taken over Speed <= 100 to match that rule.
print('Max speed is',data['Speed'][data['Speed']<=100].max())

In [None]:
# Mean accepted speed of every vehicle, ordered by vehicle number
avg_speed = [
    mean(speed[f'SUMO vehicle {veh_id}'])
    for veh_id in range(len(data['Name'].unique()))
]

In [None]:
# Average speed per vehicle; the x axis is the vehicle number
plt.rcParams.update({'figure.figsize': [20, 8], 'figure.dpi': 100})
plt.plot(range(ncars),avg_speed)

## Surrounding vehicles

### For each sample
Calculating surrounding vehicles for each sample

In [None]:
# Directory with the GPS tracking files that read_gps loads
gpath = f'{HOME}/webots_code/data/final/tracking'
# Width of the time window used to match a tracking record to a sample
timestep = 0.128
# Vehicles closer than this count as "in range"; compared against the
# distance returned by dist_gps, which is in metres
lrange = 120

# One counter per dataframe row: number of vehicles in range of that sample
veh_range = [0 for _ in range(len(data))]

In [None]:
# Determining the distance between car and transmitter in meter
# TO-DO : Consider height while calculating distance
def dist_gps(gps1, gps2):
    '''
    Great-circle (haversine) distance between two GPS positions, in metres.

    Parameters
    ----------
    gps1, gps2 : iterable
        Position given as (latitude, longitude, ...) in degrees. Only the
        first two items are used; any further items (e.g. height) are
        ignored, so both (lat, lon) and (lat, lon, height) are accepted.

    Returns
    -------
    float
        Distance along the surface of a sphere of radius 6371000 m.
    '''
    lat1, lon1, *_ = gps1
    lat2, lon2, *_ = gps2
    R = 6371000  # radius of Earth in meters
    phi_1 = math.radians(lat1)
    phi_2 = math.radians(lat2)

    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi / 2.0) ** 2 + \
        math.cos(phi_1) * math.cos(phi_2) * \
        math.sin(delta_lambda / 2.0) ** 2
    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a
    # hair outside for (near-)antipodal points, which would make
    # sqrt(1 - a) raise ValueError. Clamp it back into the valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

In [None]:
def read_gps(gpath)-> list:
    '''
    Read all the GPS dataset and store
    it in RAM, for faster exec

    Parameters
    ----------
    gpath : str
        Directory whose entries are all feather files.

    Returns
    -------
    list of pd.DataFrame
        One dataframe per file, in sorted filename order so the result is
        reproducible (os.listdir order is arbitrary). An empty directory
        gives an empty list.
    '''
    return [
        pd.read_feather(os.path.join(gpath, filename))
        for filename in sorted(os.listdir(gpath))
    ]

In [None]:
# Load every tracking file once and keep the dataframes in memory.
gps_pd = read_gps(gpath)
# Number of tracking dataframes that were loaded
print(len(gps_pd))
# DataFrame.info() prints its report itself and returns None, so it is called
# on its own rather than inside print() (which printed a stray "None").
# The guard avoids an IndexError when no tracking file was found.
if gps_pd:
    gps_pd[0].info()

In [None]:
#Iterating over all the vehicle GPS data
start = time.time()
# Counters are rebuilt here so that re-running this cell does not add to the
# values left over from a previous run.
veh_range = [0]*len(data)
veh_dist = list()
# enumerate() supplies the row position used for veh_range; the index label
# `i` is only used for lookups in `data`, so an index that is not 0..n-1
# cannot misplace or overflow the counters.
for pos, i in enumerate(tqdm.notebook.tqdm(data.index.values)):
    siml_time = data.at[i,'Time']
    own_gps = data.at[i,'GPS'][1]
    i_dist = list()
    for gps_entry in gps_pd:
        # Checking for the position of the vehicle in that timestep:
        # records with Time in (siml_time - timestep, siml_time]
        entry = gps_entry[
                    ((siml_time - timestep) < gps_entry['Time']) &
                    (gps_entry['Time'] <= siml_time)
                    ]

        if entry.empty :
            continue

        # Use the most recent record inside the window. When a record with
        # Time == siml_time exists it is that record (the window is capped at
        # siml_time); when none matches exactly, the previous exact-equality
        # filter left nothing to read and raised IndexError.
        latest = entry['Time'].values.argmax()
        other_gps = entry['gps'].values[latest][1]

        # Calculating distance between vehicles
        dist = round(dist_gps(own_gps, other_gps),2)
        # dist == 0 is excluded; presumably this is the sampled vehicle's own
        # track - verify against how the tracking files are produced.
        if dist < lrange and dist!=0:
            veh_range[pos] +=1       #For number of vehicles in range
            i_dist.append(dist) #Capturing the distance of each vehicle

    veh_dist.append(i_dist)

In [None]:
# # Saving dataframe
# savepath = f'{HOME}/webots_code/data/final/stats_pd.pkl'
# data.to_pickle(savepath)

In [None]:
# data

In [None]:
# Attach the per-sample results as new dataframe columns:
#   veh_range - number of vehicles closer than lrange to the sample
#   veh_dist  - list with the distance to each of those vehicles
data['veh_range'] = veh_range
data['veh_dist'] = veh_dist

### Load preprocessed data

In [None]:
# occr[k] = number of samples that had exactly k vehicles in range
max_in_range = data['veh_range'].max()
occr = [0]*(max_in_range+1)

for n_in_range in data['veh_range']:
    occr[n_in_range]+=1

plt.rcParams.update({'figure.figsize': [20, 8], 'figure.dpi': 100})
plt.bar(range(max_in_range+1),occr)

In [None]:
# dist[k] = number of recorded neighbour distances whose integer part is k.
# The bin count is derived from lrange instead of a hard-coded 121: only
# distances below lrange are stored in veh_dist, so int(distance) always fits.
# (Assumes veh_dist was computed with the current lrange.)
n_bins = math.ceil(lrange)+1
dist = [0]*n_bins
start = time.time()
for i in tqdm.notebook.tqdm(data.index.values):
    for j in data.at[i,'veh_dist']:
        dist[int(j)] +=1

plt.rcParams['figure.figsize'] = [20, 8]
plt.rcParams['figure.dpi'] = 100
plt.plot(range(0,n_bins),dist)

In [None]:
# Print the dataframe summary again, now including the veh_range and
# veh_dist columns added earlier in the notebook.
data.info()