In [1]:
# Widen the notebook container to 90% of the browser width so wide DataFrames stay readable.
# `display` and `HTML` come from the public IPython.display module; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
from pathlib import Path
from typing import Tuple, List, Callable

import numpy as np
import pandas as pd
import torch
import glob
from datetime import datetime, timedelta
from pyproj import Geod
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
from itertools import product

import folium
import folium.plugins

In [3]:
# Geodesic calculator shared by the distance helpers in this notebook.
# NOTE(review): pyproj documents Geod.inv as inv(lons1, lats1, lons2, lats2) - longitudes first; confirm callers pass arguments in that order.
wgs84_geod = Geod(ellps='WGS84') # Distance will be measured in meters on this ellipsoid - more accurate than a spherical method

def coords_to_areas(target):
    """Split one app opening across all virtual areas according to geodesic distance.

    Parameters
    ----------
    target : mapping or pd.Series
        Must provide the 'GPS_Latitude' and 'GPS_Longitude' of the opening.

    Returns
    -------
    pd.Series
        One share per row of ``virtual_area_centers`` (positional 0..n-1 index), computed as
        ``(1 - d_i / sum(d)) / (n - 1)``, so closer areas get larger shares and the shares add up to 1.
    """
    n_centers = len(virtual_area_centers)
    if n_centers == 1:
        # The formula divides by (n - 1); a single area simply receives the whole opening.
        return pd.Series([1.0])
    # Geod.inv expects (lons1, lats1, lons2, lats2): longitudes first, then latitudes.
    _, _, dist = wgs84_geod.inv(
        virtual_area_centers['GPS_Longitude'].to_numpy(),
        virtual_area_centers['GPS_Latitude'].to_numpy(),
        np.full(n_centers, target['GPS_Longitude']),
        np.full(n_centers, target['GPS_Latitude']),
    )
    dist = np.asarray(dist, dtype=float)
    return pd.Series((1 - dist / dist.sum()) / (n_centers - 1))

In [4]:
# MultiIndex built from the 'Time' and 'Vehicle_Number_Plate' columns of the processed
# locations file, with its two levels swapped by reorder_levels([1, 0]).
i = pd.MultiIndex.from_frame(
    pd.read_csv(
        (Path.cwd().parent / 'data' / 'processed' / 'locations.csv'),
        usecols=['Time', 'Vehicle_Number_Plate'],
        parse_dates=['Time'],
    )
).reorder_levels([1, 0])

In [40]:
# Load the processed actions and key every row by (Time, Vehicle_Number_Plate);
# set_index moves both columns out of the frame and into the MultiIndex.
a = (
    pd.read_csv((Path.cwd().parent / 'data' / 'processed' / 'actions.csv'), parse_dates=['Time'])
    .set_index(['Time', 'Vehicle_Number_Plate'])
)

In [43]:
# Spot check: take the key stored at position 835909 of the locations index `i` and look it up in `a`.
a.loc[i[835909]]

Virtual_Zone_Name_0     0
Virtual_Zone_Name_1     0
Virtual_Zone_Name_2     0
Virtual_Zone_Name_3     0
Virtual_Zone_Name_4     0
Virtual_Zone_Name_5     0
Virtual_Zone_Name_6     0
Virtual_Zone_Name_7     0
Virtual_Zone_Name_8     0
Virtual_Zone_Name_9     0
Virtual_Zone_Name_10    0
Virtual_Zone_Name_11    0
Virtual_Zone_Name_12    0
Virtual_Zone_Name_13    0
Virtual_Zone_Name_14    0
Virtual_Zone_Name_15    0
Virtual_Zone_Name_16    0
Virtual_Zone_Name_17    0
Virtual_Zone_Name_18    0
Virtual_Zone_Name_19    0
Virtual_Zone_Name_20    1
Virtual_Zone_Name_21    0
Virtual_Zone_Name_22    0
Virtual_Zone_Name_23    0
Virtual_Zone_Name_24    0
Virtual_Zone_Name_25    0
Virtual_Zone_Name_26    0
Virtual_Zone_Name_27    0
Virtual_Zone_Name_28    0
Virtual_Zone_Name_29    0
Virtual_Zone_Name_30    0
Virtual_Zone_Name_31    0
Virtual_Zone_Name_32    0
Virtual_Zone_Name_33    0
Virtual_Zone_Name_34    0
Virtual_Zone_Name_35    0
Virtual_Zone_Name_36    0
Virtual_Zone_Name_37    0
Virtual_Zone

In [7]:
# Number of rows in `a` expressed as a percentage of the number of entries in `i`.
len(a)/len(i)*100

0.12141000669152338

In [8]:
# Interim inputs (rental trips and app openings) and the processed table of virtual areas.
rental = pd.read_csv(
    Path.cwd().parent.joinpath('data', 'interim', 'rental.csv'),
    low_memory=False,
)
openings = pd.read_csv(Path.cwd().parent.joinpath('data', 'interim', 'openings.csv'))
virtual_area_centers = pd.read_csv(
    Path.cwd().parent.joinpath('data', 'processed', 'areas.csv'),
    index_col=0,
)

In [9]:
# Area centers based on current areas: mean start coordinates of the rentals in each start zone.
# The two coordinate columns are selected *before* aggregating so that mean() never touches the
# non-numeric columns of `rental` (pandas >= 2.0 raises on them instead of silently dropping them).
area_centers = (
    rental.groupby('Start_Zone_Name')[['Start_GPS_Latitude', 'Start_GPS_Longitude']]
    .mean()
    .rename(columns={
        'Start_GPS_Latitude': 'GPS_Latitude',
        'Start_GPS_Longitude': 'GPS_Longitude'})
    .rename_axis('Area')
)

In [10]:
# Parse the creation timestamp and one-hot encode 'Platform' (first category dropped).
# NOTE(review): the format string has no seconds field although the printed time limits further
# down show seconds - confirm the format matches the raw data.
# NOTE(review): this cell consumes the 'Platform' column, so it cannot be re-run without reloading `openings`.
openings = pd.get_dummies(
    openings.assign(
        Created_Datetime_Local=pd.to_datetime(openings['Created_Datetime_Local'], format='%Y-%m-%d %H:%M')
    ),
    columns=['Platform'],
    drop_first=True,
)

In [11]:
# Parse the trip start/end timestamps.
# NOTE(review): the format string has no seconds field although the printed time limits further
# down show seconds - confirm the format matches the raw data.
rental['Start_Datetime_Local'] = pd.to_datetime(rental['Start_Datetime_Local'], format='%Y-%m-%d %H:%M')
rental['End_Datetime_Local'] = pd.to_datetime(rental['End_Datetime_Local'], format='%Y-%m-%d %H:%M')
# One-hot encode the engine type (first category dropped) and the vehicle model (all categories kept).
rental = pd.get_dummies(rental, columns=['Vehicle_Engine_Type'], drop_first=True)
rental = pd.get_dummies(rental, columns=['Vehicle_Model'])
# Keep the original end-zone label in 'VZE_ori': the get_dummies call below replaces
# 'Virtual_End_Zone_Name' with one indicator column per zone.
rental['VZE_ori'] = rental['Virtual_End_Zone_Name']
rental = pd.get_dummies(rental, columns=['Virtual_End_Zone_Name'])

In [12]:
# Study period = overlap of both datasets: it starts at the later of the two first timestamps
# (first rental start, first opening) and ends at the earlier of the two last timestamps
# (last rental end, last opening).
time_start = max(rental['Start_Datetime_Local'].min(), openings['Created_Datetime_Local'].min())
time_end = min(rental['End_Datetime_Local'].max(), openings['Created_Datetime_Local'].max())
print('Time limits:', time_start, 'to', time_end)
# Length of the overlapping period.
total_time = time_end-time_start

Time limits: 2020-02-01 00:56:26 to 2021-05-03 23:59:51


In [16]:
# Spacing between consecutive timepoints of the dataset.
time_step=timedelta(hours=1)

In [17]:
# Names of the one-hot 'Vehicle_Model' columns of `rental`.
vehicles = rental.columns[rental.columns.str.contains('Vehicle_Model')] # Get names of vehicles
# One datetime every `time_step`, from time_start (inclusive) up to time_end (exclusive).
timepoints = np.arange(time_start, time_end, time_step).astype(datetime)
# Look-back window applied before each timepoint by demand(), actions() and revenue().
# NOTE(review): if time_step is longer than this window, events between two windows are not
# covered by any timepoint - confirm this is intended.
time_window = timedelta(minutes=15)
# Distinct number plates appearing in the rental data.
cars = pd.unique(rental['Vehicle_Number_Plate'])
# Every (timepoint, plate) combination, fully materialised as a list of tuples.
indeces = list(product(timepoints, cars))

In [None]:
def demand(idx):
    """Demand per virtual area in the window that ends at ``timepoints[idx]``.

    Auxiliary method for __getitem__. Uses the position in ``timepoints`` as index.

    App openings created in ``(timepoints[idx] - time_window, timepoints[idx]]`` are each
    distributed over the areas by ``coords_to_areas`` and then summed per area, so the total
    equals the number of openings in the window when the shares of an opening add up to 1.

    Returns
    -------
    pd.Series
        Indexed by ``virtual_area_centers.index`` in both the empty and the non-empty case.
    """
    window_end = timepoints[idx]
    in_window = ((openings['Created_Datetime_Local'] > window_end - time_window) &
                 (openings['Created_Datetime_Local'] <= window_end))
    dem = openings[in_window]
    if len(dem) == 0:
        # No openings: zero demand everywhere, labelled exactly like the non-empty result.
        return pd.Series(data=0, index=virtual_area_centers.index)
    # One row per opening, one column per area (coords_to_areas returns positional shares).
    shares = dem.apply(coords_to_areas, axis=1)
    shares.columns = virtual_area_centers.index
    return shares.sum(axis=0) # Aggregate demand in the time window over areas

In [None]:
def vehicle_locations(idx):
    """Last known location of every vehicle that is parked at ``timepoints[idx]``.

    For each number plate the last row of ``rental`` whose trip ended at or before the timepoint
    is kept; vehicles with a trip in progress at the timepoint are removed.

    NOTE(review): ``keep='last'`` picks the last row in ``rental``'s row order, so this only
    yields the most recent trip if ``rental`` is sorted chronologically - confirm.

    Returns
    -------
    pd.DataFrame
        The selected ``rental`` rows (all columns) plus a 'Time' column holding ``timepoints[idx]``.
    """
    now = timepoints[idx]
    finished = rental[rental['End_Datetime_Local'] <= now]
    last_known = finished.drop_duplicates(subset='Vehicle_Number_Plate', keep='last') # Keep the last location
    current_trips = rental[(rental['Start_Datetime_Local'] <= now) & (rental['End_Datetime_Local'] > now)] # Cars in use
    parked = last_known[~last_known['Vehicle_Number_Plate'].isin(current_trips['Vehicle_Number_Plate'])] # Filter out cars in use
    # Earlier encoding (cars per zone and vehicle model, returned as a tensor), kept for reference:
    #loc = loc.loc[:, ~loc.columns.str.contains('Start')].drop(columns=['End_Datetime_Local'], axis=1) # Drop unused columns
    #loc = loc.groupby('Virtual_End_Zone_Name')[vehicles].sum() # Aggregate amount of cars
    #missing_areas = pd.DataFrame(index=virtual_area_centers.index[~virtual_area_centers.index.isin(loc.index)], columns=loc.columns, data=0)
    #loc = pd.melt(pd.concat([loc, missing_areas]), ignore_index=False) # Add missing areas and unpivot
    #loc.index = loc.index.astype('str')+loc.variable # Join zone and vehicle model, necessary to sort
    #return torch.tensor(loc.drop(labels='variable', axis=1).sort_index().values).squeeze() # Drop vehicle model (already in index) and sort
    # assign() builds a new frame, so no column is written onto a filtered slice (no SettingWithCopyWarning).
    return parked.assign(Time=now)

In [None]:
# Columns kept for the location table: every `rental` column whose name contains 'Plate',
# 'Vehicle_Model' or 'Virtual', plus the 'Time' column added by vehicle_locations().
cols = np.append(
    rental.columns[
        np.logical_or.reduce([rental.columns.str.contains(pattern) for pattern in ('Plate', 'Vehicle_Model', 'Virtual')])
    ].values,
    'Time',
)

In [None]:
# Parked-vehicle rows for the first timepoint, restricted to the columns listed in `cols`.
# NOTE(review): this rebinds `a`, which an earlier cell uses for the processed actions table.
a = vehicle_locations(0).loc[:,cols]

In [None]:
# Parked-vehicle rows for the second timepoint, restricted to the columns listed in `cols`.
b = vehicle_locations(1).loc[:,cols]

In [None]:
# Stack both location tables and key the rows by (Time, Vehicle_Number_Plate).
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0;
# set_index moves the two key columns out of the frame and into the MultiIndex.
c = pd.concat([a, b]).set_index(['Time', 'Vehicle_Number_Plate'])

In [None]:
# Spot check: print the entry of `c` for the first timepoint and plate 'BZ20249';
# a missing key is ignored on purpose.
try:
    print(c.loc[timepoints[0], 'BZ20249'])
except KeyError:
    pass

In [None]:
def state(idx):
    # Auxiliary method for __getitem__. Joins vehicle locations and demand
    # Intended layout: [month, day, hour, demand..., vehicle locations...] for timepoints[idx].
    # NOTE(review): demand() returns a pandas Series and vehicle_locations() returns a pandas
    # DataFrame, while torch.hstack is documented to take a sequence of tensors - as written this
    # call likely raises; both inputs probably need converting to numeric tensors first.
    dem = demand(idx)
    loc = vehicle_locations(idx)
    return torch.hstack((torch.tensor(timepoints[idx].month), torch.tensor(timepoints[idx].day), torch.tensor(timepoints[idx].hour), dem, loc))

In [None]:
# Check the size of the state built for the timepoint at position 20.
state(20).shape

In [None]:
#def test(idx):
#    # Auxiliary method for __getitem__. Calculates actions
#    a = rental[(rental['Servicedrive_YN']==1) &
#                    (rental['Start_Datetime_Local'] >= timepoints[idx]-time_window) &
#                    (rental['Start_Datetime_Local'] < timepoints[idx])]
#    a = a[a['Virtual_Start_Zone_Name'] != a['Virtual_End_Zone_Name']]
#    a = a.loc[:, [*vehicles, 'Virtual_Start_Zone_Name', 'Virtual_End_Zone_Name', 'Servicedrive_YN']]
#    a = pd.melt(a, id_vars=['Virtual_Start_Zone_Name', 'Virtual_End_Zone_Name'], value_vars=[*vehicles])
#    a = a[a.value>0]
#    return a
#ac = []
#for i in tqdm(range(1000)):
#    ac.append(len(test(i)))
#bins = np.arange(0, 10, 1) # fixed bin size
#plt.xlim([0, 10])
#plt.hist(ac, bins=bins, density=True)
#plt.show()

In [13]:
# Fixed number of action slots; only referenced by the commented-out padding code in actions().
n_actions = 5
# Number of virtual areas (rows of virtual_area_centers).
n_areas = len(virtual_area_centers)

In [28]:
def actions(idx):
    """Service trips that moved a vehicle to another virtual zone just before ``timepoints[idx]``.

    Selects the rows of ``rental`` with ``Servicedrive_YN == 1`` whose start lies in
    ``[timepoints[idx] - time_window, timepoints[idx])`` and whose 'Virtual_Start_Zone_Name'
    differs from 'VZE_ori' (the end zone label saved before one-hot encoding).

    Returns
    -------
    pd.DataFrame
        The selected ``rental`` rows (all columns) plus a 'Time' column holding ``timepoints[idx]``.
    """
    window_end = timepoints[idx]
    in_window = ((rental['Servicedrive_YN'] == 1) &
                 (rental['Start_Datetime_Local'] >= window_end - time_window) &
                 (rental['Start_Datetime_Local'] < window_end))
    ad = rental[in_window]
    ad = ad[ad['Virtual_Start_Zone_Name'] != ad['VZE_ori']]
    # Earlier encoding (fixed-size tensor padded to n_actions rows), kept for reference:
    #ad = np.reshape(ad.to_numpy(), (-1, ad.shape[1]))
    #a = np.zeros((n_actions, ad.shape[1]), dtype=np.int8)
    #a[:ad.shape[0]] = ad
    #a[ad.shape[0]:, [0, -2*n_areas, -n_areas]] = 1
    #return torch.from_numpy(a)
    # assign() builds a new frame, so no column is written onto a filtered slice (no SettingWithCopyWarning).
    return ad.assign(Time=window_end)

In [37]:
# Preview at most one row of the actions found for the first timepoint (shows the column layout).
actions(0)[:1]

Unnamed: 0,Vehicle_Number_Plate,Revenue_Net,Start_Datetime_Local,End_Datetime_Local,Start_GPS_Latitude,Start_GPS_Longitude,End_GPS_Latitude,End_GPS_Longitude,Package_Description,Operation_State_Name_Before,...,Virtual_End_Zone_Name_41,Virtual_End_Zone_Name_42,Virtual_End_Zone_Name_43,Virtual_End_Zone_Name_44,Virtual_End_Zone_Name_45,Virtual_End_Zone_Name_46,Virtual_End_Zone_Name_47,Virtual_End_Zone_Name_48,Virtual_End_Zone_Name_49,Time


In [None]:
# Columns kept for the action table: every `rental` column whose name contains 'Plate' or
# 'Virtual_End_Zone_Name_' (the end-zone indicator columns), plus the 'Time' column added by actions().
cols_act = np.append(
    rental.columns[
        np.logical_or.reduce([rental.columns.str.contains(pattern) for pattern in ('Plate', 'Virtual_End_Zone_Name_')])
    ].values,
    'Time',
)

In [None]:
# Stack `a` with the actions found for the timepoint at position 206, keep only the columns in
# `cols_act` and key the rows by (Time, Vehicle_Number_Plate).
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0;
# set_index moves the two key columns out of the frame and into the MultiIndex.
b = pd.concat([a, actions(206)])[cols_act].set_index(['Time', 'Vehicle_Number_Plate'])

In [None]:
# Display the combined table `b`.
b

In [None]:
# Inspect the (timepoint, plate) pair stored at position 217360 of `indeces`.
indeces[217360]

In [None]:
# Look up that (timepoint, plate) pair in `b`; raises KeyError when the pair has no row.
b.loc[indeces[217360]]

In [None]:
def revenue(idx):
    """Net revenue of the trips completed inside the window preceding ``timepoints[idx]``.

    Auxiliary method for __getitem__. Uses the position in ``timepoints`` as index.
    A trip counts when it started at or after ``timepoints[idx] - time_window`` and ended
    strictly before ``timepoints[idx]``.

    Returns
    -------
    torch.Tensor
        Scalar tensor with the sum of 'Revenue_Net' over the selected trips.
    """
    window_end = timepoints[idx]
    started_in_window = rental['Start_Datetime_Local'] >= window_end - time_window
    ended_in_window = rental['End_Datetime_Local'] < window_end
    total = rental.loc[started_in_window & ended_in_window, 'Revenue_Net'].sum()
    return torch.tensor(total)

In [None]:
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.options.mode.chained_assignment = None
def item(idx):
    """Build one (state, actions, next state, revenue) sample for the timepoint at position ``idx``.

    NOTE(review): ``state(idx + 1)`` is evaluated, so ``idx`` presumably must not be the last
    position of ``timepoints`` - confirm against the caller.
    """
    current_state = state(idx) # Position of cars at timepoint idx and demand in the window before it
    moved_cars = actions(idx) # End position of cars moved by service trips in the window before idx
    next_state = state(idx+1) # Same as current_state, for timepoint idx+1
    reward = revenue(idx) # Total revenue in the window before idx
    return current_state, moved_cars, next_state, reward