In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
from pathlib import Path
from typing import Tuple, List, Callable

import numpy as np
import pandas as pd
import torch
import glob
from datetime import datetime, timedelta
from pyproj import Geod
from tqdm import tqdm
import matplotlib.pyplot as plt
import random


import folium
import folium.plugins

In [3]:
wgs84_geod = Geod(ellps='WGS84') # Distance will be measured in meters on this ellipsoid - more accurate than a spherical method

def coords_to_areas(target): # Calculate to which area an opening's coordinates (target) "belong to"
    _,_,dist = wgs84_geod.inv(virtual_area_centers['GPS_Latitude'], virtual_area_centers['GPS_Longitude'], np.full(len(virtual_area_centers),target['GPS_Latitude']), np.full(len(virtual_area_centers), target['GPS_Longitude']))
    return pd.Series((1 - dist / sum(dist)) / (len(dist) - 1)) # Percentage of how much an opening belongs to each area

In [4]:
rental = pd.read_csv(Path.cwd().parent / 'data' / 'processed' / 'rental.csv', low_memory=False)
openings = pd.read_csv(Path.cwd().parent / 'data' / 'processed' / 'openings.csv')
virtual_area_centers = pd.read_csv(Path.cwd().parent / 'data' / 'processed' / 'areas.csv', index_col=0)

In [5]:
# Area centers based on current areas
area_centers = rental.groupby('Start_Zone_Name').mean()[['Start_GPS_Latitude','Start_GPS_Longitude']]
area_centers.rename(columns={
    'Start_GPS_Latitude': 'GPS_Latitude', 
    'Start_GPS_Longitude': 'GPS_Longitude'}, inplace=True)
area_centers.index.names = ['Area']

In [6]:
openings['Created_Datetime_Local'] = pd.to_datetime(openings['Created_Datetime_Local'], format='%Y-%m-%d %H:%M')
openings = pd.get_dummies(openings, columns=['Platform'], drop_first=True)

In [7]:
rental['Start_Datetime_Local'] = pd.to_datetime(rental['Start_Datetime_Local'], format='%Y-%m-%d %H:%M')
rental['End_Datetime_Local'] = pd.to_datetime(rental['End_Datetime_Local'], format='%Y-%m-%d %H:%M')
rental = pd.get_dummies(rental, columns=['Vehicle_Engine_Type'], drop_first=True)
rental = pd.get_dummies(rental, columns=['Vehicle_Model'])
one_hot_zones = pd.get_dummies(rental.loc[:,['Virtual_Start_Zone_Name', 'Virtual_End_Zone_Name']], columns=['Virtual_Start_Zone_Name', 'Virtual_End_Zone_Name'])
rental = pd.concat([rental, one_hot_zones], axis=1)

In [8]:
time_start = max(rental['Start_Datetime_Local'].min(), openings['Created_Datetime_Local'].min())
time_end = min(rental['End_Datetime_Local'].max(), openings['Created_Datetime_Local'].max())
print('Time limits:', time_start, 'to', time_end)
total_time = time_end-time_start

Time limits: 2020-02-01 00:56:26 to 2021-05-03 23:59:51


In [9]:
time_step=timedelta(hours=1)

In [15]:
vehicles = rental.columns[rental.columns.str.contains('Vehicle_Model')] # Get names of vehicles
timepoints = np.arange(time_start, time_end, time_step).astype(datetime)
time_window = timedelta(minutes=15)
cars = pd.unique(rental['Vehicle_Number_Plate'])

In [60]:
def demand(idx):
    # Auxiliary method for __getitem__. Uses array timepoint as a index. Returns the demand of all areas at some point in time.
    dem = openings[(openings['Created_Datetime_Local'] > timepoints[idx]-time_window) &
    (openings['Created_Datetime_Local'] <= timepoints[idx])].copy()
    if len(dem) == 0:
        return pd.Series(data=0, index=np.arange(len(virtual_area_centers)))
    else:
        dem[virtual_area_centers.index.values] = 0 # Create columns with area names
        di = dem.apply(lambda x: coords_to_areas(x), axis=1)
        dem[virtual_area_centers.index.values] =  di # Apply function to all openings
        return dem.loc[:,virtual_area_centers.index].sum(axis=0) # Aggregate demand in the time window over areas (.loc to remove gps coords and platform). Sum of demand equals to amount of app openings

In [79]:
a = pd.DataFrame(demand(559)).T

In [88]:
b = a.append(demand(560), ignore_index=True)

In [89]:
b.set_index()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.060522,0.060529,0.056539,0.060953,0.05919,0.058573,0.059399,0.060783,0.060536,0.060153,...,0.059988,0.061037,0.060049,0.059675,0.060536,0.060087,0.060782,0.059525,0.060833,0.060834
1,0.060511,0.059905,0.0579,0.060008,0.06116,0.058798,0.059218,0.06021,0.060138,0.059907,...,0.060796,0.060237,0.059571,0.059437,0.060404,0.059766,0.06034,0.061023,0.06032,0.059962


In [12]:
# Function that returns the location of all parked vehicles at any datetime. remove_in_use decides to remove vehicles in transit or keep them and pick their last location
def vehicle_locations(idx):
    loc = rental[rental['End_Datetime_Local'] <= timepoints[idx]]
    loc = loc.drop_duplicates(subset='Vehicle_Number_Plate', keep='last') # Keep the last location
    current_trips = rental[(rental['Start_Datetime_Local'] <= timepoints[idx]) & (rental['End_Datetime_Local'] > timepoints[idx])] # Cars in use
    loc = loc[~loc['Vehicle_Number_Plate'].isin(current_trips['Vehicle_Number_Plate'])] # Filter out cars in use
    loc = loc.loc[:, ~loc.columns.str.contains('Start')].drop(columns=['End_Datetime_Local'], axis=1) # Drop unused columns
    loc = loc.groupby('Virtual_End_Zone_Name')[vehicles].sum() # Aggregate amount of cars
    missing_areas = pd.DataFrame(index=virtual_area_centers.index[~virtual_area_centers.index.isin(loc.index)], columns=loc.columns, data=0)
    loc = pd.melt(pd.concat([loc, missing_areas]), ignore_index=False) # Add missing areas and unpivot
    loc.index = loc.index.astype('str')+loc.variable # Join zone and vehicle model, necessary to sort
    return torch.tensor(loc.drop(labels='variable', axis=1).sort_index().values).squeeze() # Drop vehicle model (already in index) and sort

In [16]:
from itertools import product

In [30]:
index = list(product(timepoints, cars))

In [42]:
len(timepoints)

10992

In [41]:
p[0][0]

datetime.datetime(2020, 2, 1, 0, 56, 26)

In [14]:
def state(idx):
    # Auxiliary method for __getitem__. Joins vehicle locations and demand
    dem = demand(idx)
    loc = vehicle_locations(idx)
    return torch.hstack((torch.tensor(timepoints[idx].month), torch.tensor(timepoints[idx].day), torch.tensor(timepoints[idx].hour), dem, loc))

In [15]:
state(20).shape

torch.Size([603])

In [16]:
#def test(idx):
#    # Auxiliary method for __getitem__. Calculates actions
#    a = rental[(rental['Servicedrive_YN']==1) &
#                    (rental['Start_Datetime_Local'] >= timepoints[idx]-time_window) &
#                    (rental['Start_Datetime_Local'] < timepoints[idx])]
#    a = a[a['Virtual_Start_Zone_Name'] != a['Virtual_End_Zone_Name']]
#    a = a.loc[:, [*vehicles, 'Virtual_Start_Zone_Name', 'Virtual_End_Zone_Name', 'Servicedrive_YN']]
#    a = pd.melt(a, id_vars=['Virtual_Start_Zone_Name', 'Virtual_End_Zone_Name'], value_vars=[*vehicles])
#    a = a[a.value>0]
#    return a
#ac = []
#for i in tqdm(range(1000)):
#    ac.append(len(test(i)))
#bins = np.arange(0, 10, 1) # fixed bin size
#plt.xlim([0, 10])
#plt.hist(ac, bins=bins, density=True)
#plt.show()

In [17]:
n_actions = 5
n_areas = len(virtual_area_centers)

In [33]:
def actions(idx):
    ad = rental[(rental['Servicedrive_YN']==1) &
                    (rental['Start_Datetime_Local'] >= timepoints[idx]-time_window) &
                    (rental['Start_Datetime_Local'] < timepoints[idx])]
    ad = ad[ad['Virtual_Start_Zone_Name'] != ad['Virtual_End_Zone_Name']].iloc[:,19:-1]
    ad = np.reshape(ad.to_numpy(), (-1, ad.shape[1]))
    a = np.zeros((n_actions, ad.shape[1]), dtype=np.int8)
    a[:ad.shape[0]] = ad
    a[ad.shape[0]:, [0, -2*n_areas, -n_areas]] = 1
    return torch.from_numpy(a)

In [34]:
a = actions(200)

In [35]:
a

tensor([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     

In [89]:
a[torch.argmax(a[:, -2*n_areas:-n_areas], dim=1) == torch.argmax(a[:, -n_areas:], dim=1)] = 1

In [17]:
def revenue(idx):
    # Auxiliary method for __getitem__. Uses array timepoint as a index.
    trips_in_window = rental[(rental['Start_Datetime_Local'] >= timepoints[idx]-time_window) & (rental['End_Datetime_Local'] < timepoints[idx])]
    return torch.tensor(trips_in_window['Revenue_Net'].sum())

In [18]:
pd.options.mode.chained_assignment = None
def item(idx):
    s = state(idx) # Returns position of cars in timepoint idx and demand between idx-timedelta and idx
    a = actions(idx) # Returns end position of cars due to service trips within idx-timedelta (only moved cars)
    s1 = state(idx+1) # Returns position of cars in timepoint idx+1 and demand between idx+1-timedelta and idx+1
    r = revenue(idx) # Returns total revenue between idx-timedelta and idx
    return s, a, s1, r

In [19]:
item(2000)

UnboundLocalError: local variable 'dist' referenced before assignment

In [None]:
s1, a1, *_ = item(8)
s1