In [3]:
%reset
import importlib
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime as dt
import time
import networkx as nx
import osmnx as ox
import seaborn as sns
import json
import numpy as np
from gurobipy import *
rnd = np.random
import matplotlib.pyplot as plt
os.chdir("C:/Users/Rick/Desktop/Python Code/FMLM_data_code/")

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [4]:
%%time
# ---------------------------- Load Data ----------------------------------------------

#    Load in all the trajectory data for May 1, 2019 - Aug 1, 2019
#    Load ridership data for 07_15_19

# -------------------------------------------------------------------------------------
riderFile = 'riderData_07_15_19.csv'
travelTimesFile = '3e_medTravTimes.csv'

def loadTravTimesForRuleBasedSimulation(travelTimesFile):
    travTimes = pd.read_csv(travelTimesFile)
    travTimes = travTimes.drop(['Unnamed: 0'], axis = 1)
    travTimes['median_x'] = travTimes['median_x'].astype(int)
    new = travTimes['OD_string'].str.split(', ', n=1, expand=True)
    travTimes['POI'] = new[0].astype(int)
    travTimes['dest_POI'] = new[1].astype(int)
    return travTimes

def createGraph(travTimes, currentTimePeriod):
    baseTime = '6:00:00'
    # create column of time differences between current time and travel time
    travTimes.loc[:,'currentTime'] = abs((pd.to_timedelta(currentTimePeriod).total_seconds() - \
                                                pd.to_timedelta(baseTime).total_seconds()) - \
                                               (pd.to_timedelta(travTimes.loc[:,'timePeriod']).dt.total_seconds() - \
                                                pd.to_timedelta(baseTime).total_seconds()))
    # Find the closest time (either before or after) to determine travel time between two given nodes
    a = travTimes.groupby(['OD_string']).apply(lambda travTimes: travTimes['currentTime'].idxmin())
    # Filter the travel times based on the time-dependent travel times
    timeDependentTravTimes = travTimes.loc[a, :].reset_index(drop=True)
    # Create the graph for the specific time-dependent travel times
    G = nx.from_pandas_edgelist(timeDependentTravTimes, source = 'POI', target = 'dest_POI', \
                                edge_attr = 'median_x', create_using = nx.DiGraph())
    return G

# Create time dependent network from median travel times
def createTravTimeDict(travTimes):
    timeDepTravTime = dict()
    timeList1 = [f'0{hour}:{minute}:00' for hour in list(range(6,10)) for minute in ('00','15','30','45')]
    timeList2 = [f'{hour}:{minute}:00' for hour in list(range(10,24)) for minute in ('00','15','30','45')]
    timeList3 = timeList1 + timeList2
    for currentTime in timeList3:
        G = createGraph(travTimes, currentTime)
        timeDepTravTime[currentTime] = G
    return timeDepTravTime

riderData = pd.read_csv('riderData_07_15_19.csv')
travTimes = loadTravTimesForRuleBasedSimulation(travelTimesFile)
timeDepTravTime = createTravTimeDict(travTimes)
newDF = riderData[['ride_id', 'orig_sec']].copy()
origTimeDict = newDF.set_index('ride_id').to_dict()
riderTimeDict1 = origTimeDict['orig_sec']
schedule_28x = pd.read_csv('28x_Arrival_Times.csv')
schedule_28x_inbound = pd.read_csv('28x_Departure_Times.csv')

def addVirtualRidersToTimeDict(riderTimeDict, schedule_28x):
    for i in range(len(schedule_28x)):
        riderTimeDict[schedule_28x['ride_id'].iloc[i]] = schedule_28x['Sec'].iloc[i]
    return riderTimeDict

riderTimeDict = addVirtualRidersToTimeDict(riderTimeDict1, schedule_28x)

with open('requestsDict.json', 'r') as fp:
    requestsDict = json.load(fp)

Wall time: 26.9 s


The next block contains all the functions for the RTO algorithm

In [5]:
import BASECASE_RouteSimClass as simul
from BASECASE_RouteSimClass import *
import BASECASE_RouteRiderVanClass as rv
from BASECASE_RouteRiderVanClass import *

importlib.reload(simul)
importlib.reload(rv)


# ===================== Select simulation data for format dataframe for simulation ========================
def selectSimDate(simulationDate, riderData, requestsToConsider):
    westBusNodes = [249, 197, 221, 200, 238, 124, 107, 116, 122, 124, 63, 113, 104, 227, 163, 84, 157, 110, 186, 14, 204]
    # Filter data to only include IKEA riders
    dfIKEA = riderData[(riderData['date'] == simulationDate) & \
                       #(riderData['ride_id'].isin(requestsToConsider)) & \
                       (riderData['vehicle'].isin([2008, 2064, 2068])) & \
                       (~riderData['origin_poi'].isin(westBusNodes)) & \
                       (~riderData['destination_poi'].isin(westBusNodes))].reset_index(drop=True)
    # A couple of nodes are not present in the graph, so we can replace missing nodes with nearby nodes
    dfIKEA[['origin_poi','destination_poi']] = dfIKEA[['origin_poi','destination_poi']].replace(46, 39)
    dfIKEA[['origin_poi','destination_poi']] = dfIKEA[['origin_poi','destination_poi']].replace(33, 3)
    # These nodes were only visited once during a 4 month period, so we can drop these
    dfIKEA1 = dfIKEA[(dfIKEA['origin_poi'] != 67) & (dfIKEA['destination_poi'] != 67) & \
                     (dfIKEA['origin_poi'] != 7) & (dfIKEA['destination_poi'] != 7) & \
                     (dfIKEA['origin_poi'] != 275) & (dfIKEA['destination_poi'] != 275)].copy()
    #print(f'Count of trips dropped because node was not present in graph: {len(dfIKEA) - len(dfIKEA1)}') 
    # Reformat dataframe
    dfIKEA2 = dfIKEA1[['ride_id','date','vehicle','origin_timestamp','timePeriod1','orig_sec','pending_sec', \
                       'origin_poi','destination_poi','od_pair', 'USE_orig_lat',
                       'USE_orig_long', 'USE_dest_lat', 'USE_dest_long']].sort_values(by='origin_timestamp').copy()
    return dfIKEA2

# ====================== Format rider list for simulation input =========================
def createDailyRiderList(riderData):
    riderList = []
    for i in range(len(riderData)):
        riderList.append(rider(riderData['ride_id'].iloc[i], riderData['orig_sec'].iloc[i], 
                               riderData['pending_sec'].iloc[i], riderData['origin_poi'].iloc[i], 
                               riderData['destination_poi'].iloc[i], riderData['timePeriod1'].iloc[i],
                               riderData['USE_orig_lat'].iloc[i], riderData['USE_orig_long'].iloc[i],
                               riderData['USE_dest_lat'].iloc[i], riderData['USE_dest_long'].iloc[i]))
    # create a list of riders that need to be served where riders can be removed throughout the day
    remainingRiders = riderList.copy()
    return riderList, remainingRiders

# =================== Add timed bus arrivals into rider list as virtual riders ======================
def addVirtualIkeaRiders(schedule_28x, remainingRiders):
    ikea = 3
    for i in range(len(schedule_28x)):
        remainingRiders.append(rider(schedule_28x['ride_id'].iloc[i], schedule_28x['Sec'].iloc[i], \
                                     schedule_28x['Sec'].iloc[i], ikea, ikea, schedule_28x['timePeriod'].iloc[i], \
                                     schedule_28x['lat'].iloc[i], schedule_28x['long'].iloc[i], \
                                     schedule_28x['lat'].iloc[i], schedule_28x['long'].iloc[i]))
    return remainingRiders

# ==================== Store simulation results function ========================
def storeSimulationResults(dailyRiders, riderData, date):
    # Filter original rider data
    dfVan = riderData[(riderData['date'] == date)].copy()
    dfVan['origin_timestamp'] = dfVan['origin_timestamp'].apply(pd.to_datetime, format='%Y-%m-%d %H:%M:%S')
    # Convert new dataframe from the simulation results
    a,b,c,d =[],[],[],[]
    for rider in dailyRiders:
        a.append(rider.van)
        b.append(rider.pickupTime)
        c.append(rider.dropoffTime)
        d.append(rider.rideID)
    x = pd.DataFrame(data = {'ride_id':d,'vanID':a, 'pickup_Sim':b,'dropoff_Sim':c})
    mergeDF = pd.merge(dfVan, x, how = 'left', on = 'ride_id')
    mergeDF1 = mergeDF[['ride_id','vehicle','vanID','origin_timestamp','orig_sec', 'pickup_Sim','dropoff_Sim','od_pair']]
    mergeDF1.loc[:,'waitTimeSim'] = mergeDF1['pickup_Sim'] - mergeDF1['orig_sec']
    mergeDF1.loc[:,'driveTimeSim'] = mergeDF1['dropoff_Sim'] - mergeDF1['pickup_Sim']
    return mergeDF1

# ======================== Run simulation for one day ==============================
def runDaySimulation(riderData, date, timeDepTravTime, riderTimeDict, requestsDict):
    # Format data
    resultsDF = pd.DataFrame(columns = ['ride_id','vehicle','vanID','origin_timestamp','orig_sec','pickup_Sim',\
                                        'dropoff_Sim','od_pair','waitTimeSim','driveTimeSim'])
    df = selectSimDate(date, riderData, requestsDict[date])
    print('Len Riders', len(df))
    # Define simulation inputs
    dailyRiders, remainingRiders1 = createDailyRiderList(df)
    
    # Add virtual riders at Ikea
    remainingRiders = addVirtualIkeaRiders(schedule_28x, remainingRiders1)
    
    # Define shuttle inputs
    van1_start = 6 # <-- 6am
    van2_start = 6 # <-- 6am
    van1_id = 1535
    van2_id = 1533
    ikeaVehs = dict([(van1_id, van1_start*3600), (van2_id, van2_start*3600)])
    simStartTime = 6*3600   # <-- start simulation at 6am
    simEndTime = 24*3600-60 # <-- end simulation at 12am
    
    # ===========> Come back to this <===============
    waitCoeff = 3
    driveCoeff = 1
    
    # Start simulation
    sim = Sim(simStartTime, ikeaVehs, remainingRiders, timeDepTravTime, riderTimeDict, waitCoeff, driveCoeff) 
    # Run through entire day and store outputs
    while (sim.time <= simEndTime):
        sim.activateVans()
        sim.assignRiderToVan()
        sim.nextTrip()
        sim.moveVans()
        sim.step()

    storedResults = storeSimulationResults(dailyRiders, df, date)
    return storedResults


In [6]:
#============================= Run Simulation -=================================

#         Select specific dates and hours to run the simulation

#================================================================================

simulDates = ['2019-07-15']
startHour, endHour= 6, 24

def selectDateAndHours1(data, hours, date):
    filtDf = data[(data['date'] == date) & data['orig_hour'].isin(hours)].sort_values(by='orig_sec').reset_index(drop=True).copy()
    return filtDf

resultsRTO = pd.DataFrame(columns = ['date','wait','drive'])
riderIDs = dict()

for date in simulDates:
    df1 = selectDateAndHours1(riderData, list(range(startHour,endHour)), date)
    df = df1[df1['destination_poi'] != 46].copy()
    print('n = ' + str(len(df)))
    results1 = runDaySimulation(df, date, timeDepTravTime, riderTimeDict, requestsDict)
    riderIDs[date] = list(results1['ride_id'])
    resultsRTO.loc[len(resultsRTO)] = [date, results1['waitTimeSim'].sum(), results1['driveTimeSim'].sum()]

resultsRTO['cost'] = resultsRTO['wait']*3 + resultsRTO['drive']

n = 181
Len Riders 181


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
