In [1]:
import os
import json
import random
import pandas as pd
import numpy as np
import pickle as pkl
from time import time
import matplotlib.pyplot as plt
import seaborn
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta
from functools import partial
from multiprocessing import Pool
from collections import Counter

In [2]:
SHIFT_MINS = 12 * 60
DAY_MINS = 24 * 60
WEEK_MINS = 7 * DAY_MINS

In [33]:
df = pd.read_feather('data/all.feather')

  labels, = index.labels


In [34]:
df['pickup'] = (df.pickupX * 141 + df.pickupY).astype(np.uint16)
df['dropoff'] = (df.dropoffX * 141 + df.dropoffY).astype(np.uint16)
df['earning'] = (df['fare_amount'] + df['tip_amount']).astype(np.float32)
df.drop(['doy', 'woy', 'payment_type', 'dropoffX', 'dropoffY', 'pickupX', 'pickupY', 'fare_amount', 'tip_amount'], inplace=True, axis=1)

In [35]:
def idx2int(t):
    return t[0] * 141 + t[1]

with open(os.path.join(".", "provided_files", "shapefiles", "game_board_2019.geojson")) as fp:
    board = json.load(fp)
idToX = {}
idToY = {}
nbs = {}
for cell in board['features']:
    cellID = cell['properties']['id']
    cellTuple = tuple(map(int, cellID.split(':')))
    idToX[cellID], idToY[cellID] = cellTuple
    nbs[cellTuple] = [tuple(map(int, nb.split(':'))) for nb in cell['properties']['neighbours']]
    
with open('provided_files/Players/Greedy/refinedManhattan.pickle', 'rb') as handle:
    M = pkl.load(handle)

with open('provided_files/Players/Greedy/refinedJFK.pickle', 'rb') as handle:
    J = pkl.load(handle)

with open('provided_files/Players/Greedy/refinedLaguardia.pickle', 'rb') as handle:
    L = pkl.load(handle)    

greedy_dest = {}
for c in nbs:
    best = []
    for cell in nbs[c]:
        lowest_dist = (cell, min(M[cell], J[cell], L[cell]))
        best.append(lowest_dist)
    best = sorted(best, key=lambda x: x[1])[0][0]
    greedy_dest[idx2int(c)] = idx2int(best)

In [52]:
def dp(start_time, groups, counts, pfind):
    """
    Finds the exepected earnings of greedy agent.
    start_time : Shift start time, as minute of week (mow).
    group      : groupby object (by pickup mow)
    counts     : a dictionary of counting the number of trips with given pickup mow and cell
    pfind      : dictionary of probability of finding a trip at (pickup mow, pickup cell)
    """
    
    # expected earnings until end of shift
    reward = [Counter() for _ in range(SHIFT_MINS + 1)]
    
    for start in range(SHIFT_MINS - 1, -1, -1):
        curr_reward = reward[start]
        next_reward = reward[start + 1]
        # start is trip start time relative to shift
        # abs_start is trip start time relative to monday 12am
        abs_start = start_time + start
        
        curr_sub = groups.get_group(abs_start)
        for pu, dur, do, earn in zip(curr_sub.pickup, curr_sub.duration, curr_sub.dropoff, curr_sub.earning):
            # end is trip end time relative to shift
            end = start + int(dur)
            # combine current trip earnings and expected earnings of the destination state
            try:
                curr_reward[pu] += earn + (reward[end][do] if end < SHIFT_MINS else 0)
            except TypeError:
                print(pu, end, do)
                return
            
        # curr_reward current is a sum, needs to be normalised
        for pu, e in curr_reward.items():
            # expected earnings if no trip found
            nb_reward = next_reward[greedy_dest[pu]]
            # combine earnings if trip found or if no trip found
            curr_reward[pu] = (
                pfind[(abs_start, pu)] * e / counts[(abs_start, pu)]
                + (1 - pfind[(abs_start, pu)]) * nb_reward
            )
        print(reward[start][25*141+68])
    # the magic cell 25:68
    ans = reward[0][25 * 141 + 68]
    print(f'day {start_time // (24 * 60) % 7} {start_time // 60 % 24:02d}:{start_time % 60:02d} ${ans:.2f}')
    return reward

In [53]:
# count number of day of week
f_dow = [0] * 7
d = start_date = date(2015, 7, 1)
end_date = date(2017, 6, 30)
while d < end_date:
    d += timedelta(days=1)
    f_dow[d.weekday()] += 1
f_dow

[104, 104, 104, 105, 105, 104, 104]

In [60]:
counts = df.groupby(['mow', 'pickup']).size()

In [55]:
# is this logic correct? probability of getting a trip is counting all trips and dividing by the number of days
pfind = Counter()
for k, v in counts.items():
    pfind[k] = min(1, v / f_dow[k[0] // DAY_MINS])

In [56]:
groups = df.groupby('mow')

In [57]:
%%time
reward = dp(720, groups, counts, pfind)

day 0 12:00 $350.00
CPU times: user 52.7 s, sys: 4.6 s, total: 57.3 s
Wall time: 58.8 s


In [70]:
sorted(reward[0].items(), key=lambda x:x[1], reverse=True)

[(3602, 357.76257131612954),
 (3170, 357.7260110934229),
 (3588, 356.87032574921386),
 (3584, 356.26493284985906),
 (3868, 355.8548787087542),
 (3446, 355.823778499238),
 (3025, 355.7652031517226),
 (3456, 355.52688826778916),
 (3173, 355.27659951355463),
 (3744, 355.20401242204997),
 (3315, 354.74570017577287),
 (3029, 354.53027419370744),
 (3172, 354.47266157450844),
 (3164, 354.260634222503),
 (3725, 354.2275981640385),
 (3306, 354.1525511015002),
 (3166, 354.095536994857),
 (3304, 354.06748082054173),
 (3458, 353.9052927891596),
 (3311, 353.8596321847752),
 (3724, 353.70601031718206),
 (3445, 353.6930262214071),
 (3865, 353.5686794252781),
 (3739, 353.53022174458584),
 (3163, 353.52728738314954),
 (3169, 353.47144580647586),
 (3596, 353.32950067163455),
 (4151, 353.31156472118084),
 (3167, 353.1572037033363),
 (3454, 352.99965723246845),
 (3305, 352.8125330537452),
 (3442, 352.68683197930454),
 (3585, 352.65281310962575),
 (3595, 352.51122767290025),
 (3171, 352.4147876242021),
 (3

# Alt

In [82]:
def dp(start_time, groups, counts, pfind):
    """
    Finds the exepected earnings of greedy agent.
    start_time : Shift start time, as minute of week (mow).
    group      : groupby object (by pickup mow)
    counts     : a dictionary of counting the number of trips with given pickup mow and cell
    pfind      : dictionary of probability of finding a trip at (pickup mow, pickup cell)
    """
    
    # expected earnings until end of shift
    reward = [Counter() for _ in range(SHIFT_MINS + 1)]
    
    for start in range(SHIFT_MINS - 1, -1, -1):
        curr_reward = reward[start]
        next_reward = reward[start + 1]
        # start is trip start time relative to shift
        # abs_start is trip start time relative to monday 12am
        abs_start = start_time + start
        
        curr_sub = groups.get_group(abs_start)
        for pu, dur, do, earn in zip(curr_sub.pickup, curr_sub.duration, curr_sub.dropoff, curr_sub.earning):
            # end is trip end time relative to shift
            end = start + int(dur)
            # combine current trip earnings and expected earnings of the destination state
            curr_reward[pu] += earn + (reward[end][do] if end < SHIFT_MINS else 0)
            
        # curr_reward current is a sum, needs to be normalised
        for pu, e in curr_reward.items():
            # expected earnings if no trip found
            nb_reward = next_reward[greedy_dest[pu]]
            # combine earnings if trip found or if no trip found
            curr_reward[pu] = e / counts[(abs_start, pu)]
            curr_reward[pu] = (
                pfind[(abs_start, pu)] * e / counts[(abs_start, pu)]
                + (1 - pfind[(abs_start, pu)]) * nb_reward
            )
    # the magic cell 25:68
    ans = reward[0][25 * 141 + 68]
    print(f'day {start_time // (24 * 60) % 7} {start_time // 60 % 24:02d}:{start_time % 60:02d} ${ans:.2f}')
    return reward

In [83]:
counts = df.groupby(['mow', 'pickup']).size()

In [84]:
# is this logic correct? probability of getting a trip is counting all trips and dividing by the number of days
pfind = Counter()
for k, v in counts.items():
    pfind[k] = min(1, v / f_dow[k[0] // DAY_MINS])

In [85]:
groups = df.groupby('mow')

In [86]:
%%time
reward2 = dp(720, groups, counts, pfind)

13.800952370204623
14.101359809675406
14.272680767577702
15.058025135511192
16.1093705556807
16.881986577884234
18.031794764996167
18.70469795760292
20.509849208575552
21.71630009060914
22.56158408844475
24.031768510661625
25.06072384012516
26.323747989859523
27.41945191807056
28.51809104623782
30.540375639814517
30.894351421612363
31.911152309860203
33.078279338373044
34.00638845181013
35.96420336559308
36.691399001150444
37.713063425260394
38.99613298370427
40.08145998662234
41.33245455042527
42.37505371950337
43.33651524842458
44.8933207241232
46.13147895459094
47.085679422755526
48.07615994273939
49.259432580130905
49.92636822626993
50.966279134433506
52.432906026958875
53.030957223852155
53.597868554114946
55.21314122741628
56.62589024064753
56.980224042983224
58.11276048091226
59.72263848867666
60.59758853847032
61.870625291424645
62.319559530715736
62.97706171299654
64.12675414130585
65.27167421580086
66.30839105385847
67.1685287626174
68.07345570274
68.80041676102371
69.9585058

In [88]:
sorted(reward2[0].items(), key=lambda x:x[1], reverse=True)

[(4577, 492.8800478970072),
 (4717, 487.8644641092548),
 (7101, 483.2303532331198),
 (8657, 478.80066847918545),
 (8515, 478.6526687751488),
 (7247, 478.16937919150394),
 (6961, 478.09588921288434),
 (5561, 475.36015323444536),
 (6818, 475.22984562093313),
 (8091, 474.6665249492274),
 (9507, 473.7312325877339),
 (9223, 472.7765658263452),
 (9788, 471.94937277153963),
 (6148, 468.7944483767219),
 (5711, 467.00668711315683),
 (4426, 466.69522979929116),
 (9226, 464.34470224991946),
 (4132, 463.6555916792638),
 (6541, 463.0026187547111),
 (9368, 462.5068637840872),
 (9505, 461.1576808344955),
 (9082, 459.6395912693704),
 (5272, 459.3794995260727),
 (5131, 459.3524978144593),
 (8798, 459.0639116288775),
 (3999, 457.9969006456044),
 (9646, 456.93638307292764),
 (8231, 456.087530838484),
 (9647, 455.55210079963877),
 (3009, 455.0495929449552),
 (3146, 454.5705875393754),
 (5974, 454.12476836728007),
 (3002, 453.77335296017407),
 (3288, 452.98491587234935),
 (4732, 449.64343215008114),
 (4995