# Coding challenge

In [1]:
import pandas as pd
import numpy as np

In [2]:
# (state name, numeric code) pairs used to encode the state of a car for one half-day.
LABEL = [('rented', 1), ('available', 0), ('unavailable', -1)]

In [6]:
# Lookup tables in both directions: state name -> numeric code, and numeric code -> state name.
lab_to_v = {name: code for name, code in LABEL}
v_to_lab = {code: name for name, code in LABEL}

In [155]:
# Load the three input tables, each indexed by its 'id' column.
# The listed timestamp columns are parsed to datetimes at read time.
cars_df = pd.read_csv(
    '../data/cars.csv',
    index_col='id',
    parse_dates=['created_at'],
)
rentals_df = pd.read_csv(
    '../data/rentals.csv',
    index_col='id',
    parse_dates=['starts_at', 'ends_at'],
)
unavailabilities_df = pd.read_csv(
    '../data/unavailabilities.csv',
    index_col='id',
    parse_dates=['starts_at', 'ends_at'],
)

In [157]:
# (level 2) Propagate the last valid created_at forward over missing values.
# The result is assigned back explicitly: an in-place fillna on the column
# accessor is a chained operation that is not guaranteed to reach cars_df
# (it does not under pandas copy-on-write), and fillna(method=...) is deprecated.
cars_df['created_at'] = cars_df['created_at'].ffill()

In [158]:
# Start timestamps of every half-day of 2015 (00:00 and 12:00 of each day),
# as a numpy datetime64 array.
# The lowercase 'h' alias is used because the uppercase 'H' alias is deprecated
# in recent pandas; 'h' is accepted by older releases as well.
year_2015 = pd.date_range('2015-1-1 00:00:00', '2015-12-31 12:00:00', freq='12h').values

In [159]:
def initialise(start, mask):
    """Build the initial state vector of a car.

    Every entry of ``mask`` that is strictly earlier than ``start`` is coded
    as 'unavailable'; every other entry is coded as 'available'. The numeric
    codes are read from the module-level ``lab_to_v`` table.

    Parameters
    ----------
    start : value comparable with the entries of ``mask``
    mask : array of time points

    Returns
    -------
    numpy.ndarray of state codes, one per entry of ``mask``.
    """
    before_start = mask < start
    return np.where(before_start, lab_to_v['unavailable'], lab_to_v['available'])

In [199]:
%%time
cars_df['availabilities'] = [initialise(start, year_2015) for start in cars_df.created_at.values]

CPU times: user 1.02 s, sys: 0 ns, total: 1.02 s
Wall time: 1.02 s


In [161]:
def mask_period(start, end, mask):
    """Flag the entries of ``mask`` lying strictly between ``start`` and ``end``.

    Both bounds are exclusive: an entry equal to ``start`` or to ``end`` is
    not flagged.

    Returns the element-wise boolean result of the two comparisons combined
    with ``&``.
    """
    after_start = mask > start
    before_end = mask < end
    return after_start & before_end

In [214]:
# make a boolean list per car with true if rented anf false if not rented
%%time
cars_df['rented'] = rentals_df.groupby('car_id').apply(
    lambda car: np.vstack([
        mask_period(start, end, year_2015)
        for start, end in zip(car.starts_at.values, car.ends_at.values)
    ]).any(axis=0)
)

CPU times: user 44.9 s, sys: 124 ms, total: 45.1 s
Wall time: 45.1 s


In [215]:
# make a boolean list per car with true if unavailable anf false if not unavailable
%%time
cars_df['unavailable'] = unavailabilities_df.groupby('car_id').apply(
    lambda car: np.vstack([
        mask_period(start, end, year_2015)
        for start, end in zip(car.starts_at.values, car.ends_at.values)
    ]).any(axis=0)
)

CPU times: user 36.8 s, sys: 68 ms, total: 36.9 s
Wall time: 36.9 s


In [227]:
def merge_availabilities(availabilities, rented, unavailable):
    """Overlay the rented and unavailable masks onto a car's state vector.

    ``availabilities`` is modified in place and also returned. A mask is
    skipped when ``pd.isnull`` reports any null in it (for instance when the
    mask is a single NaN rather than a boolean array). The 'rented' mask is
    applied first and the 'unavailable' mask second, so 'unavailable' wins
    wherever both masks are True.
    """
    overlays = ((rented, 'rented'), (unavailable, 'unavailable'))
    for period_mask, state_name in overlays:
        if np.any(pd.isnull(period_mask)):
            # No usable mask for this state: leave the vector untouched.
            continue
        availabilities[period_mask] = lab_to_v[state_name]
    return availabilities

In [230]:
#update the availabilities with the rented and unavailable
%%time
cars_df.availabilities = [
    merge_availabilities(availabilities, rented, unavailable)
    for availabilities, rented, unavailable in zip(cars_df.availabilities, cars_df.rented, cars_df.unavailable)
]

CPU times: user 8.95 s, sys: 4 ms, total: 8.96 s
Wall time: 9.05 s


In [278]:
# Stack the per-car state vectors into a table (rows: cars, columns: half-days)
# and translate every numeric code back to its state name.
cars_state_df = pd.DataFrame(
    data=np.vstack(cars_df.availabilities.values),
    index=cars_df.index,
    columns=year_2015,
).applymap(v_to_lab.__getitem__)

In [282]:
# Write the state table transposed: one row per half-day, one column per car.
cars_state_df.transpose().to_csv('output/cars_half_day_state.csv')

# Level 3

I aggregate the cars from the same city and compute the occupation rate per half-day. For the weekly occupation rate, I group the half-day occupation rates by week and take their average.

In [240]:
def occupacy_rate(availabilities, time_range = False):
    """Compute the share of rented entries among the non-unavailable ones.

    Along axis 0, the number of entries equal to 1 is divided by the number
    of entries different from -1. Division warnings are silenced, so a
    position with a zero denominator yields NaN instead of a warning.

    ``time_range`` is accepted but not used.
    """
    rented_count = (availabilities == 1).sum(axis=0)
    usable_count = (availabilities != -1).sum(axis=0)
    with np.errstate(divide='ignore', invalid='ignore'):
        rate = rented_count / usable_count
    return rate

In [248]:
# For each city, stack the state vectors of its cars and compute the
# occupation rate of every half-day.
city_half_day_occupation_rate_series = (
    cars_df
    .groupby('city')
    .apply(lambda city_cars: occupacy_rate(np.vstack(city_cars.availabilities)))
)

In [262]:
# Turn the per-city rate vectors into a table and transpose it:
# one row per half-day, one column per city.
city_half_day_occupation_rate_df = pd.DataFrame(
    data=np.vstack(city_half_day_occupation_rate_series.values),
    index=city_half_day_occupation_rate_series.index,
    columns=year_2015,
).transpose()

In [263]:
# Tag every half-day row with its ISO week number so the rows can be grouped by week.
# DatetimeIndex.week is no longer available in pandas 2.x; Timestamp.isocalendar()
# (inherited from datetime) gives the ISO week number on old and new pandas alike.
city_half_day_occupation_rate_df['week'] = [
    timestamp.isocalendar()[1]
    for timestamp in city_half_day_occupation_rate_df.index
]

In [265]:
# Weekly occupation rate per city: the mean of the half-day rates within each week.
city_weekly_occupation_rate_df = (
    city_half_day_occupation_rate_df
    .groupby(by='week')
    .mean()
)

In [266]:
# Display the weekly occupation rate table (rows: week number, columns: city).
city_weekly_occupation_rate_df

city,Abbeville,Achères,Agde,Agen,Aigues-Mortes,Aix-en-Provence,Aix-les-Bains,Ajaccio,Albertville,Albi,...,Épinal,Épinay-sous-Sénart,Épinay-sur-Orge,Épinay-sur-Seine,Éragny,Étampes,Évian-les-Bains,Évreux,Évron,Évry
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.239662,0.279545,0.209163,0.191923,0.27381,0.199723,0.299232,0.184632,0.0625,0.245145,...,0.231731,0.083333,0.292614,0.353365,0.31994,0.163121,0.12651,0.216256,0.375,0.145057
2,0.166257,0.131097,0.153489,0.135431,0.289116,0.144564,0.184916,0.123292,0.059524,0.128773,...,0.164673,0.142857,0.285833,0.22933,0.293665,0.140977,0.064951,0.158882,0.166667,0.129985
3,0.224591,0.209235,0.152712,0.213292,0.27381,0.173727,0.152103,0.134035,0.238095,0.088079,...,0.160205,0.058107,0.141689,0.161983,0.220935,0.192169,0.178719,0.116911,0.097619,0.187872
4,0.212321,0.020238,0.156242,0.164895,0.277381,0.127453,0.129252,0.124843,0.166667,0.059054,...,0.134453,0.094246,0.222527,0.19463,0.205641,0.119072,0.153038,0.108197,0.078571,0.185221
5,0.123029,0.141342,0.122996,0.164298,0.085374,0.135906,0.058163,0.151915,0.02381,0.103314,...,0.116974,0.02381,0.127028,0.089498,0.144525,0.064599,0.097745,0.090255,0.235714,0.070125
6,0.164101,0.072348,0.21081,0.164663,0.253401,0.141741,0.071743,0.182006,0.108844,0.182473,...,0.175971,0.19657,0.151996,0.189786,0.229079,0.079259,0.200473,0.136772,0.02381,0.086273
7,0.135596,0.179293,0.192973,0.180152,0.389456,0.20554,0.210518,0.180673,0.061905,0.176433,...,0.216611,0.391865,0.145778,0.290816,0.237752,0.237666,0.172567,0.177593,0.160714,0.192069
8,0.150675,0.099567,0.118242,0.162898,0.354762,0.208184,0.294823,0.161689,0.412245,0.160284,...,0.198014,0.091837,0.103306,0.150654,0.140769,0.162107,0.160201,0.143904,0.130952,0.213807
9,0.190909,0.119048,0.202596,0.131247,0.231293,0.152817,0.197491,0.158766,0.307823,0.161307,...,0.196443,0.090873,0.23785,0.050508,0.154572,0.183225,0.070343,0.13087,0.164286,0.14846
10,0.138945,0.174026,0.20481,0.168961,0.215986,0.176371,0.105902,0.166159,0.102041,0.201337,...,0.173992,0.224603,0.190114,0.114411,0.206237,0.122804,0.172273,0.18126,0.44881,0.278814
