# Reading the data

In [1]:
import pandas as pd
import seaborn as sns
from math import ceil
from matplotlib import pyplot as plt

In [2]:
# Location of the HDF5 store read by the loading cells below.
path = "stored_data_case.h5"

In [3]:
def read_HDF_file(file_name, table):
    """Return the object stored under key ``table`` in an HDF5 store.

    The store is opened read-only: this function never writes, and the
    pandas default mode ('a') requires write access and creates an empty
    file when ``file_name`` does not exist. Compression options are not
    passed because they only affect writing.

    Parameters
    ----------
    file_name : str or path-like
        Path of the HDF5 file to open.
    table : str
        Key of the object to load from the store.

    Returns
    -------
    The object stored under ``table``.

    Raises
    ------
    KeyError
        If ``table`` is not a key of the store.
    OSError
        If the file cannot be opened for reading.
    """
    with pd.HDFStore(file_name, mode='r') as store:
        return store[table]

In [4]:
# Load the zone table and keep only the merge key plus the two attributes
# used by the pricing cells (demand category and cluster).
df_groups = pd.read_csv('residential_zones.csv').loc[
    :, ['residential_area', 'demand_category', 'cluster']
]

In [5]:
# Preview the first rows of the zone table.
df_groups.head(n=5)

Unnamed: 0,residential_area,demand_category,cluster
0,10E,medium high,3
1,10F,medium high,3
2,10G,medium high,3
3,11E,medium low,4
4,11F,medium low,4


In [6]:
# Load the transaction table from the HDF5 store.
df = read_HDF_file(file_name=path, table='/transaction_and_locations')

In [7]:
# Preview the first rows of the transaction table.
df.head(n=5)

Unnamed: 0,parkmeter_id,user_type,payment_method,amount,duration_hours,parking_start,parking_end,parkmeter_address,arrondissement,system,residential_area,hourly_rate,parkmeter_coordinates
0,66391001,Rotatif,CB,2.4,2.0,2014-09-09 14:02:44,2014-09-09 16:02:44,121 BOULEVARD MURAT,16,MIX,16Z,2.4,"48.8373519997, 2.25909199964"
1,66391001,Rotatif,Paris Carte,0.3,0.25,2014-02-09 14:18:09,2014-02-09 14:33:09,121 BOULEVARD MURAT,16,MIX,16Z,2.4,"48.8373519997, 2.25909199964"
2084,66391001,Rotatif,CB,0.6,0.5,2014-05-09 18:55:21,2014-05-09 19:25:21,121 BOULEVARD MURAT,16,MIX,16Z,2.4,"48.8373519997, 2.25909199964"
2085,66391001,Rotatif,CB,0.9,0.75,2014-08-09 16:30:32,2014-08-09 17:15:32,121 BOULEVARD MURAT,16,MIX,16Z,2.4,"48.8373519997, 2.25909199964"
2086,66391001,Rotatif,Paris Carte,0.3,0.25,2014-01-09 18:40:25,2014-01-09 18:55:25,121 BOULEVARD MURAT,16,MIX,16Z,2.4,"48.8373519997, 2.25909199964"


In [8]:
# Work on a random subset of the transactions; assign `df` directly instead
# to run the analysis on the full data set.
# - random_state pins the draw so a fresh "Restart & Run All" reproduces
#   the same figures.
# - The size is capped at len(df) because sampling without replacement
#   raises ValueError when n exceeds the number of rows.
df_sample = df.sample(n=min(100000, len(df)), random_state=0)

In [9]:
# Preview the first rows of the sampled transactions.
df_sample.head(n=5)

Unnamed: 0,parkmeter_id,user_type,payment_method,amount,duration_hours,parking_start,parking_end,parkmeter_address,arrondissement,system,residential_area,hourly_rate,parkmeter_coordinates
18964205,60230101,Résident,CB,3.25,50.0,2014-01-12 13:01:08,2014-08-12 14:01:08,14 RUE CUVIER,5,MIX,5G,4.0,"48.8443614501, 2.3560184896"
8081839,98840106,Rotatif,CB,2.4,2.0,2014-03-07 15:32:24,2014-03-07 17:32:24,12 RUE DE LA VISTULE,13,MIX,13M,2.4,"48.8233319998, 2.36027900021"
19050724,46930402,Résident,Paris Carte,0.65,10.0,2014-11-21 10:56:36,2014-11-24 10:56:36,58 AVENUE D IENA,16,MIX,16G,2.4,"48.8683649998, 2.29585999935"
7762746,95100204,Rotatif,CB,4.8,2.0,2014-06-18 23:39:30,2014-06-19 13:00:00,24 RUE DE TURIN,8,MIX,8L,4.0,"48.881327, 2.32470700024"
6194767,70760301,Résident,Paris Carte,2.1,32.31,2014-02-16 12:33:20,2014-02-21 20:00:00,23 RUE PASCAL,5,MIX,5E,4.0,"48.8373930003, 2.34920200027"


# Revenue pre case (recorded hourly rates)

In [19]:
# Per-transaction revenue is the parking duration times the hourly rate
# recorded for that transaction; `old` is the total over the sample.
df_sample['revenue'] = df_sample['duration_hours'].mul(df_sample['hourly_rate'])
old = df_sample['revenue'].sum()

# Revenue post case (baseline rates and cluster discounts)

In [11]:
# Attach each transaction's demand category and cluster from the zone table.
#
# - Columns coming from df_groups are dropped first so that re-running this
#   cell does not produce `_x` / `_y` suffixed duplicates.
# - validate='many_to_one' fails loudly if a residential_area appears more
#   than once in df_groups, which would otherwise silently duplicate
#   transactions and inflate the revenue totals.
#
# NOTE(review): this is still an inner join, so transactions whose
# residential_area is missing from df_groups are dropped here, while `old`
# was computed before the merge. Confirm both totals cover the same rows.
df_sample = (
    df_sample
    .drop(columns=[c for c in df_groups.columns if c != 'residential_area'],
          errors='ignore')
    .merge(df_groups, on=['residential_area'], validate='many_to_one')
)
df_sample.head()

Unnamed: 0,parkmeter_id,user_type,payment_method,amount,duration_hours,parking_start,parking_end,parkmeter_address,arrondissement,system,residential_area,hourly_rate,parkmeter_coordinates,revenue,demand_category,cluster
0,60230101,Résident,CB,3.25,50.0,2014-01-12 13:01:08,2014-08-12 14:01:08,14 RUE CUVIER,5,MIX,5G,4.0,"48.8443614501, 2.3560184896",200.0,medium high,1
1,17540101,Rotatif,Paris Carte,2.7,0.75,2014-03-11 15:36:05,2014-03-11 16:21:05,3 RUE DES CHANTIERS,5,MIX,5G,4.0,"48.8489310002, 2.35484800059",3.0,medium high,1
2,73010204,Résident,CB,3.25,50.0,2014-02-25 19:56:28,2014-04-03 20:56:28,12 RUE PESTALOZZI,5,MIX,5G,4.0,"48.8417800001, 2.3509930006",200.0,medium high,1
3,52200604,Rotatif,Paris Carte,3.6,1.0,2014-05-15 14:07:00,2014-05-15 15:07:00,36 RUE LACEPEDE,5,MIX,5G,4.0,"48.8441389997, 2.35105999971",4.0,medium high,1
4,20940402,Rotatif,Paris Carte,0.9,0.25,2014-05-16 16:55:29,2014-05-16 17:10:29,48 RUE DE LA CLEF,5,MIX,5G,4.0,"48.8427430001, 2.35279300051",1.0,medium high,1


In [12]:
# Baseline value per demand category, looked up by new_revenue().
baselines = {
    'low': 3,
    'medium low': 3.5,
    'medium high': 4,
    'high': 4.5,
}

In [13]:
# Discount factor per cluster id, looked up by new_revenue().
discount = {
    1: 0,
    3: 0.1,
    4: 0.15,
    2: 0.20,
}

In [14]:
# Independent copy holding only the three inputs of new_revenue(), in the
# positional order that function expects; then show the row labelled 1.
test = df_sample.loc[:, ['duration_hours', 'demand_category', 'cluster']].copy()
test.loc[1, :]

duration_hours            0.75
demand_category    medium high
cluster                      1
Name: 1, dtype: object

In [15]:
def new_revenue(transaction):
    """Compute the revenue of one transaction under the new pricing.

    Parameters
    ----------
    transaction : iterable
        Its first three values are, in order: the duration, the demand
        category (a key of the module-level ``baselines``) and the cluster
        (a key of the module-level ``discount``). Any further values are
        ignored. A row Series, tuple or list all work.

    Returns
    -------
    ``baselines[demand] * duration * rate``, where ``rate`` is the sum of
    the geometric series ``1 + d + ... + d**(n - 1)`` with
    ``d = discount[cluster]`` and ``n = ceil(duration)``.

    Raises
    ------
    KeyError
        If the demand category or the cluster has no entry in the
        corresponding lookup table.

    NOTE(review): for any positive ``d`` the multiplier is >= 1, so it
    raises the amount rather than lowering it. Confirm this is the
    intended pricing formula.
    """
    # Unpack by iteration instead of transaction[0]/[1]/[2]: integer keys
    # on a Series with string labels rely on a deprecated positional
    # fallback of Series.__getitem__.
    duration, demand, cluster, *_ = transaction
    ratio = discount[cluster]
    n = ceil(duration)
    if ratio == 1:
        # The closed form divides by zero here; the series is n ones.
        rate = n
    else:
        rate = (1 - ratio**n) / (1 - ratio)
    return baselines[demand] * duration * rate

In [16]:
# Evaluate new_revenue() once per row (axis=1 passes each row as a Series).
test['new_revenue'] = test.apply(new_revenue, axis=1)

In [18]:
# Total revenue over the sample under the new pricing.
new = test.loc[:, 'new_revenue'].sum()

# Impact 

In [22]:
# Relative change of the scaled new total with respect to the old total.
# NOTE(review): the 0.9 factor is unexplained here; presumably an assumed
# 10% reduction applied to the new revenue. TODO confirm and name it.
# NOTE(review): the execution counts of the revenue cells are out of order
# (In [19] before In [11], In [22] last), so the stored output may not
# match a fresh top-to-bottom run.
scaled_new = new*0.9
(scaled_new - old) / old

3221570.702134208