## Time series estimation - Contemporary, daily at system level

% TODO: Check whether the R-squared values are computed correctly

In [None]:
import os
import sys
# Make the parent of the current working directory importable, so that
# project-local packages living one level up can be imported below.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
from IPython.core.display import display, HTML
# Inject CSS so the notebook's `.container` element spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

import plotly.plotly
import plotly.tools as tls
import plotly.graph_objs as go

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
%matplotlib inline
import numpy as np
import datetime as dt
import time

In [None]:
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
# Extra per-model statistics; this mapping is handed to `summary_col` through
# its `info_dict` argument further down. Each value receives a fitted results
# object and returns the statistic to report under the given label.
info_dict = {
    'N': lambda res: res.nobs,
    'R2_adj': lambda res: res.rsquared_adj,
    'AIC': lambda res: res.aic,
    'F': lambda res: res.fvalue,
    'P_F': lambda res: res.f_pvalue,
    # Durbin-Watson statistic computed on the model residuals.
    'DW': lambda res: sm.stats.stattools.durbin_watson(res.resid),
}

In [None]:
from Utils import TransantiagoConstants

In [None]:
# Data directories exposed by TransantiagoConstants (imported from Utils).
# DTPM_TRXDir is the base folder for the CSV inputs read below.
DTPMDir = TransantiagoConstants.DTPMDir
DTPM_TRXDir = TransantiagoConstants.DTPM_TRXDir

In [None]:
# Load the daily transaction summary (semicolon-separated, latin-1 encoded);
# the first CSV column is used as the index.
daily_input_path = os.path.join(DTPM_TRXDir, '3_DAILY/daily_summary.csv')
daily_trx = pd.read_csv(
    daily_input_path,
    sep=';',
    encoding='latin-1',
    index_col=0,
)

* Creating new dependent and independent variables

In [None]:
# TOTAL_trx is the element-wise sum of the four transaction columns, added
# left to right (a missing value in any component propagates to the total).
trx_components = ['pn_SUM_TRX_no_t', 'pn_SUM_TRX_3t', 'pn_SUM_TRX_tm', 'zp_SUM_TRX']
total_trx = daily_trx.loc[:, trx_components[0]]
for component in trx_components[1:]:
    total_trx = total_trx + daily_trx.loc[:, component]
daily_trx.loc[:, 'TOTAL_trx'] = total_trx

In [None]:
# Load the explanatory variables (semicolon-separated, latin-1 encoded).
# The first CSV column becomes the index and column position 1 is parsed as
# dates -- presumably the 'DATE' column used in the next cell; confirm.
independent_variables_path = os.path.join(DTPM_TRXDir, '0_INDEPENDENTS/independents_variables.csv')
independent_variables = pd.read_csv(
    independent_variables_path,
    sep=';',
    encoding='latin-1',
    index_col=0,
    parse_dates=[1],
)

In [None]:
# Combined indicators: 'Verano' = Enero + Febrero, 'Nov_Dic_2017' = Nov_2017 + Dic_2017.
independent_variables.loc[:, 'Verano'] = (
    independent_variables.loc[:, 'Enero'] + independent_variables.loc[:, 'Febrero']
)
independent_variables.loc[:, 'Nov_Dic_2017'] = (
    independent_variables.loc[:, 'Nov_2017'] + independent_variables.loc[:, 'Dic_2017']
)

# Week number of each date, then one dummy column per week value
# (named 'WEEK_OF_YEAR_<n>'); get_dummies drops the source column.
week_numbers = independent_variables.loc[:, 'DATE'].apply(lambda stamp: stamp.week)
independent_variables.loc[:, 'WEEK_OF_YEAR'] = week_numbers
independent_variables = pd.get_dummies(independent_variables, columns=['WEEK_OF_YEAR'])

In [None]:
# Attach the explanatory variables to every daily transaction row; a left join
# keeps all rows of daily_trx even when no matching day exists on the right.
merge_keys = ['YEAR', 'MONTH', 'YEAR_DAY']
complete_db = daily_trx.merge(independent_variables, how='left', on=merge_keys)

In [None]:
# Put the rows in chronological order (all keys ascending), modifying
# complete_db in place; the existing index labels are kept.
sort_keys = ['YEAR', 'MONTH', 'YEAR_DAY']
complete_db.sort_values(by=sort_keys, ascending=[True] * len(sort_keys), inplace=True)

* Descriptives: General

In [None]:
# Empty placeholder; `descriptives` is reassigned from scratch in the next cell.
descriptives = pd.DataFrame()

In [None]:
# Summary statistics (Series.describe) for the total and for each transaction
# component, collected side by side: one column per series, one row per statistic.
descriptives = complete_db.loc[:, 'TOTAL_trx'].describe().to_frame('total_trx')
for component in ['pn_SUM_TRX_no_t', 'pn_SUM_TRX_3t', 'pn_SUM_TRX_tm', 'zp_SUM_TRX']:
    component_stats = complete_db.loc[:, component].describe().to_frame()
    descriptives = pd.concat([descriptives, component_stats], axis=1, join='inner')

In [None]:
#descriptives

In [None]:
#pd.set_option('display.float_format', '{:.3e}'.format)

In [None]:
#print(descriptives.to_latex())

* Descriptives: Only working days during normal period

In [None]:
# Keep only "normal" working days: rows where every one of the indicator
# columns below equals 0 (no Saturday, Sunday, 'Feriado_laboral', nor any of
# the ISO-week dummies 52, 53 and 1-9).
excluded_day_flags = ['SATURDAY',
                      'SUNDAY',
                      'Feriado_laboral',
                      'WEEK_OF_YEAR_52',
                      'WEEK_OF_YEAR_53',
                      'WEEK_OF_YEAR_1',
                      'WEEK_OF_YEAR_2',
                      'WEEK_OF_YEAR_3',
                      'WEEK_OF_YEAR_4',
                      'WEEK_OF_YEAR_5',
                      'WEEK_OF_YEAR_6',
                      'WEEK_OF_YEAR_7',
                      'WEEK_OF_YEAR_8',
                      'WEEK_OF_YEAR_9']
is_normal_working_day = (complete_db[excluded_day_flags] == 0).all(axis=1)

# Take an explicit copy: a new column ('total_exp') is assigned into this frame
# later in the notebook, and writing into a slice of complete_db would raise
# pandas' SettingWithCopyWarning.
working_complete_db = complete_db.loc[is_normal_working_day, :].copy()

In [None]:
# Empty placeholder; `descriptives_working` is reassigned from scratch in the next cell.
descriptives_working = pd.DataFrame()

In [None]:
# Same descriptive table as for the full sample, restricted to the rows kept
# in working_complete_db: one column per series, one row per statistic.
descriptives_working = working_complete_db.loc[:, 'TOTAL_trx'].describe().to_frame('total_trx')
for component in ['pn_SUM_TRX_no_t', 'pn_SUM_TRX_3t', 'pn_SUM_TRX_tm', 'zp_SUM_TRX']:
    component_stats = working_complete_db.loc[:, component].describe().to_frame()
    descriptives_working = pd.concat([descriptives_working, component_stats], axis=1, join='inner')

In [None]:
#descriptives_working

In [None]:
#print(descriptives_working.to_latex())

* Plotting by year

In [None]:
# Yearly totals of the overall and per-component transaction counts.
# The columns are selected with a list: indexing a groupby with a bare tuple
# of names was deprecated in pandas 1.0 and no longer selects several columns
# in pandas 2.x.
yearly_columns = ['TOTAL_trx', 'pn_SUM_TRX_no_t', 'pn_SUM_TRX_3t', 'pn_SUM_TRX_tm', 'zp_SUM_TRX']
grouped_by_year = complete_db.groupby(['YEAR'])[yearly_columns].sum()

In [None]:
# Echo the yearly totals as the cell output.
grouped_by_year

In [None]:
# Bar colours given as 0-255 RGB triples, rescaled to the 0-1 floats that
# matplotlib expects.
colors = [
    (r / 255., g / 255., b / 255.)
    for r, g, b in [(76, 181, 245), (183, 184, 182), (52, 103, 92), (179, 193, 0)]
]

In [None]:
def millions(x, pos):
    """Format a tick value as millions with one decimal, e.g. 1500000 -> '1.5M'.

    Both arguments follow the matplotlib FuncFormatter callback signature:
    `x` is the tick value and `pos` the tick position (unused here).
    """
    in_millions = x * 1e-6
    return '%1.1fM' % in_millions

In [None]:
# Stacked bar chart of yearly transactions, one bar segment per series.
# x positions are the YEAR index values; each series is cast to plain ints.
year_values = grouped_by_year.index.values
no_turnstile_values = list(map(int, grouped_by_year['pn_SUM_TRX_no_t'].values))
three_turnstile_values = list(map(int,grouped_by_year['pn_SUM_TRX_3t'].values))
butterfly_turnstile_values = list(map(int,grouped_by_year['pn_SUM_TRX_tm'].values))
zp_values = list(map(int,grouped_by_year['zp_SUM_TRX'].values))

# y-axis tick labels are rendered in millions via `millions`.
formatter = FuncFormatter(millions)

# NOTE(review): `labels` is not referenced again in this cell; the legend
# entries come from the `label=` argument of each bar call.
labels = ["ZP", "S/T", "C/T3", "C/TM"]
fig, ax = plt.subplots()

# Each segment is stacked on top of the running sum of the previous ones
# through the `bottom` argument.
p1 = plt.bar(year_values, zp_values, color = colors[0], label='ZP')
p2 = plt.bar(year_values, no_turnstile_values, bottom=zp_values, color = colors[1], label='S/T')
p3 = plt.bar(year_values, three_turnstile_values, bottom= [sum(x) for x in zip(zp_values, no_turnstile_values)], color = colors[2],label='C/T3')
p4 = plt.bar(year_values, butterfly_turnstile_values, bottom= [sum(x) for x in zip(zp_values, no_turnstile_values,three_turnstile_values)], color = colors[3], label='C/TM')

ax.legend()
ax.set_title('Transacciones por año, 2015 a 2017')
ax.set_ylabel('Transacciones')
ax.yaxis.set_major_formatter(formatter)
ax.set_xlabel('Año')
#fig.autofmt_xdate()
# One x tick per year between the first and the last year present.
plt.xticks(np.arange(min(year_values), max(year_values)+1, 1.0))
plt.tight_layout()
#plt.savefig('C:/Users/leoca_000/Desktop/Evasion/03_report/02_Figs/6_trxOverTimeByYear_all.pdf')

#plt.show()

* Choice of max_lags based on the number of observations

In [None]:
import math

In [None]:
# Maximum lag for the HAC covariance: floor(4 * (T / 100) ** (2 / 9)), which
# matches the commonly used Newey-West rule of thumb.
# NOTE(review): T = 1096 is hard-coded -- presumably the number of daily
# observations in the sample; confirm it matches len(complete_db).
n_obs = 1096
g_1 = math.floor(4 * math.pow(n_obs / 100, 2 / 9))
#g_2 = math.floor(math.pow(1096,1/4))

* Defining a function to estimate and summarize prediction

In [None]:
def estimateWithStatsModels(Y, X, g, name):
    """Fit an OLS regression of Y on X and return HAC-robust results.

    A constant is added to X before fitting. The covariance of the fitted
    model is then replaced by a HAC estimate using `g` as `maxlags`.

    Side effect: the in-sample predictions are written to the module-level
    `complete_db` frame in a new column called 'ypred_<name>'.

    Returns the results object produced by `get_robustcov_results`.
    """
    design = sm.add_constant(X)
    ols_fit = sm.OLS(Y, design).fit()
    results = ols_fit.get_robustcov_results(cov_type='HAC', maxlags=g)

    # Store the fitted values next to the data, keyed by the model name.
    complete_db.loc[:, 'ypred_' + name] = results.predict(design)

    return results

### M1_1

* Complete model

In [None]:
# M1_1 -- complete model: TOTAL_trx regressed on the full set of regressors.
regressors_m1_1 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Elecciones', 'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_1]

results_m1_1 = estimateWithStatsModels(Y, X, g_1, 'm1_1')
print(results_m1_1.summary())

### M1_2

* Elecciones is omitted.

In [None]:
# M1_2 -- same regressors as M1_1 without 'Elecciones'.
regressors_m1_2 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_2]

results_m1_2 = estimateWithStatsModels(Y, X, g_1, 'm1_2')
print(results_m1_2.summary())

### M1_3

* Metro Hora Punta is omitted

In [None]:
# M1_3 -- same regressors as M1_2 without 'Metro Hora Punta'.
regressors_m1_3 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_3]

results_m1_3 = estimateWithStatsModels(Y, X, g_1, 'm1_3')
print(results_m1_3.summary())

### M1_4

* Nov_2017 is omitted

In [None]:
# M1_4 -- same regressors as M1_3 without 'Nov_2017'.
regressors_m1_4 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_4]

results_m1_4 = estimateWithStatsModels(Y, X, g_1, 'm1_4')
print(results_m1_4.summary())

### M1_5

* Dic_2017 is omitted

In [None]:
# M1_5 -- same regressors as M1_4 without 'Dic_2017'.
regressors_m1_5 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_5]

results_m1_5 = estimateWithStatsModels(Y, X, g_1, 'm1_5')
print(results_m1_5.summary())

### M1_6

* Incidente_Metro is omitted

In [None]:
# M1_6 -- same regressors as M1_5 without 'Incidente_Metro'.
regressors_m1_6 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_6]

results_m1_6 = estimateWithStatsModels(Y, X, g_1, 'm1_6')
print(results_m1_6.summary())

### M1_7

* Corte_Metro is omitted

In [None]:
# M1_7 -- same regressors as M1_6 without 'Corte_Metro'.
regressors_m1_7 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Retraso_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_7]

results_m1_7 = estimateWithStatsModels(Y, X, g_1, 'm1_7')
print(results_m1_7.summary())

### M1_8

* Bucle is omitted

In [None]:
# M1_8 -- same regressors as M1_7 without 'Bucle'.
regressors_m1_8 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Retraso_Metro',
    'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_8]

results_m1_8 = estimateWithStatsModels(Y, X, g_1, 'm1_8')
print(results_m1_8.summary())

### <font color='green'> M1_9 </font>

* Retraso_Metro is omitted

In [None]:
# M1_9 -- same regressors as M1_8 without 'Retraso_Metro'.
regressors_m1_9 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_9]

results_m1_9 = estimateWithStatsModels(Y, X, g_1, 'm1_9')
print(results_m1_9.summary())

### <font color='green'>M1_10</font>

* kms_metro is omitted

In [None]:
# M1_10 -- same regressors as M1_9 without 'kms_metro'.
regressors_m1_10 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima', 'visperas_laborales',
    'N_ZPs', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_10]

results_m1_10 = estimateWithStatsModels(Y, X, g_1, 'm1_10')
print(results_m1_10.summary())

In [None]:
# Side-by-side table of models m1_9 and m1_10, with significance stars and
# the extra statistics defined in info_dict.
summary_m1_9_m1_10 = summary_col(
    [results_m1_9, results_m1_10],
    model_names=['m1_9', 'm1_10'],
    stars=True,
    info_dict=info_dict,
)

In [None]:
# Echo the comparison table as the cell output.
summary_m1_9_m1_10

In [None]:
# total_exp is the element-wise sum of the three 'pn_SUM_EXP_*' columns,
# added left to right, for the working-day subset.
exp_components = ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
total_exp = working_complete_db.loc[:, exp_components[0]]
for component in exp_components[1:]:
    total_exp = total_exp + working_complete_db.loc[:, component]
working_complete_db.loc[:, 'total_exp'] = total_exp

### M1_11

In [None]:
# M1_11 -- regressors of M1_10 plus 'ratio_3t' (placed before 'ratio_tm').
regressors_m1_11 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima', 'visperas_laborales',
    'N_ZPs', 'ratio_3t', 'ratio_tm',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_11]

results_m1_11 = estimateWithStatsModels(Y, X, g_1, 'm1_11')
print(results_m1_11.summary())

### M1_12

In [None]:
# M1_12 -- same regressors as M1_11 without 'ratio_tm'.
regressors_m1_12 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima', 'visperas_laborales',
    'N_ZPs', 'ratio_3t',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m1_12]

results_m1_12 = estimateWithStatsModels(Y, X, g_1, 'm1_12')
print(results_m1_12.summary())

--------------------

### M2_1

In [None]:
# Squared term of 'ratio_tm', used as an extra regressor by the M2 models.
ratio_tm = complete_db.loc[:, 'ratio_tm']
complete_db.loc[:, 'ratio_tm_2'] = ratio_tm * ratio_tm

In [None]:
# M2_1 -- regressors of M1_1 plus the squared term 'ratio_tm_2'.
regressors_m2_1 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Elecciones', 'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm', 'ratio_tm_2',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m2_1]

results_m2_1 = estimateWithStatsModels(Y, X, g_1, 'm2_1')
print(results_m2_1.summary())

### M2_2

In [None]:
# M2_2 -- same regressors as M2_1 without 'Elecciones'.
regressors_m2_2 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm', 'ratio_tm_2',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m2_2]

results_m2_2 = estimateWithStatsModels(Y, X, g_1, 'm2_2')
print(results_m2_2.summary())

### M2_3

In [None]:
# M2_3 -- same regressors as M2_2 without 'Incidente_Metro'.
regressors_m2_3 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm', 'ratio_tm_2',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m2_3]

results_m2_3 = estimateWithStatsModels(Y, X, g_1, 'm2_3')
print(results_m2_3.summary())

### M2_4

In [None]:
# M2_4 -- same regressors as M2_3 without 'Nov_2017'.
regressors_m2_4 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Dic_2017', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm', 'ratio_tm_2',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m2_4]

results_m2_4 = estimateWithStatsModels(Y, X, g_1, 'm2_4')
print(results_m2_4.summary())

### M2_5

In [None]:
# M2_5 -- same regressors as M2_4 without 'Dic_2017'.
regressors_m2_5 = [
    'SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm', 'ratio_tm_2',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m2_5]

results_m2_5 = estimateWithStatsModels(Y, X, g_1, 'm2_5')
print(results_m2_5.summary())

### M2_6

In [None]:
# M2_6 -- same regressors as M2_5 without 'Metro Hora Punta'.
regressors_m2_6 = [
    'SATURDAY', 'SUNDAY', 'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Bucle', 'Clima', 'visperas_laborales',
    'kms_metro', 'N_ZPs', 'ratio_tm', 'ratio_tm_2',
]
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, regressors_m2_6]

results_m2_6 = estimateWithStatsModels(Y, X, g_1, 'm2_6')
print(results_m2_6.summary())

### M2_7

In [None]:
# M2_7: same column list as the M2_6 cell, without 'Retraso_Metro'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m2_7_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro', 'Bucle',
       'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'ratio_tm_2']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m2_7_regressors]

results_m2_7 = estimateWithStatsModels(Y, X, g_1, 'm2_7')
print(results_m2_7.summary())

### M2_8

In [None]:
# M2_8: same column list as the M2_7 cell, without 'Corte_Metro'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m2_8_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Bucle', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'ratio_tm_2']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m2_8_regressors]

results_m2_8 = estimateWithStatsModels(Y, X, g_1, 'm2_8')
print(results_m2_8.summary())

### M2_9

In [None]:
# M2_9: same column list as the M2_8 cell, without 'Bucle'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m2_9_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'ratio_tm_2']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m2_9_regressors]

results_m2_9 = estimateWithStatsModels(Y, X, g_1, 'm2_9')
print(results_m2_9.summary())

### M2_10

In [None]:
# M2_10: same column list as the M2_9 cell, without 'ratio_tm'
# ('ratio_tm_2' is kept).
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m2_10_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs', 'ratio_tm_2']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m2_10_regressors]

results_m2_10 = estimateWithStatsModels(Y, X, g_1, 'm2_10')
print(results_m2_10.summary())

### M2_11

In [None]:
# M2_11: same column list as the M2_9 cell, without 'kms_metro'
# (both 'ratio_tm' and 'ratio_tm_2' are kept).
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m2_11_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'N_ZPs', 'ratio_tm', 'ratio_tm_2']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m2_11_regressors]

results_m2_11 = estimateWithStatsModels(Y, X, g_1, 'm2_11')
print(results_m2_11.summary())

### <font color='green'> M2_12 </font>

In [None]:
# M2_12: same column list as the M2_11 cell, without 'ratio_tm'
# (only 'ratio_tm_2' remains of the ratio terms).
# NOTE(review): the heading is highlighted in green, presumably marking this
# as the retained M2 specification -- confirm.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m2_12_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'N_ZPs', 'ratio_tm_2']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m2_12_regressors]

results_m2_12 = estimateWithStatsModels(Y, X, g_1, 'm2_12')
print(results_m2_12.summary())

### Building M3_0 based on M1_9. Storing all values in dummies_summary

In [None]:
# Upper bound (rounded up to an integer) of the observed 'ratio_tm' values;
# used as the last threshold tried when building the M3_0 dummy models.
# Series.max() yields the scalar directly. The previous boolean-mask lookup
# followed by .item() raised ValueError whenever the maximum occurred on more
# than one row.
max_ratio = math.ceil(complete_db['ratio_tm'].max())

In [None]:
# Parallel accumulators filled by the M3_0 threshold loop: the fitted result
# objects and the matching model names ('m3_0_<i>').
# Re-run this cell before re-running the loop, otherwise entries accumulate.
dummies_models, dummies_names = [], []

In [None]:
# Regressors shared by every M3_0 threshold model; only the interaction
# column 'r_tm_x_rtm_i' is recomputed on each iteration.
m3_0_base_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs', 'ratio_tm']
)

# Fit one model per integer threshold i in [0, max_ratio] and collect the
# results (and their names) for the comparison table.
# NOTE(review): for i = 0 the indicator is 1 on every row with ratio_tm >= 0,
# so the interaction then equals 'ratio_tm' on those rows -- confirm that
# including this threshold is intended.
for i in range(max_ratio + 1):
    model_name = 'm3_0_' + str(i)

    # Scratch columns, overwritten on every pass: indicator of ratio_tm >= i
    # and its product with ratio_tm.
    complete_db.loc[:, 'rtm_i'] = np.where(complete_db.loc[:, 'ratio_tm'] >= i, 1, 0)
    complete_db.loc[:, 'r_tm_x_rtm_i'] = complete_db.loc[:, 'ratio_tm'] * complete_db.loc[:, 'rtm_i']

    Y = complete_db.loc[:, 'TOTAL_trx']
    X = complete_db.loc[:, m3_0_base_regressors + ['r_tm_x_rtm_i']]

    results_m3_0_i = estimateWithStatsModels(Y, X, g_1, model_name)
    dummies_names.append(model_name)
    dummies_models.append(results_m3_0_i)

In [None]:
# Side-by-side table of every M3_0 threshold model, with significance stars
# and the extra statistics listed in info_dict.
dummies_summary = summary_col(
    dummies_models,
    model_names=dummies_names,
    stars=True,
    info_dict=info_dict,
)

### Building M3_1 based on M3_0

In [None]:
# Fixed-threshold version of the M3_0 scratch columns: 'rtm_18' flags rows
# with ratio_tm >= 18 and 'r_tm_x_rtm_18' is ratio_tm times that flag.
# NOTE(review): 18 was presumably picked from dummies_summary -- confirm.
ratio_tm_values = complete_db.loc[:, 'ratio_tm']
complete_db.loc[:, 'rtm_18'] = np.where(ratio_tm_values >= 18, 1, 0)
complete_db.loc[:, 'r_tm_x_rtm_18'] = ratio_tm_values * complete_db.loc[:, 'rtm_18']

In [None]:
# M3_1: full regressor set (calendar, event and supply variables) plus
# 'ratio_tm' and the threshold interaction 'r_tm_x_rtm_18'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_1_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 'Nov_2017', 'Dic_2017', 't', 'Feriado_laboral',
       'Feriado_no_laboral', 'Elecciones', 'Censo', 'Partido',
       'FDS_Largo', 'Disturbios', 'Corte_Metro', 'Retraso_Metro',
       'Incidente_Metro', 'Bucle', 'Clima', 'visperas_laborales',
       'kms_metro', 'N_ZPs', 'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_1_regressors]

results_m3_1 = estimateWithStatsModels(Y, X, g_1, 'm3_1')
print(results_m3_1.summary())

### M3_2

In [None]:
# M3_2: same column list as the M3_1 cell, without 'Elecciones'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_2_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 'Nov_2017', 'Dic_2017', 't', 'Feriado_laboral',
       'Feriado_no_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
       'Bucle', 'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_2_regressors]

results_m3_2 = estimateWithStatsModels(Y, X, g_1, 'm3_2')
print(results_m3_2.summary())

### M3_3

In [None]:
# M3_3: same column list as the M3_2 cell, without 'Nov_2017'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_3_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 'Dic_2017', 't', 'Feriado_laboral',
       'Feriado_no_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
       'Bucle', 'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_3_regressors]

results_m3_3 = estimateWithStatsModels(Y, X, g_1, 'm3_3')
print(results_m3_3.summary())

### M3_4

In [None]:
# M3_4: same column list as the M3_3 cell, without 'Dic_2017'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_4_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro',
       'Retraso_Metro', 'Incidente_Metro', 'Bucle', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_4_regressors]

results_m3_4 = estimateWithStatsModels(Y, X, g_1, 'm3_4')
print(results_m3_4.summary())

### M3_5

In [None]:
# M3_5: same column list as the M3_4 cell, without 'Incidente_Metro'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_5_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro',
       'Retraso_Metro', 'Bucle', 'Clima', 'visperas_laborales',
       'kms_metro', 'N_ZPs', 'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_5_regressors]

results_m3_5 = estimateWithStatsModels(Y, X, g_1, 'm3_5')
print(results_m3_5.summary())

### M3_6

In [None]:
# M3_6: same column list as the M3_5 cell, without 'Metro Hora Punta'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_6_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro',
       'Retraso_Metro', 'Bucle', 'Clima', 'visperas_laborales',
       'kms_metro', 'N_ZPs', 'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_6_regressors]

results_m3_6 = estimateWithStatsModels(Y, X, g_1, 'm3_6')
print(results_m3_6.summary())

### M3_7

In [None]:
# M3_7: same column list as the M3_6 cell, without 'Retraso_Metro'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_7_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro', 'Bucle',
       'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_7_regressors]

results_m3_7 = estimateWithStatsModels(Y, X, g_1, 'm3_7')
print(results_m3_7.summary())

### M3_8

In [None]:
# M3_8: same column list as the M3_7 cell, without 'Corte_Metro'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_8_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Bucle', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_8_regressors]

results_m3_8 = estimateWithStatsModels(Y, X, g_1, 'm3_8')
print(results_m3_8.summary())

### M3_9

In [None]:
# M3_9: same column list as the M3_8 cell, without 'Bucle'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_9_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs',
       'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_9_regressors]

results_m3_9 = estimateWithStatsModels(Y, X, g_1, 'm3_9')
print(results_m3_9.summary())

### M3_10

In [None]:
# M3_10: same column list as the M3_9 cell, without 'ratio_tm'
# (only the interaction 'r_tm_x_rtm_18' remains).
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_10_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_10_regressors]

results_m3_10 = estimateWithStatsModels(Y, X, g_1, 'm3_10')
print(results_m3_10.summary())

### <font color='green'>M3_11</font>

In [None]:
# M3_11: same column list as the M3_10 cell, without 'kms_metro'.
# NOTE(review): the heading is highlighted in green, presumably marking this
# as the retained M3 specification -- confirm.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_11_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'N_ZPs', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_11_regressors]

results_m3_11 = estimateWithStatsModels(Y, X, g_1, 'm3_11')
print(results_m3_11.summary())

### M3_12

In [None]:
# M3_12: same column list as the M3_11 cell, with 'ratio_tm' added back
# next to the interaction 'r_tm_x_rtm_18'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m3_12_regressors = (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima',
       'visperas_laborales', 'N_ZPs', 'ratio_tm', 'r_tm_x_rtm_18']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m3_12_regressors]

results_m3_12 = estimateWithStatsModels(Y, X, g_1, 'm3_12')
print(results_m3_12.summary())

### M4_1

In [None]:
# Log-transformed ratio used by the M4 models: log(1 + ratio_tm).
# np.log1p is applied to the whole column at once instead of calling
# math.log1p row by row through Series.apply. Values <= -1 now yield
# -inf/NaN (with a NumPy warning) rather than raising ValueError.
complete_db.loc[:, 'log(r_tm + 1)'] = np.log1p(complete_db['ratio_tm'])

In [None]:
# M4_1: full regressor set (calendar, event and supply variables) with the
# ratio entering only through 'log(r_tm + 1)'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m4_1_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 'Nov_2017', 'Dic_2017', 't', 'Feriado_laboral',
       'Feriado_no_laboral', 'Elecciones', 'Censo', 'Partido',
       'FDS_Largo', 'Disturbios', 'Corte_Metro', 'Retraso_Metro',
       'Incidente_Metro', 'Bucle', 'Clima', 'visperas_laborales',
       'kms_metro', 'N_ZPs', 'log(r_tm + 1)']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m4_1_regressors]

results_m4_1 = estimateWithStatsModels(Y, X, g_1, 'm4_1')
print(results_m4_1.summary())

### M4_2

In [None]:
# M4_2: same column list as the M4_1 cell, without 'Elecciones'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m4_2_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 'Nov_2017', 'Dic_2017', 't', 'Feriado_laboral',
       'Feriado_no_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
       'Bucle', 'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'log(r_tm + 1)']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m4_2_regressors]

results_m4_2 = estimateWithStatsModels(Y, X, g_1, 'm4_2')
print(results_m4_2.summary())

### M4_3

In [None]:
# M4_3: same column list as the M4_2 cell, without 'Nov_2017'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m4_3_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 'Dic_2017', 't', 'Feriado_laboral',
       'Feriado_no_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
       'Bucle', 'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'log(r_tm + 1)']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m4_3_regressors]

results_m4_3 = estimateWithStatsModels(Y, X, g_1, 'm4_3')
print(results_m4_3.summary())

### M4_4

In [None]:
# M4_4: same column list as the M4_3 cell, without 'Dic_2017'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m4_4_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro',
       'Retraso_Metro', 'Incidente_Metro', 'Bucle', 'Clima',
       'visperas_laborales', 'kms_metro', 'N_ZPs', 'log(r_tm + 1)']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m4_4_regressors]

results_m4_4 = estimateWithStatsModels(Y, X, g_1, 'm4_4')
print(results_m4_4.summary())

### M4_5

In [None]:
# M4_5: same column list as the M4_4 cell, without 'Incidente_Metro'.
# estimateWithStatsModels and g_1 are defined elsewhere in the notebook.
m4_5_regressors = (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    + ['WEEK_OF_YEAR_{}'.format(week)
       for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro',
       'Retraso_Metro', 'Bucle', 'Clima', 'visperas_laborales',
       'kms_metro', 'N_ZPs', 'log(r_tm + 1)']
)
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, m4_5_regressors]

results_m4_5 = estimateWithStatsModels(Y, X, g_1, 'm4_5')
print(results_m4_5.summary())

### M4_6

In [None]:
# M4_6: same columns as the M4_5 cell above, minus 'Retraso_Metro'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY', 'Metro Hora Punta', 'kms_ofertados']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Corte_Metro',
       'Bucle', 'Clima', 'visperas_laborales', 'kms_metro', 'N_ZPs',
       'log(r_tm + 1)']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm4_6' is the
# tag passed along with this specification.
results_m4_6 = estimateWithStatsModels(Y, X, g_1, 'm4_6')
print(results_m4_6.summary())

----------------

### M5_1 based on M1_9

In [None]:
# M5_1 (the heading says it is based on M1_9): TOTAL_trx against calendar,
# event and supply columns plus the three pn_SUM_EXP_* columns.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima', 'visperas_laborales',
       'kms_metro', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_1' is the
# tag passed along with this specification.
results_m5_1 = estimateWithStatsModels(Y, X, g_1, 'm5_1')
print(results_m5_1.summary())

### M5_2 based on M1_10

In [None]:
# M5_2 (the heading says it is based on M1_10): the M5_1 column list from the
# cell above without 'kms_metro'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY', 'kms_ofertados']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima', 'visperas_laborales',
       'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_2' is the
# tag passed along with this specification.
results_m5_2 = estimateWithStatsModels(Y, X, g_1, 'm5_2')
print(results_m5_2.summary())

### M5_3

In [None]:
# M5_3: the M5_2 column list from the cell above without 'kms_ofertados'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima', 'visperas_laborales',
       'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_3' is the
# tag passed along with this specification.
results_m5_3 = estimateWithStatsModels(Y, X, g_1, 'm5_3')
print(results_m5_3.summary())

### M5_4

In [None]:
# M5_4: the M5_3 column list from the cell above without 'WEEK_OF_YEAR_2'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 3, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima', 'visperas_laborales',
       'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_4' is the
# tag passed along with this specification.
results_m5_4 = estimateWithStatsModels(Y, X, g_1, 'm5_4')
print(results_m5_4.summary())

### M5_5

In [None]:
# M5_5: the M5_4 column list from the cell above without 'WEEK_OF_YEAR_3'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo',
       'Partido', 'FDS_Largo', 'Disturbios', 'Clima', 'visperas_laborales',
       'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_5' is the
# tag passed along with this specification.
results_m5_5 = estimateWithStatsModels(Y, X, g_1, 'm5_5')
print(results_m5_5.summary())

### M5_6

In [None]:
# M5_6: the M5_5 column list from the cell above without 'Feriado_no_laboral'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Clima', 'visperas_laborales', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_6' is the
# tag passed along with this specification.
results_m5_6 = estimateWithStatsModels(Y, X, g_1, 'm5_6')
print(results_m5_6.summary())

### M5_7

In [None]:
# M5_7: the M5_6 column list from the cell above without 'visperas_laborales'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 1, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Clima', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_7' is the
# tag passed along with this specification.
results_m5_7 = estimateWithStatsModels(Y, X, g_1, 'm5_7')
print(results_m5_7.summary())

### M5_8

In [None]:
# M5_8: the M5_7 column list from the cell above without 'WEEK_OF_YEAR_1'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Censo', 'Partido', 'FDS_Largo',
       'Disturbios', 'Clima', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_8' is the
# tag passed along with this specification.
results_m5_8 = estimateWithStatsModels(Y, X, g_1, 'm5_8')
print(results_m5_8.summary())

### <font color='green'>M5_9</font>

In [None]:
# M5_9 (highlighted in green in its heading): the M5_8 column list from the
# cell above without 'Partido'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['SATURDAY', 'SUNDAY']
    # Week-of-year dummies, kept in the order they enter the design matrix.
    + ['WEEK_OF_YEAR_%d' % week for week in (52, 53, 4, 5, 6, 7, 8, 9)]
    + ['Julio', 't', 'Feriado_laboral', 'Censo', 'FDS_Largo',
       'Disturbios', 'Clima', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_9' is the
# tag passed along with this specification.
results_m5_9 = estimateWithStatsModels(Y, X, g_1, 'm5_9')
print(results_m5_9.summary())

### M5_10

In [None]:
# M5_10: reduced specification with no weekday, week-of-year or event columns;
# only the trend 't', 'Clima', 'N_ZPs' and the three pn_SUM_EXP_* columns remain.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['t', 'Clima', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_10' is the
# tag passed along with this specification.
results_m5_10 = estimateWithStatsModels(Y, X, g_1, 'm5_10')
print(results_m5_10.summary())

### M5_11

In [None]:
# M5_11: the M5_10 column list from the cell above without the trend 't'.
Y = complete_db.loc[:, 'TOTAL_trx']
X = complete_db.loc[:, (
    ['Clima', 'N_ZPs']
    + ['pn_SUM_EXP_no_t', 'pn_SUM_EXP_3t', 'pn_SUM_EXP_tm']
)]

# estimateWithStatsModels is defined earlier in the notebook; 'm5_11' is the
# tag passed along with this specification.
results_m5_11 = estimateWithStatsModels(Y, X, g_1, 'm5_11')
print(results_m5_11.summary())

* Summarizing every model tested

In [None]:
# Side-by-side comparison table of fitted models, with significance stars and
# the extra statistics from info_dict (N, adjusted R2, AIC, F, P_F, DW).
# The list of results objects is still an empty placeholder.
# NOTE(review): summary_col presumably fails on an empty list; confirm, and
# pass the results_* objects to compare before running this cell.
final_summary = summary_col([],stars=True, info_dict=info_dict) # TODO: fill the first argument with the fitted results to compare.

* Plotting

Interactive, Daily

In [None]:
def interactiveDailyPlotting(days_values, yact, ypred, model_name):
    """Plot actual vs. predicted transactions per day as an interactive chart.

    days_values supplies the x axis; yact and ypred are drawn as the 'Actual'
    and 'Predict' traces. model_name is appended to the chart title and used
    in the output file name 'act_vs_predict_day_<model_name>.html', which is
    produced through plotly.offline.plot. Nothing is returned.
    """
    # (legend label, y values, line colour) for each trace, in drawing order.
    series = [
        ('Actual', yact, 'rgb(76, 181, 245)'),
        ('Predict', ypred, 'rgb(183, 184, 182)'),
    ]
    traces = [
        go.Scatter(x=days_values, y=values, name=label,
                   marker=dict(color=rgb), opacity=1)
        for label, values, rgb in series
    ]
    figure = go.Figure(
        data=traces,
        layout=go.Layout(
            title='Transacciones por día (actual vs. predict.), ' + model_name,
            yaxis=dict(title='Transacciones')
        )
    )
    plotly.offline.plot(figure, filename='act_vs_predict_day_' + model_name + '.html')

In [None]:
# x-axis values (the DATE column) and observed totals for the daily plot, one
# entry per row of complete_db.
days_values = complete_db.loc[:,'DATE']
# NOTE: the monthly cell further down rebinds `yact` to per-month sums, so
# re-run this cell before re-running the daily plot.
yact = complete_db.loc[:,'TOTAL_trx']

In [None]:
# One call per model; only the M10 chart is currently enabled. Uncomment a
# line to produce the daily chart for another model.
# NOTE(review): the ypred_m* series are not defined in this part of the
# notebook; presumably they are created earlier. Verify before running.
#interactiveDailyPlotting(days_values, yact, ypred_m1, 'M1')
#interactiveDailyPlotting(days_values, yact, ypred_m2, 'M2')
#interactiveDailyPlotting(days_values, yact, ypred_m3, 'M3')
#interactiveDailyPlotting(days_values, yact, ypred_m4, 'M4')
#interactiveDailyPlotting(days_values, yact, ypred_m5, 'M5')
#interactiveDailyPlotting(days_values, yact, ypred_m6, 'M6')
#interactiveDailyPlotting(days_values, yact, ypred_m7, 'M7')
#interactiveDailyPlotting(days_values, yact, ypred_m8, 'M8')
#interactiveDailyPlotting(days_values, yact, ypred_m9, 'M9')
interactiveDailyPlotting(days_values, yact, ypred_m10, 'M10')

Interactive, Monthly

In [None]:
def interactiveMonthlyPlotting(month_values, yact, ypred, model_name):
    """Plot actual vs. predicted transactions per month as an interactive chart.

    month_values supplies the x axis; yact and ypred are drawn as the 'Actual'
    and 'Predict' traces. model_name is appended to the chart title and used
    in the output file name 'act_vs_predict_month_<model_name>.html', which is
    produced through plotly.offline.plot. Nothing is returned.
    """
    actual_trace = go.Scatter(
        x=month_values,
        y=yact,
        name='Actual',
        marker=dict(color='rgb(76, 181, 245)'),
        opacity=1
    )
    predicted_trace = go.Scatter(
        x=month_values,
        y=ypred,
        name='Predict',
        marker=dict(color='rgb(183, 184, 182)'),
        opacity=1
    )
    chart_layout = go.Layout(
        title='Transacciones por mes (actual vs. predict.), ' + model_name,
        yaxis=dict(title='Transacciones')
    )

    figure = go.Figure(data=[actual_trace, predicted_trace], layout=chart_layout)
    output_name = 'act_vs_predict_month_' + model_name + '.html'
    plotly.offline.plot(figure, filename=output_name)

In [None]:
# Aggregate the observed and predicted series by month for the monthly plots.
# Rows are grouped on the DATE column at month frequency ('M').
# NOTE(review): this assumes DATE is datetime-like and that complete_db holds
# the ypred_m1..ypred_m10 columns; neither is shown in this part of the notebook.
grouped_by_month = complete_db.groupby(pd.Grouper(key='DATE', freq='M'))
# Group keys, used as the x axis of the monthly plots.
months_values = list(grouped_by_month.groups.keys())
# NOTE: from here on `yact` and the `ypred_m*` names hold per-month sums. They
# replace the values the daily plotting cell above reads under the same names.
yact = grouped_by_month['TOTAL_trx'].sum()
ypred_m1 = grouped_by_month['ypred_m1'].sum()
ypred_m2 = grouped_by_month['ypred_m2'].sum()
ypred_m3 = grouped_by_month['ypred_m3'].sum()
ypred_m4 = grouped_by_month['ypred_m4'].sum()
ypred_m5 = grouped_by_month['ypred_m5'].sum()
ypred_m6 = grouped_by_month['ypred_m6'].sum()
ypred_m7 = grouped_by_month['ypred_m7'].sum()
ypred_m8 = grouped_by_month['ypred_m8'].sum()
ypred_m9 = grouped_by_month['ypred_m9'].sum()
ypred_m10 = grouped_by_month['ypred_m10'].sum()

In [None]:
# One call per model; only the M10 chart is currently enabled. Uncomment a
# line to produce the interactive monthly chart for another model.
#interactiveMonthlyPlotting(months_values,yact,ypred_m1, 'M1')
#interactiveMonthlyPlotting(months_values,yact,ypred_m2, 'M2')
#interactiveMonthlyPlotting(months_values,yact,ypred_m3, 'M3')
#interactiveMonthlyPlotting(months_values,yact,ypred_m4, 'M4')
#interactiveMonthlyPlotting(months_values,yact,ypred_m5, 'M5')
#interactiveMonthlyPlotting(months_values,yact,ypred_m6, 'M6')
#interactiveMonthlyPlotting(months_values,yact,ypred_m7, 'M7')
#interactiveMonthlyPlotting(months_values,yact,ypred_m8, 'M8')
#interactiveMonthlyPlotting(months_values,yact,ypred_m9, 'M9')
interactiveMonthlyPlotting(months_values,yact,ypred_m10, 'M10')

Static, Monthly

In [None]:
# Plot palette: 0-255 RGB triples rescaled to 0-1 float tuples.
colors = [
    tuple(channel / 255. for channel in rgb)
    for rgb in [(76, 181, 245), (183, 184, 182), (52, 103, 92), (179, 193, 0)]
]

In [None]:
def millions(x, pos):
    """Format a tick value in millions with one decimal, e.g. 1500000 -> '1.5M'.

    x is the tick value and pos the tick position. pos is accepted because
    the function is wrapped in a FuncFormatter, but it is not used.
    """
    in_millions = x * 1e-6
    return '{:1.1f}M'.format(in_millions)

In [None]:
def staticMonthlyPlotting(month_values, yact, ypred, model_name,
                          output_dir='C:/Users/leoca_000/Desktop/Evasion/03_report/02_Figs'):
    """Save a static actual-vs-predicted monthly line chart as a PDF.

    Parameters
    ----------
    month_values : x-axis values for both lines.
    yact : observed values, drawn as the 'Actual' line (circle markers).
    ypred : predicted values, drawn as the 'Predict' line (diamond markers).
    model_name : str
        Appended to the chart title and used in the output file name
        'act_vs_predict_month_<model_name>.pdf'.
    output_dir : str, optional
        Directory the PDF is written to. The default is the report folder
        that used to be hard-coded in this function.

    Returns
    -------
    None
    """
    fig, ax = plt.subplots()
    # Plot against the `month_values` argument. The previous body read the
    # notebook-level `months_values` and silently ignored the parameter.
    # The format strings carry only marker and line style; the colour comes
    # from the keyword, so it is no longer defined twice ('ro-' plus color=).
    ax.plot(month_values, yact, 'o-', color=colors[0], label='Actual')
    ax.plot(month_values, ypred, 'd-', color=colors[1], label='Predict')
    # Show y ticks as millions ('1.5M').
    ax.yaxis.set_major_formatter(FuncFormatter(millions))
    ax.set_title('Transacciones - Actual vs. Predict., ' + model_name)
    ax.set_ylabel('Transacciones')
    ax.set_xlabel('Mes del año')
    fig.autofmt_xdate()
    ax.legend(loc=3)

    # Save this figure explicitly instead of whichever figure is current.
    fig.savefig(output_dir + '/act_vs_predict_month_' + model_name + '.pdf')

In [None]:
# One call per model; only the M10 figure is currently enabled. Uncomment a
# line to save the static monthly figure for another model.
#staticMonthlyPlotting(months_values,yact,ypred_m1, 'M1')
#staticMonthlyPlotting(months_values,yact,ypred_m2, 'M2')
#staticMonthlyPlotting(months_values,yact,ypred_m3, 'M3')
#staticMonthlyPlotting(months_values,yact,ypred_m4, 'M4')
#staticMonthlyPlotting(months_values,yact,ypred_m5, 'M5')
#staticMonthlyPlotting(months_values,yact,ypred_m6, 'M6')
#staticMonthlyPlotting(months_values,yact,ypred_m7, 'M7')
#staticMonthlyPlotting(months_values,yact,ypred_m8, 'M8')
#staticMonthlyPlotting(months_values,yact,ypred_m9, 'M9')
staticMonthlyPlotting(months_values,yact,ypred_m10, 'M10')

### Printing everything to file

In [None]:
# Write the full working table to the results folder under DTPM_TRXDir as a
# ';'-separated, latin-1 encoded CSV.
complete_ddbb_path = os.path.join(DTPM_TRXDir,'5_RESULTS/1_SYSTEM/0_original/contemporary_complete_db.csv')
complete_db.to_csv(complete_ddbb_path,sep=';',encoding='latin-1')

# Write the OLS summary table next to it, in the same format.
# NOTE(review): OLS_Summary is not defined in this part of the notebook (the
# summary cell above assigns `final_summary`); confirm it is created earlier.
OLS_Summary_path = os.path.join(DTPM_TRXDir,'5_RESULTS/1_SYSTEM/0_original/contemporary_OLS_Summary.csv')
OLS_Summary.to_csv(OLS_Summary_path, sep=';',encoding='latin-1')

## Closed