# More analysis of contemporary system-models

Includes:
1. Analysis of how the threshold on the with-turnstile percentage evolves
2. Analysis of the significance of the variables

In [1]:
import os
import sys
# Make the parent directory importable so project-local packages can be found.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [2]:
%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import plotly.plotly
import plotly.tools as tls
import plotly.graph_objs as go

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
import numpy as np
import datetime as dt
import time
import math

In [3]:
import statsmodels.api as sm


The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.



In [4]:
from Utils import TransantiagoConstants

In [5]:
# Directory locations read from the project's TransantiagoConstants module.
# NOTE(review): presumably base folders for the DTPM data and its transaction files — confirm in Utils.
DTPMDir = TransantiagoConstants.DTPMDir
DTPM_TRXDir = TransantiagoConstants.DTPM_TRXDir

In [6]:
# Daily summary table: semicolon-separated, Latin-1 encoded, first column used as the index.
daily_input_path = os.path.join(DTPM_TRXDir, '3_DAILY/daily_summary.csv')
daily_trx = pd.read_csv(
    daily_input_path,
    sep=';',
    encoding='latin-1',
    index_col=0,
)

* DAILY AT SYSTEM LEVEL. CREATING NEW DEPENDENT AND INDEPENDENT VARIABLES

In [7]:
# Dependent variable: row-wise total of the four transaction-count columns.
# Plain '+' is used on purpose, so a missing value in any component yields a missing total.
daily_trx.loc[:, 'TOTAL_trx'] = (
    daily_trx.loc[:, 'pn_SUM_TRX_no_t']
    + daily_trx.loc[:, 'pn_SUM_TRX_3t']
    + daily_trx.loc[:, 'pn_SUM_TRX_tm']
    + daily_trx.loc[:, 'zp_SUM_TRX']
)

In [8]:
# Independent variables table: same CSV conventions as the daily summary;
# the file's second column (position 1) is parsed as dates.
independent_variables_path = os.path.join(DTPM_TRXDir, '0_INDEPENDENTS/independents_variables.csv')
independent_variables = pd.read_csv(
    independent_variables_path,
    sep=';',
    encoding='latin-1',
    index_col=0,
    parse_dates=[1],
)

In [9]:
# 'Verano' is the sum of the 'Enero' and 'Febrero' columns; 'Nov_Dic_2017' is the sum of
# 'Nov_2017' and 'Dic_2017'.
# NOTE(review): presumably these inputs are 0/1 month dummies, making the sums 0/1 as well — confirm.
independent_variables.loc[:,'Verano'] =  independent_variables.loc[:,'Enero'] + independent_variables.loc[:,'Febrero']
independent_variables.loc[:,'Nov_Dic_2017'] = independent_variables.loc[:,'Nov_2017'] + independent_variables.loc[:,'Dic_2017']

# Week number of every date, expanded into one indicator column per week (WEEK_OF_YEAR_<n>).
independent_variables.loc[:,'WEEK_OF_YEAR'] = independent_variables.loc[:,'DATE'].apply(lambda x: x.week)
independent_variables = pd.get_dummies(independent_variables, columns=['WEEK_OF_YEAR'])

# pandas >= 2.0 returns boolean indicator columns from get_dummies, whereas earlier releases
# returned uint8. A design matrix mixing bool and float columns can be converted to an object
# array, which statsmodels rejects, so the indicators are forced to uint8 here. On older pandas
# this cast leaves the data unchanged.
week_dummy_columns = [c for c in independent_variables.columns if str(c).startswith('WEEK_OF_YEAR_')]
independent_variables = independent_variables.astype({c: 'uint8' for c in week_dummy_columns})

In [10]:
# Left join: every row of daily_trx is kept and the independent variables are attached
# wherever YEAR, MONTH and YEAR_DAY all match.
complete_db = daily_trx.merge(independent_variables, on =['YEAR','MONTH','YEAR_DAY'], how='left')

In [11]:
# Sort in place, ascending, by YEAR, then MONTH, then YEAR_DAY.
complete_db.sort_values(by=['YEAR','MONTH','YEAR_DAY'], ascending=[True,True,True], inplace=True)

* Dataframe to summarize results for dummy analyses

In [12]:
# Starts empty; the threshold loop below appends three rows (params, t, p) per fitted model
# through summarizingResults.
dummies_summary = pd.DataFrame()

In [13]:
def summarizingResults(x, df, i):
    """Append the coefficients, t-values and p-values of one fitted model to ``df``.

    Parameters
    ----------
    x : object
        Fitted-results object exposing ``params``, ``tvalues`` and ``pvalues``
        as pandas Series.
    df : pandas.DataFrame
        Table accumulated so far; it is not modified.
    i : object
        Label of the run; ``str(i)`` is used as the suffix of the new row names.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by three new rows named ``params_<i>``, ``t_<i>`` and
        ``p_<i>``, with one column per model term.
    """
    suffix = str(i)
    # Each Series is renamed so that, once transposed, its name becomes the row label.
    labelled = [
        getattr(x, attribute).rename(prefix + suffix)
        for attribute, prefix in (('params', 'params_'),
                                  ('tvalues', 't_'),
                                  ('pvalues', 'p_'))
    ]
    new_rows = pd.concat(labelled, axis=1).T
    return pd.concat([df, new_rows], axis=0)

## Begin analyses for dummy variables...

* The model selected to analyse the evolution of the 'r_tm_x_rtm_i' variable is M8.

In [14]:
# Upper bound of the threshold scan: the largest observed ratio_tm rounded up to an integer.
# The maximum is taken directly from the column; selecting the rows equal to the maximum and
# calling .item() raises ValueError whenever that maximum occurs on more than one row.
max_ratio = math.ceil(float(complete_db['ratio_tm'].max()))

In [15]:
# Threshold scan for model M8: for every integer threshold i in [0, max_ratio] the model is
# re-estimated with the interaction ratio_tm * 1[ratio_tm >= i], and its coefficients,
# t-values and p-values are appended to dummies_summary.
#
# NOTE(review): the end points may be degenerate. If ratio_tm is never negative (presumably,
# being a ratio — confirm), the i = 0 interaction equals ratio_tm itself; and when the largest
# ratio_tm is not an integer, the i = max_ratio interaction is zero on every row.
# NOTE(review): dummies_summary is created in an earlier cell, so re-running only this cell
# appends a second copy of every row.

# The dependent variable and the list of regressors do not change between iterations.
Y = complete_db.loc[:, 'TOTAL_trx']
m8_regressors = [
    'SATURDAY', 'SUNDAY',
    'ratio_tm', 'r_tm_x_rtm_i',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'FDS_Largo', 'Disturbios', 'Clima', 'Partido', 'Censo',
    'visperas_laborales',
    'N_ZPs',
]

for i in range(0, max_ratio + 1):

    # Indicator of rows whose ratio_tm has reached the candidate threshold i ...
    complete_db.loc[:, 'rtm_i'] = np.where((i <= complete_db.loc[:, 'ratio_tm']), 1, 0)
    # ... and its interaction with ratio_tm (overwritten on every iteration).
    complete_db.loc[:, 'r_tm_x_rtm_i'] = complete_db.loc[:, 'ratio_tm'] * complete_db.loc[:, 'rtm_i']

    # The design matrix must be rebuilt each time because 'r_tm_x_rtm_i' has just changed.
    X = sm.add_constant(complete_db.loc[:, m8_regressors])
    m = sm.OLS(Y, X)
    results = m.fit()

    dummies_summary = summarizingResults(results, dummies_summary, i)

In [16]:
#dummies_summary_path = os.path.join(DTPM_TRXDir,'5_RESULTS/1_SYSTEM/0_original/contemporary_dummies_summary.csv')
#dummies_summary.to_csv(dummies_summary_path, sep=';',encoding='latin-1')

--------------

## Begin analyses for variables-significance...

* M5

In [17]:
# M5, first specification: every candidate regressor is included.
regressors_m5_1 = [
    'SATURDAY', 'SUNDAY',
    'Metro Hora Punta',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Elecciones', 'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_1])
m5_1 = sm.OLS(Y, X)
results_m5_1 = m5_1.fit()
print(results_m5_1.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     711.4
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        15:56:51   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.958e+04
Df Residuals:                    1060   BIC:                         2.976e+04
Df Model:                          35                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2284.3987   1375

Elecciones is omitted...

In [19]:
# M5 variant m5_2: 'Elecciones' is not among the regressors.
regressors_m5_2 = [
    'SATURDAY', 'SUNDAY',
    'Metro Hora Punta',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Nov_2017', 'Dic_2017',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_2])
m5_2 = sm.OLS(Y, X)
results_m5_2 = m5_2.fit()
print(results_m5_2.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     733.0
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:01:58   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.958e+04
Df Residuals:                    1061   BIC:                         2.975e+04
Df Model:                          34                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2285.2504   1375

Nov_2017 is omitted...

In [20]:
# M5 variant m5_3: neither 'Elecciones' nor 'Nov_2017' is among the regressors.
regressors_m5_3 = [
    'SATURDAY', 'SUNDAY',
    'Metro Hora Punta',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio', 'Dic_2017',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_3])
m5_3 = sm.OLS(Y, X)
results_m5_3 = m5_3.fit()
print(results_m5_3.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     755.9
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:02:52   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.957e+04
Df Residuals:                    1062   BIC:                         2.974e+04
Df Model:                          33                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2469.3598   1039

Dic_2017 is omitted...

In [21]:
# M5 variant m5_4: 'Elecciones', 'Nov_2017' and 'Dic_2017' are not among the regressors.
regressors_m5_4 = [
    'SATURDAY', 'SUNDAY',
    'Metro Hora Punta',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_4])
m5_4 = sm.OLS(Y, X)
results_m5_4 = m5_4.fit()
print(results_m5_4.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     780.2
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:04:05   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.957e+04
Df Residuals:                    1063   BIC:                         2.974e+04
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2520.4439   1016

Metro Hora Punta is omitted...

In [22]:
# M5 variant m5_5: same regressors as m5_4 except that 'Metro Hora Punta' is left out.
regressors_m5_5 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro', 'Incidente_Metro',
    'Bucle', 'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_5])
m5_5 = sm.OLS(Y, X)
results_m5_5 = m5_5.fit()
print(results_m5_5.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     806.1
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:04:52   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.957e+04
Df Residuals:                    1064   BIC:                         2.973e+04
Df Model:                          31                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2269.9127    240

Incidente_Metro is omitted...

In [23]:
# M5 variant m5_6: same regressors as m5_5 except that 'Incidente_Metro' is left out.
regressors_m5_6 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Bucle', 'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_6])
m5_6 = sm.OLS(Y, X)
results_m5_6 = m5_6.fit()
print(results_m5_6.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     833.6
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:05:52   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.957e+04
Df Residuals:                    1065   BIC:                         2.972e+04
Df Model:                          30                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2266.8529    240

Bucle is omitted...

In [24]:
# M5 variant m5_7: same regressors as m5_6 except that 'Bucle' is left out.
regressors_m5_7 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Corte_Metro', 'Retraso_Metro',
    'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_7])
m5_7 = sm.OLS(Y, X)
results_m5_7 = m5_7.fit()
print(results_m5_7.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     862.8
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:07:14   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.957e+04
Df Residuals:                    1066   BIC:                         2.972e+04
Df Model:                          29                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2269.8924    240

Corte_Metro is omitted...

In [26]:
# M5 variant m5_8: same regressors as m5_7 except that 'Corte_Metro' is left out.
regressors_m5_8 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Retraso_Metro',
    'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_8])
m5_8 = sm.OLS(Y, X)
results_m5_8 = m5_8.fit()
print(results_m5_8.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     894.3
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:08:27   Log-Likelihood:                -14753.
No. Observations:                1096   AIC:                         2.956e+04
Df Residuals:                    1067   BIC:                         2.971e+04
Df Model:                          28                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2268.4856    240

Retraso_Metro is omitted...

In [27]:
# M5 variant m5_9: same regressors as m5_8 except that 'Retraso_Metro' is left out.
regressors_m5_9 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima',
    'visperas_laborales',
    'kms_metro', 'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_9])
m5_9 = sm.OLS(Y, X)
results_m5_9 = m5_9.fit()
print(results_m5_9.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     926.4
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:09:25   Log-Likelihood:                -14754.
No. Observations:                1096   AIC:                         2.956e+04
Df Residuals:                    1068   BIC:                         2.970e+04
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2279.3360    240

estaciones_metro is omitted since kms_metro makes more sense...

In [28]:
# M5 variant m5_10: 'kms_metro' is kept and 'estaciones_metro' is left out.
regressors_m5_10 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima',
    'visperas_laborales',
    'kms_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_10])
m5_10 = sm.OLS(Y, X)
results_m5_10 = m5_10.fit()
print(results_m5_10.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     926.4
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:10:44   Log-Likelihood:                -14754.
No. Observations:                1096   AIC:                         2.956e+04
Df Residuals:                    1068   BIC:                         2.970e+04
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2.508e+06   2.23

estaciones_metro is re-incorporated and kms_metro is omitted...

In [29]:
# M5 variant m5_11: 'estaciones_metro' is kept and 'kms_metro' is left out.
regressors_m5_11 = [
    'SATURDAY', 'SUNDAY',
    'kms_ofertados',
    'WEEK_OF_YEAR_52', 'WEEK_OF_YEAR_53',
    'WEEK_OF_YEAR_1', 'WEEK_OF_YEAR_2', 'WEEK_OF_YEAR_3',
    'WEEK_OF_YEAR_4', 'WEEK_OF_YEAR_5', 'WEEK_OF_YEAR_6',
    'WEEK_OF_YEAR_7', 'WEEK_OF_YEAR_8', 'WEEK_OF_YEAR_9',
    'Julio',
    't',
    'Feriado_laboral', 'Feriado_no_laboral',
    'Censo', 'Partido', 'FDS_Largo', 'Disturbios',
    'Clima',
    'visperas_laborales',
    'estaciones_metro',
    'N_ZPs', 'ratio_tm',
]

Y = complete_db.loc[:, 'TOTAL_trx']
# add_constant contributes the intercept column of the design matrix.
X = sm.add_constant(complete_db.loc[:, regressors_m5_11])
m5_11 = sm.OLS(Y, X)
results_m5_11 = m5_11.fit()
print(results_m5_11.summary())

                            OLS Regression Results                            
Dep. Variable:              TOTAL_trx   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     926.4
Date:                Mon, 26 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:19:51   Log-Likelihood:                -14754.
No. Observations:                1096   AIC:                         2.956e+04
Df Residuals:                    1068   BIC:                         2.970e+04
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               2.299e+06   3.35

## Closed