## Time series estimation

In [1]:
import os
import sys
# Put the parent of the current working directory on the import path so that
# modules living one level above this notebook can be imported.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
%matplotlib inline
# `IPython.display` is the public home of `display` and `HTML`; importing them
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Inject a CSS rule that widens the notebook container to 80% of the page.
display(HTML("<style>.container { width:80% !important; }</style>"))

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
import time

In [3]:
import statsmodels.api as sm

  from pandas.core import datetools


In [4]:
from Utils import TransantiagoConstants

In [5]:
# Directory settings taken from the project's TransantiagoConstants module.
# DTPM_TRXDir is the base directory the CSV inputs below are read from.
DTPM_TRXDir = TransantiagoConstants.DTPM_TRXDir
DTPMDir = TransantiagoConstants.DTPMDir

In [6]:
# Read the table of explanatory variables: semicolon-separated, latin-1 encoded,
# with the first CSV column used as the row index.
independent_variables_path = os.path.join(DTPM_TRXDir, '0_INDEPENDENTS/independents_variables.csv')
independent_variables = pd.read_csv(
    independent_variables_path,
    sep=';',
    encoding='latin-1',
    index_col=0,
)

In [7]:
# Order the rows by YEAR, then MONTH, then YEAR_DAY.
independent_variables = independent_variables.sort_values(by=['YEAR', 'MONTH', 'YEAR_DAY'])

### DAILY AT SYSTEM LEVEL. CREATING NEW INDEPENDENT VARIABLES

In [8]:
# Read the daily transaction summary: semicolon-separated, latin-1 encoded,
# with the first CSV column used as the row index.
daily_input_path = os.path.join(DTPM_TRXDir, '3_DAILY/daily_summary.csv')
daily_trx = pd.read_csv(
    daily_input_path,
    sep=';',
    encoding='latin-1',
    index_col=0,
)

In [9]:
# Order the rows by YEAR, then MONTH, then YEAR_DAY.
daily_trx = daily_trx.sort_values(by=['YEAR', 'MONTH', 'YEAR_DAY'])

In [10]:
# 'Verano' is the element-wise sum of the 'Enero' and 'Febrero' columns
# (presumably monthly indicator flags -- confirm against the CSV).
enero_plus_febrero = independent_variables['Enero'] + independent_variables['Febrero']
independent_variables.loc[:, 'Verano'] = enero_plus_febrero

In [11]:
# 'Nov_Dic_2017' is the element-wise sum of the 'Nov_2017' and 'Dic_2017' columns
# (presumably period indicator flags -- confirm against the CSV).
nov_plus_dic_2017 = independent_variables['Nov_2017'] + independent_variables['Dic_2017']
independent_variables.loc[:, 'Nov_Dic_2017'] = nov_plus_dic_2017

In [12]:
# Attach the daily 'ratio_tm' column to the regressors, matching rows on the
# calendar keys with a left join (every row of independent_variables is kept).
#
# Any 'ratio_tm' column left over from a previous run of this cell is dropped
# first: without that, re-running the cell would produce 'ratio_tm_x' and
# 'ratio_tm_y' and later selections of 'ratio_tm' would fail.
#
# validate='many_to_one' makes pandas raise if daily_trx contains duplicate keys,
# which would otherwise silently duplicate rows in the result.
merge_keys = ['YEAR', 'MONTH', 'YEAR_DAY']
independent_variables = (
    independent_variables
    .drop(columns=['ratio_tm'], errors='ignore')
    .merge(
        daily_trx.loc[:, merge_keys + ['ratio_tm']],
        on=merge_keys,
        how='left',
        validate='many_to_one',
    )
)

In [13]:
# Dependent variable: row-wise sum of the four transaction-count columns.
# The columns are added left to right with `+`, so a missing value in any
# component makes that row's total missing as well.
trx_components = ['pn_SUM_TRX_no_t', 'pn_SUM_TRX_3t', 'pn_SUM_TRX_tm', 'zp_SUM_TRX']
total_trx = daily_trx[trx_components[0]]
for component in trx_components[1:]:
    total_trx = total_trx + daily_trx[component]

#### ESTIMATION. 1ST MODEL

Estimado por OLS.<br>
<strong>Y:</strong><br>
'total de transacciones'<br>
<strong>X:</strong><br>
'SATURDAY',<br>
'SUNDAY',<br>
'ratio_tm',<br>
'kms_ofertados',<br>
'Verano',<br>
'Julio',<br>
'Nov_Dic_2017',<br>
't',<br>
'Feriado_laboral',<br>
'Feriado_no_laboral',<br>
'Censo_Elecciones',<br>
'Partido',<br>
'FDS_Largo',<br>
'Disturbios',<br>
'Bucle',<br>
'Clima',<br>
'visperas_laborales'

In [14]:
# Model 1 inputs: total transactions as the response and the full set of
# 17 candidate regressors as the design matrix.
model_1_regressors = [
    'SATURDAY', 'SUNDAY', 'ratio_tm', 'kms_ofertados', 'Verano', 'Julio',
    'Nov_Dic_2017', 't', 'Feriado_laboral', 'Feriado_no_laboral',
    'Censo_Elecciones', 'Partido', 'FDS_Largo', 'Disturbios', 'Bucle',
    'Clima', 'visperas_laborales',
]
X = independent_variables.loc[:, model_1_regressors]
Y = total_trx

In [15]:
# Add the intercept term as the first column of the design matrix.
X = sm.add_constant(X, prepend=True)

In [16]:
# Set up the ordinary least squares regression of Y on X (not fitted yet).
model = sm.OLS(endog=Y, exog=X)

In [17]:
# Estimate the model and print its plain-text summary table.
results = model.fit()
summary_table = results.summary()
print(summary_table)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.952
Model:                            OLS   Adj. R-squared:                  0.951
Method:                 Least Squares   F-statistic:                     1255.
Date:                Mon, 05 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:18:06   Log-Likelihood:                -14842.
No. Observations:                1096   AIC:                         2.972e+04
Df Residuals:                    1078   BIC:                         2.981e+04
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               3.049e+06   3.33

----------------

#### ESTIMATION. 2ND MODEL

Estimado por OLS.<br>
<strong>Y:</strong><br>
'total de transacciones'<br>
<strong>X:</strong><br>
'SATURDAY',<br>
'SUNDAY',<br>
'ratio_tm',<br>
'kms_ofertados',<br>
'Verano',<br>
'Julio',<br>
'Nov_Dic_2017',<br>
't',<br>
'Feriado_laboral',<br>
'Feriado_no_laboral',<br>
'Censo_Elecciones',<br>
'Partido',<br>
'FDS_Largo',<br>
'Disturbios',<br>
'Bucle',<br>
'Clima',<br>
'visperas_laborales'<br>
<strong>Omitidas:</strong><br>
'Nov_Dic_2017',<br>
'Bucle'

In [22]:
# Model 2 inputs: same response as before; the regressor set is the full list
# without 'Nov_Dic_2017' and 'Bucle' (15 regressors).
model_2_regressors = [
    'SATURDAY', 'SUNDAY', 'ratio_tm', 'kms_ofertados', 'Verano', 'Julio',
    't', 'Feriado_laboral', 'Feriado_no_laboral', 'Censo_Elecciones',
    'Partido', 'FDS_Largo', 'Disturbios', 'Clima', 'visperas_laborales',
]
X = independent_variables.loc[:, model_2_regressors]
Y = total_trx

In [23]:
# Add the intercept term as the first column of the design matrix.
X = sm.add_constant(X, prepend=True)

In [24]:
# Set up the ordinary least squares regression of Y on X (not fitted yet).
model = sm.OLS(endog=Y, exog=X)

In [25]:
# Estimate the model and print its plain-text summary table.
results = model.fit()
summary_table = results.summary()
print(summary_table)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.952
Model:                            OLS   Adj. R-squared:                  0.951
Method:                 Least Squares   F-statistic:                     1423.
Date:                Mon, 05 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:35:47   Log-Likelihood:                -14843.
No. Observations:                1096   AIC:                         2.972e+04
Df Residuals:                    1080   BIC:                         2.980e+04
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               3.053e+06   3.31

----------------

#### ESTIMATION. 3RD MODEL

Estimado por OLS.<br>
<strong>Y:</strong><br>
'total de transacciones'<br>
<strong>X:</strong><br>
'SATURDAY',<br>
'SUNDAY',<br>
'ratio_tm',<br>
'kms_ofertados',<br>
'Verano',<br>
'Julio',<br>
'Nov_Dic_2017',<br>
't',<br>
'Feriado_laboral',<br>
'Feriado_no_laboral',<br>
'Partido',<br>
'FDS_Largo',<br>
'Disturbios',<br>
'Bucle',<br>
'Clima',<br>
'visperas_laborales'<br>
<strong>Omitidas:</strong><br>
'Nov_Dic_2017',<br>
'Bucle',<br>
'Censo_Elecciones'

In [26]:
# Model 3 inputs: same response as before; the regressor set is the full list
# without 'Nov_Dic_2017', 'Bucle' and 'Censo_Elecciones' (14 regressors).
model_3_regressors = [
    'SATURDAY', 'SUNDAY', 'ratio_tm', 'kms_ofertados', 'Verano', 'Julio',
    't', 'Feriado_laboral', 'Feriado_no_laboral', 'Partido', 'FDS_Largo',
    'Disturbios', 'Clima', 'visperas_laborales',
]
X = independent_variables.loc[:, model_3_regressors]
Y = total_trx

In [27]:
# Add the intercept term as the first column of the design matrix.
X = sm.add_constant(X, prepend=True)

In [28]:
# Set up the ordinary least squares regression of Y on X (not fitted yet).
model = sm.OLS(endog=Y, exog=X)

In [29]:
# Estimate the model and print its plain-text summary table.
results = model.fit()
summary_table = results.summary()
print(summary_table)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.952
Model:                            OLS   Adj. R-squared:                  0.951
Method:                 Least Squares   F-statistic:                     1521.
Date:                Mon, 05 Mar 2018   Prob (F-statistic):               0.00
Time:                        16:47:43   Log-Likelihood:                -14845.
No. Observations:                1096   AIC:                         2.972e+04
Df Residuals:                    1081   BIC:                         2.979e+04
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               3.053e+06   3.31

## Closed