### Time series processing - By day

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
#%matplotlib inline
%matplotlib qt5
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
import time

In [None]:
import plotly.plotly
import plotly.graph_objs as go

In [3]:
from Utils import TransantiagoConstants

In [4]:
DTPMDir = TransantiagoConstants.DTPMDir
DTPM_TRXDir = TransantiagoConstants.DTPM_TRXDir

In [5]:
from Utils import ReadTurnstilesDataBase
[ana_turnstiles_df, mauricio_turnstiles_df] = ReadTurnstilesDataBase.readTurnstileData()
ana_turnstiles_df = ReadTurnstilesDataBase.processAnaTurnstiles(ana_turnstiles_df)

In [6]:
def saturday(x):
    if(x.weekday()==5):
        return 1
    else:
        return 0

In [7]:
def sunday(x):
    if(x.weekday()==6):
        return 1
    else:
        return 0

##### Creating function to analyze normal-stops by years

In [8]:
def year_pn_analyses_trx(year,ana_turnstiles_df,mauricio_turnstiles_df):
    path = os.path.join(DTPM_TRXDir,'un_ppu_sersen_sumtrx_' + str(year) + '_by_date.csv')
    df = pd.read_csv(path, sep=";", header=None, encoding='latin-1', usecols=[0,1,2,3,4,5], parse_dates=[3])
    df.columns = ['UN','PPU','SER_SEN','DATE','SUM_TRX','COUNT']
    print('Number of observations before merge info. of turnstile is: ' + str(len(df.index)))
    
    df = df.merge(ana_turnstiles_df, left_on = 'PPU', right_on = 'sitio_subida', how='left', suffixes=('','_ana'))
    df = df.merge(mauricio_turnstiles_df, left_on = 'PPU', right_on = 'sitio_subida' , suffixes=('_ana', '_mauricio'), how='left')
    print('Number of observations after merge info. of turnstile is: ' + str(len(df.index)))
    
    torniquetes_mariposa_conditions = (df.loc[:,'fecha_instalacion_ana'].dt.date<df.loc[:,'DATE'].dt.date)
    df.loc[:,'min_fecha'] = pd.concat([df['fecha_instalacion_ana'], df['fecha_instalacion_mauricio']], axis=1).min(axis=1)
    no_torniquetes_conditions = (((df.loc[:,'fecha_instalacion_ana'].isnull()) & (df.loc[:,'fecha_instalacion_mauricio'].isnull())) | (df.loc[:,'DATE'].dt.date<=df['min_fecha'].dt.date))
    df.loc[:,'torniquete_mariposa'] = np.where(torniquetes_mariposa_conditions,1,0)
    df.loc[:,'no_torniquete'] = np.where(no_torniquetes_conditions,1,0)

    new_year_day = dt.date(year=year, month=1, day=1)
    df.loc[:,'YEAR_DAY'] = df.loc[:,'DATE'].apply(lambda x: (x.date() - new_year_day).days + 1)
    df.loc[:,'MONTH'] = df.loc[:,'DATE'].dt.month
    df.loc[:,'YEAR'] = df.loc[:,'DATE'].dt.year
    
    df.loc[:,'SATURDAY'] = df.loc[:,'DATE'].apply(saturday)
    df.loc[:,'SUNDAY'] = df.loc[:,'DATE'].apply(sunday)    

    f = {'SUM_TRX':
         {'pn_SUM_TRX':['sum']},
         'COUNT':
         {'pn_SUM_EXP':['sum']}}
    
    grouped_df = df.groupby(['YEAR','MONTH','YEAR_DAY','SATURDAY','SUNDAY','DATE','torniquete_mariposa','no_torniquete']).agg(f)
    grouped_df.columns = grouped_df.columns.droplevel(1)
    grouped_df.reset_index(inplace=True,level=['torniquete_mariposa','no_torniquete'])
    days = grouped_df.groupby(['YEAR_DAY']).agg({'pn_SUM_EXP': 'sum'})
    grouped_df.loc[:,'ratio'] = grouped_df['pn_SUM_EXP'].div(days['pn_SUM_EXP'], level='YEAR_DAY') * 100
    
    return grouped_df

##### Creating function to analyze zp-stops years

In [9]:
def year_zp_analyses_trx(year):
    path = os.path.join(DTPM_TRXDir,'trxzp_' + str(year))
    df = pd.read_csv(path, sep=";", header=None, encoding='latin-1', parse_dates=[2])
    df.columns = ['UN','RMZP','DATE','TIPODIA','MHORA','PERIODO','TRX_VALIDAS','TARJETAS_NO_VALIDAS','TRX_NO_VALIDAS']
    print('Number of observations is: ' + str(len(df.index)))
    
    new_year_day = dt.date(year=year, month=1, day=1)
    df.loc[:,'YEAR_DAY'] = df.loc[:,'DATE'].apply(lambda x: (x.date() - new_year_day).days + 1)    
    df.loc[:,'MONTH'] = df.loc[:,'DATE'].dt.month
    df.loc[:,'YEAR'] = df.loc[:,'DATE'].dt.year
    
    df.loc[:,'SATURDAY'] = df.loc[:,'DATE'].apply(saturday)
    df.loc[:,'SUNDAY'] = df.loc[:,'DATE'].apply(sunday)   
#    df.loc[:,'WEEK'] = df.loc[:,'DATE'].apply(lambda x: x.date().isocalendar()[1])
    
    f = {'TRX_VALIDAS':
         {'zp_SUM_TRX':['sum']},
        'TRX_NO_VALIDAS':
        {'zp_SUM_TRX_NO_VALIDAS':['sum']}}
    
    grouped_df = df.groupby(['YEAR','MONTH','YEAR_DAY','SATURDAY','SUNDAY','DATE']).agg(f)
    grouped_df.columns = grouped_df.columns.droplevel(1)

    return grouped_df

##### Colors and others settings for plotting

In [10]:
colors = [(76, 181, 245),(183, 184, 182),(52, 103, 92),(179, 193, 0)]
for i in range(len(colors)):
    r, g, b = colors[i]  
    colors[i] = (r / 255., g / 255., b / 255.)

In [11]:
def millions(x, pos):
    'The two args are the value and tick position'
    return '%1.1fM' % (x*1e-6)

In [12]:
from matplotlib.ticker import FuncFormatter
formatter = FuncFormatter(millions)

##### Getting trxs in pn and zp in 2015, 2016 and 2017

In [13]:
tic = time.clock()

pn_grouped_2015_df = year_pn_analyses_trx(2015,ana_turnstiles_df,mauricio_turnstiles_df)
zp_grouped_2015_df = year_zp_analyses_trx(2015)

pn_grouped_2015_df.reset_index(inplace=True)
zp_grouped_2015_df.reset_index(inplace=True)

Number of observations before merge info. of turnstile is: 7787251
Number of observations after merge info. of turnstile is: 7787251


  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


Number of observations is: 592589


In [14]:
pn_grouped_2016_df = year_pn_analyses_trx(2016,ana_turnstiles_df,mauricio_turnstiles_df)
zp_grouped_2016_df = year_zp_analyses_trx(2016)

pn_grouped_2016_df.reset_index(inplace=True)
zp_grouped_2016_df.reset_index(inplace=True)

Number of observations before merge info. of turnstile is: 8034722
Number of observations after merge info. of turnstile is: 8034722


  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


Number of observations is: 591218


In [15]:
pn_grouped_2017_df = year_pn_analyses_trx(2017,ana_turnstiles_df,mauricio_turnstiles_df)
zp_grouped_2017_df = year_zp_analyses_trx(2017)

pn_grouped_2017_df.reset_index(inplace=True)
zp_grouped_2017_df.reset_index(inplace=True)

toc = time.clock()
print(toc-tic)

Number of observations before merge info. of turnstile is: 7963925
Number of observations after merge info. of turnstile is: 7963925


  return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)


Number of observations is: 755171
878.4994776668555


In [16]:
pn_frames = [pn_grouped_2015_df,pn_grouped_2016_df,pn_grouped_2017_df]
zp_frames = [zp_grouped_2015_df,zp_grouped_2016_df,zp_grouped_2017_df]

pn_summary = pd.concat(pn_frames)
zp_summary = pd.concat(zp_frames)

In [17]:
pn_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,ratio
1090,2017,12,364,1,0,2017-12-30,0,1,771911,26799,49.161652
1091,2017,12,364,1,0,2017-12-30,1,0,500790,20279,37.200983
1092,2017,12,365,0,1,2017-12-31,0,0,109242,5860,13.73298
1093,2017,12,365,0,1,2017-12-31,0,1,431542,20227,47.402217
1094,2017,12,365,0,1,2017-12-31,1,0,299808,16584,38.864803


In [18]:
zp_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,zp_SUM_TRX,zp_SUM_TRX_NO_VALIDAS
360,2017,12,361,0,0,2017-12-27,441841,5524.0
361,2017,12,362,0,0,2017-12-28,431401,5649.0
362,2017,12,363,0,0,2017-12-29,380967,5721.0
363,2017,12,364,1,0,2017-12-30,76326,1759.0
364,2017,12,365,0,1,2017-12-31,14158,219.0


In [19]:
len(pn_summary.index)

3167

In [20]:
len(zp_summary.index)

1096

###### Appending fare. Test based on dataset length is <font color='green'> passed </font>

In [21]:
fares_path = os.path.join(DTPMDir,'08_Tarifas/Tarifas_2007_2017.xlsx')
fares_df = pd.read_excel(fares_path) #dates are already parsed

In [22]:
fares_df.loc[:,'YEAR'] = fares_df.loc[:,'Mes'].dt.year
fares_df.loc[:,'MONTH'] = fares_df.loc[:,'Mes'].dt.month

In [23]:
fares_df.head()

Unnamed: 0,Mes,Buses,Metro Hora Punta,Metro Hora Valle,Metro Hora Baja,Estudiantes Ed. Media/Superior,YEAR,MONTH
0,2007-02-01,380,440,380,360,130,2007,2
1,2007-03-01,380,440,380,360,130,2007,3
2,2007-04-01,380,440,380,360,130,2007,4
3,2007-05-01,380,440,380,360,130,2007,5
4,2007-06-01,380,440,380,360,130,2007,6


In [24]:
print('Length of pn_summary before 1st-merge is: ' + str(len(pn_summary.index)))
print('Length of zp_summary before 1st-merge is: ' + str(len(zp_summary.index)))

Length of pn_summary before 1st-merge is: 3167
Length of zp_summary before 1st-merge is: 1096


In [25]:
pn_summary = pn_summary.merge(fares_df,how='left',left_on=['YEAR','MONTH'],right_on=['YEAR','MONTH'])
zp_summary = zp_summary.merge(fares_df,how='left',left_on=['YEAR','MONTH'],right_on=['YEAR','MONTH'])

In [26]:
print('Length of pn_summary after 1st-merge is: ' + str(len(pn_summary.index)))
print('Length of zp_summary after 1st-merge is: ' + str(len(zp_summary.index)))

Length of pn_summary after 1st-merge is: 3167
Length of zp_summary after 1st-merge is: 1096


In [27]:
pn_summary.head()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,ratio,Mes,Buses,Metro Hora Punta,Metro Hora Valle,Metro Hora Baja,Estudiantes Ed. Media/Superior
0,2015,1,1,0,0,2015-01-01,0,0,116766,6586,17.890905,2015-01-01,640,720,660,610,210
1,2015,1,1,0,0,2015-01-01,0,1,514892,30226,82.109095,2015-01-01,640,720,660,610,210
2,2015,1,2,0,0,2015-01-02,0,0,326616,13361,19.540482,2015-01-01,640,720,660,610,210
3,2015,1,2,0,0,2015-01-02,0,1,1604441,55015,80.459518,2015-01-01,640,720,660,610,210
4,2015,1,3,1,0,2015-01-03,0,0,247882,9551,17.360089,2015-01-01,640,720,660,610,210


##### Appending kms_comerciales validamente ofertados. Test based on dataset length is <font color='green'> passed </font>

In [28]:
kms_path = os.path.join(DTPMDir,'06_LBS/1_consolidados/kms_recorridos.txt')
kms_df = pd.read_table(kms_path,sep=';', encoding='latin-1',index_col = 0) #Dates are not parsed. m_ofertados are not parsed.

In [29]:
kms_df['Fecha'] = kms_df['Fecha'].apply(lambda x: pd.to_datetime(x, format='%d/%m/%Y'))

In [30]:
def year_day_calc(x):
    if(x[1].year==2015):
        new_year_day = dt.date(year=2015, month=1, day=1)
    elif(x[1].year==2016):
        new_year_day = dt.date(year=2016, month=1, day=1)
    else:
        new_year_day = dt.date(year=2017, month=1, day=1)
        
    return ((x[1].date() - new_year_day).days + 1)

In [31]:
kms_df['YEAR_DAY'] = kms_df.apply(year_day_calc, axis=1)    
kms_df['MONTH'] = kms_df['Fecha'].dt.month
kms_df['YEAR'] = kms_df['Fecha'].dt.year

In [32]:
kms_df['m_ofertados'] = kms_df['m_ofertados'].apply(lambda x: x.replace(',','.'))
kms_df['m_ofertados'] = kms_df['m_ofertados'].apply(lambda x: float(x))

In [33]:
kms_df.head()

Unnamed: 0,UN,Fecha,m_ofertados,YEAR_DAY,MONTH,YEAR
0,U1,2015-01-01,70832790.0,1,1,2015
1,U1,2015-01-02,136306900.0,2,1,2015
2,U1,2015-01-03,124941400.0,3,1,2015
3,U1,2015-01-04,198590200.0,4,1,2015
4,U1,2015-01-05,136532000.0,5,1,2015


In [34]:
grouped_kms_df = kms_df.groupby(['YEAR','MONTH','YEAR_DAY'])['m_ofertados'].sum().to_frame().reset_index()

In [35]:
grouped_kms_df.head()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,m_ofertados
0,2015,1,1,689192500.0
1,2015,1,2,1273185000.0
2,2015,1,3,1029196000.0
3,2015,1,4,1680623000.0
4,2015,1,5,1265276000.0


In [36]:
pn_summary = pn_summary.merge(grouped_kms_df,how='left',left_on=['YEAR','MONTH','YEAR_DAY'],right_on=['YEAR','MONTH','YEAR_DAY'])
zp_summary = zp_summary.merge(grouped_kms_df,how='left',left_on=['YEAR','MONTH','YEAR_DAY'],right_on=['YEAR','MONTH','YEAR_DAY'])

In [37]:
print('Length of pn_summary after 2nd-merge is: ' + str(len(pn_summary.index)))
print('Length of zp_summary after 2nd-merge is: ' + str(len(zp_summary.index)))

Length of pn_summary after 2nd-merge is: 3167
Length of zp_summary after 2nd-merge is: 1096


In [38]:
pn_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,ratio,Mes,Buses,Metro Hora Punta,Metro Hora Valle,Metro Hora Baja,Estudiantes Ed. Media/Superior,m_ofertados
3162,2017,12,364,1,0,2017-12-30,0,1,771911,26799,49.161652,2017-12-01,640,740,660,610,210,1025748000.0
3163,2017,12,364,1,0,2017-12-30,1,0,500790,20279,37.200983,2017-12-01,640,740,660,610,210,1025748000.0
3164,2017,12,365,0,1,2017-12-31,0,0,109242,5860,13.73298,2017-12-01,640,740,660,610,210,796000600.0
3165,2017,12,365,0,1,2017-12-31,0,1,431542,20227,47.402217,2017-12-01,640,740,660,610,210,796000600.0
3166,2017,12,365,0,1,2017-12-31,1,0,299808,16584,38.864803,2017-12-01,640,740,660,610,210,796000600.0


In [39]:
zp_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,zp_SUM_TRX,zp_SUM_TRX_NO_VALIDAS,Mes,Buses,Metro Hora Punta,Metro Hora Valle,Metro Hora Baja,Estudiantes Ed. Media/Superior,m_ofertados
1091,2017,12,361,0,0,2017-12-27,441841,5524.0,2017-12-01,640,740,660,610,210,1331226000.0
1092,2017,12,362,0,0,2017-12-28,431401,5649.0,2017-12-01,640,740,660,610,210,1310356000.0
1093,2017,12,363,0,0,2017-12-29,380967,5721.0,2017-12-01,640,740,660,610,210,1251596000.0
1094,2017,12,364,1,0,2017-12-30,76326,1759.0,2017-12-01,640,740,660,610,210,1025748000.0
1095,2017,12,365,0,1,2017-12-31,14158,219.0,2017-12-01,640,740,660,610,210,796000600.0


In [40]:
pn_summary['kms_ofertados'] = pn_summary['m_ofertados'].apply(lambda x: x/1000)
zp_summary['kms_ofertaods'] = zp_summary['m_ofertados'].apply(lambda x: x/1000)

In [41]:
pn_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,ratio,Mes,Buses,Metro Hora Punta,Metro Hora Valle,Metro Hora Baja,Estudiantes Ed. Media/Superior,m_ofertados,kms_ofertados
3162,2017,12,364,1,0,2017-12-30,0,1,771911,26799,49.161652,2017-12-01,640,740,660,610,210,1025748000.0,1025748.0
3163,2017,12,364,1,0,2017-12-30,1,0,500790,20279,37.200983,2017-12-01,640,740,660,610,210,1025748000.0,1025748.0
3164,2017,12,365,0,1,2017-12-31,0,0,109242,5860,13.73298,2017-12-01,640,740,660,610,210,796000600.0,796000.6
3165,2017,12,365,0,1,2017-12-31,0,1,431542,20227,47.402217,2017-12-01,640,740,660,610,210,796000600.0,796000.6
3166,2017,12,365,0,1,2017-12-31,1,0,299808,16584,38.864803,2017-12-01,640,740,660,610,210,796000600.0,796000.6


###### Creating dummy variables for Enero, Febrero and Julio => Estival and Invierno

In [42]:
pn_summary['Enero'] = pn_summary['MONTH'].apply(lambda x: 1 if x==1 else 0)
pn_summary['Febrero'] = pn_summary['MONTH'].apply(lambda x: 1 if x==2 else 0)
pn_summary['Julio'] = pn_summary['MONTH'].apply(lambda x: 1 if x==7 else 0)

zp_summary['Enero'] = zp_summary['MONTH'].apply(lambda x: 1 if x==1 else 0)
zp_summary['Febrero'] = zp_summary['MONTH'].apply(lambda x: 1 if x==2 else 0)
zp_summary['Julio'] = zp_summary['MONTH'].apply(lambda x: 1 if x==7 else 0)

###### Creating dummy variables for Noviembre and Diciembre 2017, since implementation of L6 was made during these months.

In [43]:
def dummy_noviembre_2017(x):
    if((x[0]==2017)&(x[1]==11)):
        return 1
    else:
        return 0

def dummy_diciembre_2017(x):
    if((x[0]==2017)&(x[1]==12)):
        return 1
    else:
        return 0

pn_summary['Nov_2017'] = pn_summary.apply(dummy_noviembre_2017, axis=1)
pn_summary['Dic_2017'] = pn_summary.apply(dummy_diciembre_2017, axis=1)

zp_summary['Nov_2017'] = zp_summary.apply(dummy_noviembre_2017, axis=1)
zp_summary['Dic_2017'] = zp_summary.apply(dummy_diciembre_2017, axis=1)

In [44]:
pn_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,...,Metro Hora Valle,Metro Hora Baja,Estudiantes Ed. Media/Superior,m_ofertados,kms_ofertados,Enero,Febrero,Julio,Nov_2017,Dic_2017
3162,2017,12,364,1,0,2017-12-30,0,1,771911,26799,...,660,610,210,1025748000.0,1025748.0,0,0,0,0,1
3163,2017,12,364,1,0,2017-12-30,1,0,500790,20279,...,660,610,210,1025748000.0,1025748.0,0,0,0,0,1
3164,2017,12,365,0,1,2017-12-31,0,0,109242,5860,...,660,610,210,796000600.0,796000.6,0,0,0,0,1
3165,2017,12,365,0,1,2017-12-31,0,1,431542,20227,...,660,610,210,796000600.0,796000.6,0,0,0,0,1
3166,2017,12,365,0,1,2017-12-31,1,0,299808,16584,...,660,610,210,796000600.0,796000.6,0,0,0,0,1


###### Creating temporal variable

In [45]:
pn_summary.loc[pn_summary['YEAR']==2015,'YEAR_DAY'].max()

365

In [46]:
pn_summary.loc[pn_summary['YEAR']==2016,'YEAR_DAY'].max()

366

In [47]:
pn_summary.loc[pn_summary['YEAR']==2017,'YEAR_DAY'].max()

365

In [48]:
def temporal_variable(x):
    if(x[0]==2015):
        return x[2]
    elif(x[0]==2016):
        return 365+x[2]
    else:
        return 365+366+x[2]

In [49]:
pn_summary['t'] = pn_summary.apply(temporal_variable, axis=1)

zp_summary['t'] = zp_summary.apply(temporal_variable, axis=1)

In [50]:
pn_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,...,Metro Hora Baja,Estudiantes Ed. Media/Superior,m_ofertados,kms_ofertados,Enero,Febrero,Julio,Nov_2017,Dic_2017,t
3162,2017,12,364,1,0,2017-12-30,0,1,771911,26799,...,610,210,1025748000.0,1025748.0,0,0,0,0,1,1095
3163,2017,12,364,1,0,2017-12-30,1,0,500790,20279,...,610,210,1025748000.0,1025748.0,0,0,0,0,1,1095
3164,2017,12,365,0,1,2017-12-31,0,0,109242,5860,...,610,210,796000600.0,796000.6,0,0,0,0,1,1096
3165,2017,12,365,0,1,2017-12-31,0,1,431542,20227,...,610,210,796000600.0,796000.6,0,0,0,0,1,1096
3166,2017,12,365,0,1,2017-12-31,1,0,299808,16584,...,610,210,796000600.0,796000.6,0,0,0,0,1,1096


In [51]:
zp_summary.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,zp_SUM_TRX,zp_SUM_TRX_NO_VALIDAS,Mes,Buses,...,Metro Hora Baja,Estudiantes Ed. Media/Superior,m_ofertados,kms_ofertaods,Enero,Febrero,Julio,Nov_2017,Dic_2017,t
1091,2017,12,361,0,0,2017-12-27,441841,5524.0,2017-12-01,640,...,610,210,1331226000.0,1331226.0,0,0,0,0,1,1092
1092,2017,12,362,0,0,2017-12-28,431401,5649.0,2017-12-01,640,...,610,210,1310356000.0,1310356.0,0,0,0,0,1,1093
1093,2017,12,363,0,0,2017-12-29,380967,5721.0,2017-12-01,640,...,610,210,1251596000.0,1251596.0,0,0,0,0,1,1094
1094,2017,12,364,1,0,2017-12-30,76326,1759.0,2017-12-01,640,...,610,210,1025748000.0,1025748.0,0,0,0,0,1,1095
1095,2017,12,365,0,1,2017-12-31,14158,219.0,2017-12-01,640,...,610,210,796000600.0,796000.6,0,0,0,0,1,1096


###### Creating dummy variables per type of special days. Test based on dataset length is <font color='green'> passed </font>

In [52]:
DES_path = os.path.join(DTPMDir,'07_DES/resumen_des.xlsx')
DES_df = pd.read_excel(DES_path) #Dates are already parsed.

In [53]:
DES_df.head()

Unnamed: 0,Fecha,Descripción,REALIZA_BUCLE,Feriado_laboral,Feriado_no_laboral,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
0,2015-01-01,Año Nuevo 2015,-,1,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
1,2015-01-02,Interferiado año nuevo 2015,-,1,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
2,2015-01-03,Fin de semana largo por año nuevo,-,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
3,2015-01-04,Fin de semana largo por año nuevo,-,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
4,2015-03-29,Día del joven combatiente,-,0,0,0,0,0,1,0.0,0.0,0.0,0.0,0,0


* Days with lack of information

In [54]:
DES_df.loc[DES_df['Corte_Metro'].isnull(),:]

Unnamed: 0,Fecha,Descripción,REALIZA_BUCLE,Feriado_laboral,Feriado_no_laboral,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
318,2016-03-26,Línea 1,NO,0,0,0,0,0,0,,,,0.0,0,0


In [55]:
DES_df.loc[DES_df['Bucle'].isnull(),:]

Unnamed: 0,Fecha,Descripción,REALIZA_BUCLE,Feriado_laboral,Feriado_no_laboral,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
380,2016-10-07,Corte Línea 2,-,0,0,0,0,0,0,1.0,0.0,0.0,,0,0
452,2017-06-16,Corte Línea 5,-,0,0,0,0,0,0,1.0,0.0,0.0,,0,0
455,2017-06-16,Retraso Línea 4,-,0,0,0,0,0,0,0.0,1.0,0.0,,0,0
469,2017-07-25,Retraso Línea 1,-,0,0,0,0,0,0,0.0,1.0,0.0,,0,0


In [56]:
def f(x):
    suma_dummies = x.sum(skipna=False) #Sums with NaNs return NaN. Day descriptions are omitted.
    if(suma_dummies>1):
        return 1
    else:
        return suma_dummies

In [57]:
grouped_DES_df = DES_df.groupby(['Fecha']).agg(f).reset_index()

In [58]:
grouped_DES_df.head()

Unnamed: 0,Fecha,Feriado_laboral,Feriado_no_laboral,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
0,2015-01-01,1,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
1,2015-01-02,1,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
2,2015-01-03,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
3,2015-01-04,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0
4,2015-01-14,0,0,0,0,0,0,1.0,0.0,0.0,1.0,0,0


In [59]:
def year_day_calc_2(x):
    if(x[0].year==2015):
        new_year_day = dt.date(year=2015, month=1, day=1)
    elif(x[0].year==2016):
        new_year_day = dt.date(year=2016, month=1, day=1)
    else:
        new_year_day = dt.date(year=2017, month=1, day=1)
        
    return ((x[0].date() - new_year_day).days + 1)

In [60]:
grouped_DES_df['YEAR_DAY'] = grouped_DES_df.apply(year_day_calc_2, axis=1)    
grouped_DES_df['MONTH'] = grouped_DES_df['Fecha'].dt.month
grouped_DES_df['YEAR'] = grouped_DES_df['Fecha'].dt.year

In [61]:
grouped_DES_df.tail()

Unnamed: 0,Fecha,Feriado_laboral,Feriado_no_laboral,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales,YEAR_DAY,MONTH,YEAR
381,2017-12-27,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,361,12,2017
382,2017-12-28,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,362,12,2017
383,2017-12-29,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,1,363,12,2017
384,2017-12-30,0,0,0,0,1,0,1.0,0.0,0.0,1.0,0,0,364,12,2017
385,2017-12-31,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0,0,365,12,2017


In [62]:
pn_summary = pn_summary.merge(grouped_DES_df,how='left',left_on=['YEAR','MONTH','YEAR_DAY'],right_on=['YEAR','MONTH','YEAR_DAY'])
zp_summary = zp_summary.merge(grouped_DES_df,how='left',left_on=['YEAR','MONTH','YEAR_DAY'],right_on=['YEAR','MONTH','YEAR_DAY'])

In [63]:
print('Length of pn_summary after 3rd-merge is: ' + str(len(pn_summary.index)))
print('Length of zp_summary after 3rd-merge is: ' + str(len(zp_summary.index)))

Length of pn_summary after 3rd-merge is: 3167
Length of zp_summary after 3rd-merge is: 1096


In [64]:
pn_summary.head()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,...,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
0,2015,1,1,0,0,2015-01-01,0,0,116766,6586,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2015,1,1,0,0,2015-01-01,0,1,514892,30226,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2015,1,2,0,0,2015-01-02,0,0,326616,13361,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2015,1,2,0,0,2015-01-02,0,1,1604441,55015,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2015,1,3,1,0,2015-01-03,0,0,247882,9551,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [65]:
zp_summary.head()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,zp_SUM_TRX,zp_SUM_TRX_NO_VALIDAS,Mes,Buses,...,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
0,2015,1,1,0,0,2015-01-01,3202,48.0,2015-01-01,640,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2015,1,2,0,0,2015-01-02,204808,5287.0,2015-01-01,640,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2015,1,3,1,0,2015-01-03,23592,615.0,2015-01-01,640,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2015,1,4,0,1,2015-01-04,15762,213.0,2015-01-01,640,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2015,1,5,0,0,2015-01-05,331379,6799.0,2015-01-01,640,...,,,,,,,,,,


In [66]:
pn_summary = pn_summary.fillna(0) #Assuming 0 values in NaNs obtained via f(x)
zp_summary = zp_summary.fillna(0) #Assuming 0 values in NaNs obtained via f(x)

###### Summarizing all the info

In [67]:
no_turnstile = pn_summary.loc[(pn_summary['torniquete_mariposa']==0)
                              &(pn_summary['no_torniquete']==1),:]

three_turnstile = pn_summary.loc[(pn_summary['torniquete_mariposa']==0)
                                 &(pn_summary['no_torniquete']==0),:]

butterfly_turnstile = pn_summary.loc[(pn_summary['torniquete_mariposa']==1)
                                     &(pn_summary['no_torniquete']==0),:]

In [68]:
butterfly_turnstile.head()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,...,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
244,2015,5,122,1,0,2015-05-02,1,0,175,6,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
247,2015,5,123,0,1,2015-05-03,1,0,200,4,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
250,2015,5,124,0,0,2015-05-04,1,0,396,8,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
253,2015,5,125,0,0,2015-05-05,1,0,486,12,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
256,2015,5,126,0,0,2015-05-06,1,0,1688,74,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


* ... Be aware of this. It was already tested in TimesSeriesEstimationByDay.ipynb

In [69]:
zero_butterfly_turnstile = no_turnstile.loc[(no_turnstile['YEAR'] == 2015)&(no_turnstile['YEAR_DAY'].between(1,121,inclusive=True)),:]

In [70]:
zero_butterfly_turnstile.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,...,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
233,2015,4,117,0,0,2015-04-27,0,1,2359878,60528,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
235,2015,4,118,0,0,2015-04-28,0,1,2381346,60840,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
237,2015,4,119,0,0,2015-04-29,0,1,2399138,60175,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
239,2015,4,120,0,0,2015-04-30,0,1,2458424,58221,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
241,2015,5,121,0,0,2015-05-01,0,1,736920,41797,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
len(zero_butterfly_turnstile.index)

121

In [72]:
zero_butterfly_turnstile.loc[:,'no_torniquete'] = 0
zero_butterfly_turnstile.loc[:,'torniquete_mariposa'] = 1
zero_butterfly_turnstile.loc[:,'pn_SUM_TRX'] = 0
zero_butterfly_turnstile.loc[:,'pn_SUM_EXP'] = 0
zero_butterfly_turnstile.loc[:,'ratio'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [73]:
zero_butterfly_turnstile.tail()

Unnamed: 0,YEAR,MONTH,YEAR_DAY,SATURDAY,SUNDAY,DATE,torniquete_mariposa,no_torniquete,pn_SUM_TRX,pn_SUM_EXP,...,Censo_Elecciones,Partido,FDS_Largo,Disturbios,Corte_Metro,Retraso_Metro,Incidente_Metro,Bucle,Clima,visperas_laborales
233,2015,4,117,0,0,2015-04-27,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
235,2015,4,118,0,0,2015-04-28,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
237,2015,4,119,0,0,2015-04-29,1,0,0,0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
239,2015,4,120,0,0,2015-04-30,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
241,2015,5,121,0,0,2015-05-01,1,0,0,0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
butterfly_turnstile = pd.concat([butterfly_turnstile,zero_butterfly_turnstile])

###### Sorting for plotting

In [75]:
no_turnstile.sort_values(by=['YEAR','MONTH','YEAR_DAY'], ascending=[True, True, True], inplace=True)
three_turnstile.sort_values(by=['YEAR','MONTH','YEAR_DAY'], ascending=[True, True, True], inplace=True)
butterfly_turnstile.sort_values(by=['YEAR','MONTH','YEAR_DAY'], ascending=[True, True, True], inplace=True)

zp_summary.sort_values(by=['YEAR','MONTH','YEAR_DAY'], ascending=[True, True, True], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


###### PLOTTING EVERYTHING.

In [105]:
dates_values = no_turnstile['DATE'].values
no_turnstile_values = list(map(int, no_turnstile['pn_SUM_TRX'].values))
three_turnstile_values = list(map(int,three_turnstile['pn_SUM_TRX'].values))
butterfly_turnstile_values = list(map(int,butterfly_turnstile['pn_SUM_TRX'].values))
zp_values = list(map(int,zp_summary['zp_SUM_TRX'].values))
ratio_butterfly_turnstile_values = list(map(int,butterfly_turnstile['ratio'].values))

In [106]:
labels = ["zp", "S/T", "C/T3", "C/TM"]
fig, ax = plt.subplots()
ax.stackplot(dates_values, zp_values, no_turnstile_values , three_turnstile_values , butterfly_turnstile_values , labels = labels , colors=colors)
ax.legend(loc=2)
ax.set_title('Transacciones por día')
ax.set_ylabel('Transacciones')
ax.yaxis.set_major_formatter(formatter)
ax.set_xlabel('Fecha (día)')
fig.autofmt_xdate()

ax_1 = ax.twinx()
ax_1.plot(dates_values, ratio_butterfly_turnstile_values, color = 'black', label = '% Exp.')
ax_1.set_ylabel('% Expediciones con torniquete mariposa')
ax_1.legend(loc=3)
ax_1.set_ylim(0, 100)

#plt.xticks(np.arange(min(months_values), max(months_values)+1, 1.0))
plt.savefig('C:/Users/leoca_000/Desktop/Evasion/03_report/02_Figs/5_trxOverTimeByDay_all.pdf')
plt.show()

In [107]:
plotly_dates_values = no_turnstile['DATE'].apply(lambda x: x.date())

In [108]:
trace1 = go.Bar(
    x=plotly_dates_values,
    y=zp_values,
    name='ZP',
    marker=dict(
        color='rgb(76, 181, 245)',
    ),
    opacity=0.6
)

trace2 = go.Bar(
    x=plotly_dates_values,
    y=no_turnstile_values,
    name='S/T',
    marker=dict(
        color='rgb(183, 184, 182)',
    ),
    opacity=0.6
)

trace3 = go.Bar(
    x=plotly_dates_values,
    y=three_turnstile_values,
    name='C/T3',
    marker=dict(
        color='rgb(52, 103, 92)',
    ),
    opacity=0.6    

)
trace4 = go.Bar(
    x=plotly_dates_values,
    y=butterfly_turnstile_values,
    name='C/TM',
    marker=dict(
        color='rgb(179, 193, 0)',
    ),
    opacity=0.6 
)

trace5 = go.Scatter(
    x = plotly_dates_values,
    y = ratio_butterfly_turnstile_values,
    yaxis='y2',
    name = '%Exp.C/TM',
    marker=dict(
        color='rgb(0, 0, 0)',
    ),
)


data = [trace1, trace2, trace3, trace4, trace5]

layout = go.Layout(
    title='Transacciones en bus y zona paga, 2015 a 2017',
    barmode='stack',
    yaxis=dict(
        title='Transacciones'
    ),
    yaxis2=dict(
        title='% exps. diarias C/TM',
        overlaying='y',
        side='right',
        range=[0, 100]
    )
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, filename='STACKED_ALLDATESBYDATE')


Your filename `STACKED_ALLDATESBYDATE` didn't end with .html. Adding .html to the end of your file.



'file://C:\\Users\\leoca_000\\Desktop\\Evasion\\01_analisis\\04_scripts\\TSAnalysis\\Developing\\STACKED_ALLDATESBYDATE.html'

##### PLOTTING ONLY WORKING DAYS WITHOUT SPECIAL CONDITIONS

In [109]:
dates_values = no_turnstile.loc[(no_turnstile['SATURDAY']==0)
                                &(no_turnstile['SUNDAY']==0)
                                &(no_turnstile['Feriado_laboral']==0)
                                &(no_turnstile['Censo_Elecciones']==0)
                                &(no_turnstile['Clima']==0),'DATE'].values

no_turnstile_values = list(map(int, no_turnstile.loc[(no_turnstile['SATURDAY']==0)
                                                     &(no_turnstile['SUNDAY']==0)
                                                     &(no_turnstile['Feriado_laboral']==0)
                                                     &(no_turnstile['Censo_Elecciones']==0)
                                                     &(no_turnstile['Clima']==0),'pn_SUM_TRX'].values))

three_turnstile_values = list(map(int,three_turnstile.loc[(three_turnstile['SATURDAY']==0)
                                                          &(three_turnstile['SUNDAY']==0)
                                                          &(three_turnstile['Feriado_laboral']==0)
                                                          &(three_turnstile['Censo_Elecciones']==0)
                                                          &(three_turnstile['Clima']==0),'pn_SUM_TRX'].values))

butterfly_turnstile_values = list(map(int,butterfly_turnstile.loc[(butterfly_turnstile['SATURDAY']==0)
                                                                  &(butterfly_turnstile['SUNDAY']==0)
                                                                  &(butterfly_turnstile['Feriado_laboral']==0)
                                                                  &(butterfly_turnstile['Censo_Elecciones']==0)
                                                                  &(butterfly_turnstile['Clima']==0),'pn_SUM_TRX'].values))

zp_values = list(map(int,zp_summary.loc[(zp_summary['SATURDAY']==0)
                                        &(zp_summary['SUNDAY']==0)
                                        &(zp_summary['Feriado_laboral']==0)
                                        &(zp_summary['Censo_Elecciones']==0)
                                        &(zp_summary['Clima']==0),'zp_SUM_TRX'].values))

ratio_butterfly_turnstile_values = list(map(int,butterfly_turnstile.loc[(butterfly_turnstile['SATURDAY']==0)
                                                                        &(butterfly_turnstile['SUNDAY']==0)
                                                                        &(butterfly_turnstile['Feriado_laboral']==0)
                                                                        &(butterfly_turnstile['Censo_Elecciones']==0)
                                                                        &(butterfly_turnstile['Clima']==0),'ratio'].values))

In [110]:
plotly_dates_values = no_turnstile.loc[(no_turnstile['SATURDAY']==0)
                                 &(no_turnstile['SUNDAY']==0)
                                 &(no_turnstile['Feriado_laboral']==0)
                                 &(no_turnstile['Censo_Elecciones']==0)
                                 &(no_turnstile['Clima']==0),'DATE'].apply(lambda x: x.date())

In [111]:
labels = ["zp", "S/T", "C/T3", "C/TM"]
fig, ax = plt.subplots()
ax.stackplot(dates_values, zp_values, no_turnstile_values , three_turnstile_values , butterfly_turnstile_values , labels = labels , colors=colors)
ax.legend(loc=2)
ax.set_title('Transacciones por día laboral normal')
ax.set_ylabel('Transacciones')
ax.yaxis.set_major_formatter(formatter)
ax.set_xlabel('Fecha (día)')
fig.autofmt_xdate()

ax_1 = ax.twinx()
ax_1.plot(dates_values, ratio_butterfly_turnstile_values, color = 'black', label = '% Exp.')
ax_1.set_ylabel('% Expediciones con torniquete mariposa')
ax_1.legend(loc=3)
ax_1.set_ylim(0, 100)

#plt.xticks(np.arange(min(months_values), max(months_values)+1, 1.0))
plt.savefig('C:/Users/leoca_000/Desktop/Evasion/03_report/02_Figs/5_trxOverTimeByWorkingDay_all.pdf')
plt.show()

In [112]:
trace1 = go.Bar(
    x=plotly_dates_values,
    y=zp_values,
    name='ZP',
    marker=dict(
        color='rgb(76, 181, 245)',
    ),
    opacity=0.6
)

trace2 = go.Bar(
    x=plotly_dates_values,
    y=no_turnstile_values,
    name='S/T',
    marker=dict(
        color='rgb(183, 184, 182)',
    ),
    opacity=0.6
)

trace3 = go.Bar(
    x=plotly_dates_values,
    y=three_turnstile_values,
    name='C/T3',
    marker=dict(
        color='rgb(52, 103, 92)',
    ),
    opacity=0.6    

)
trace4 = go.Bar(
    x=plotly_dates_values,
    y=butterfly_turnstile_values,
    name='C/TM',
    marker=dict(
        color='rgb(179, 193, 0)',
    ),
    opacity=0.6 
)

trace5 = go.Scatter(
    x = plotly_dates_values,
    y = ratio_butterfly_turnstile_values,
    yaxis='y2',
    name = '%Exp.C/TM',
    marker=dict(
        color='rgb(0, 0, 0)',
    ),
)


data = [trace1, trace2, trace3, trace4, trace5]

layout = go.Layout(
    title='Transacciones en bus y zona paga en día laboral normal, 2015 a 2017',
    barmode='stack',
    yaxis=dict(
        title='Transacciones'
    ),
    yaxis2=dict(
        title='% exps. diarias C/TM',
        overlaying='y',
        side='right',
        range=[0, 100]
    )
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, filename='STACKED_WORKINGDATESBYDATE')


Your filename `STACKED_WORKINGDATESBYDATE` didn't end with .html. Adding .html to the end of your file.



'file://C:\\Users\\leoca_000\\Desktop\\Evasion\\01_analisis\\04_scripts\\TSAnalysis\\Developing\\STACKED_WORKINGDATESBYDATE.html'

###### Printing summaries to a file

In [102]:
#pn_output_path = os.path.join(DTPM_TRXDir, '1_DAILY_SUMMARY/daily_pn_summary.csv')
#zp_output_path = os.path.join(DTPM_TRXDir, '1_DAILY_SUMMARY/daily_zp_summary.csv')

In [103]:
#pn_summary.to_csv(pn_output_path, sep=';', encoding = 'latin-1')

In [104]:
#zp_summary.to_csv(zp_output_path, sep=';', encoding = 'latin-1')

# Closed