In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing 

In [None]:
# Global plotting defaults for the whole notebook.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# newer releases reject the bare 'seaborn' name, so use whichever name this
# installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
plt.rcParams.update({'figure.figsize': (16, 8)})  # default figure size for every plot below
sns.set_context("talk")  # seaborn's "talk" scaling preset

In [None]:
# Input location and file names.
# Source: https://www.ine.es/dyngs/INEbase/es/categoria.htm?c=Estadistica_P&cid=1254735576863
INPATH = 'data/'  # keep the trailing slash: file names are appended by plain concatenation

# Number of tourists by country of residence
INFILE1 = '10822bsc.csv'
# International tourist expenditure by expense item
INFILE2 = '13938bsc.csv'

In [None]:
# Load the tourist-count table (semicolon-separated, ',' as decimal mark and
# '.' as thousands separator).
# 'cp1252' replaces the original 'ANSI': Python only resolves 'ANSI' on Windows
# (as an alias of the 'mbcs' codec), so the read failed with LookupError on any
# other platform. NOTE(review): assumes the file was exported with the Western
# European Windows code page -- confirm against the accented column names.
visits_df = pd.read_csv(
    INPATH + INFILE1,
    delimiter=';',
    decimal=',',
    thousands='.',
    encoding='cp1252',
)
# 'Periodo' is parsed as <year>M<month>; the timestamp lands on the first day of the month.
visits_df['Date'] = pd.to_datetime(visits_df['Periodo'], format='%YM%m')
visits_df['yr'] = visits_df['Date'].dt.year
# Preview: one column per country of residence, one row per date.
visits_df.pivot(index='Date', columns='País de residencia', values='Total').head(3)

In [None]:
# Load the tourist-expenditure table (semicolon-separated, ',' as decimal mark
# and '.' as thousands separator).
# 'cp1252' replaces the original 'ANSI': Python only resolves 'ANSI' on Windows
# (as an alias of the 'mbcs' codec), so the read failed with LookupError on any
# other platform. NOTE(review): assumes the file was exported with the Western
# European Windows code page -- confirm against the accented column names.
exp_df = pd.read_csv(
    INPATH + INFILE2,
    delimiter=';',
    decimal=',',
    thousands='.',
    encoding='cp1252',
)
# 'Periodo' is parsed as <year>M<month>; the timestamp lands on the first day of the month.
exp_df['Date'] = pd.to_datetime(exp_df['Periodo'], format='%YM%m')
exp_df['yr'] = exp_df['Date'].dt.year
# Preview: one column per expense item, one row per date.
exp_df.pivot(index='Date', columns='Partidas de gasto', values='Total').head(3)

In [None]:
# Only the date and the value column are needed from each source table.
select_cols = ['Date', 'Total']

# Expenditure series: keep the rows whose expense item is the overall total.
exp_tot_df = (
    exp_df.loc[exp_df['Partidas de gasto'].eq('Gasto total'), select_cols]
    .set_index('Date')
    .rename(columns={'Total': 'exp_total'})
)

# Visitor series: keep the rows aggregated over every country of residence.
visits_tot_df = (
    visits_df.loc[visits_df['País de residencia'].eq('Total'), select_cols]
    .set_index('Date')
    .rename(columns={'Total': 'visit_total'})
)

In [None]:
# Align both series on the dates they share (inner merge on the Date index),
# then order the result chronologically.
data = (
    visits_tot_df
    .merge(exp_tot_df, left_index=True, right_index=True, how='inner')
    .sort_index()
)
# Tag the index as month-start frequency; pandas rejects the assignment if the
# dates do not conform to it.
data.index.freq = 'MS'
data.info()


In [None]:
# Visitors vs. expenditure: joint plot with a regression fit (kind='reg').
sns.jointplot(
    x='visit_total',
    y='exp_total',
    data=data,
    kind='reg',
)

In [None]:
# Both series over time; expenditure is drawn against a secondary y-axis so the
# two series do not have to share one scale.
data.plot.line(secondary_y='exp_total')
plt.show()

In [None]:
# Aggregate ('Total') visitor rows only, keeping the year column for grouping.
tour = visits_df.loc[
    visits_df['País de residencia'].eq('Total'),
    ['Date', 'Total', 'yr'],
]

In [None]:
# One bar per year; bar height is the sum of that year's totals.
sns.barplot(
    x='yr',
    y='Total',
    data=tour,
    estimator=np.sum,
)

In [None]:
# Visitors per year (sum of the aggregate 'Total' rows within each year).
tour['Total'].groupby(tour['yr']).sum()

In [None]:
# Year-over-year relative change of the yearly visitor sum.
(
    tour['Total']
    .groupby(tour['yr'])
    .sum()
    .pct_change()
)

In [None]:
# Second, independent read of the tourist-count table (same file as INFILE1).
# 'cp1252' replaces the original 'ANSI': Python only resolves 'ANSI' on Windows
# (as an alias of the 'mbcs' codec), so the read failed with LookupError on any
# other platform. NOTE(review): assumes the file was exported with the Western
# European Windows code page -- confirm.
visits_qtr = pd.read_csv(
    INPATH + INFILE1,
    delimiter=';',
    decimal=',',
    thousands='.',
    encoding='cp1252',
)
# Despite the column name, 'qtr' holds the timestamp parsed from the
# <year>M<month> 'Periodo' string, not a quarter.
visits_qtr['qtr'] = pd.to_datetime(visits_qtr['Periodo'], format='%YM%m')
visits_qtr['yr'] = visits_qtr['qtr'].dt.year
# Show a short preview instead of dumping the entire frame into the output.
visits_qtr.head()

In [None]:
# Yearly sum of 'Total' over every row of the unfiltered table.
# NOTE(review): no 'País de residencia' filter is applied here, so the aggregate
# 'Total' rows are presumably added on top of the per-country rows -- confirm
# this double counting is intended.
visits_qtr['Total'].groupby(visits_qtr['yr']).sum()

In [None]:
# Training series: visitor totals up to and including 2019 (label-based slice
# on the date index).
endog_train = data.loc[:'2019', 'visit_total'].copy()

# Holt-Winters exponential smoothing with an additive, damped trend and an
# additive seasonal component.
# `damped_trend` is the current spelling of the option originally passed as
# `damped`; statsmodels renamed the keyword and current releases reject the old one.
hw = ExponentialSmoothing(
    endog=endog_train,
    trend='additive',
    damped_trend=True,
    seasonal='additive',
    seasonal_periods=12,  # 12 periods per cycle; the index is tagged 'MS' (monthly)
)
hw_res = hw.fit(optimized=True)

In [None]:
# In-sample fitted values as a named Series on the training index.
hw_preds = pd.Series(
    hw_res.fittedvalues,
    index=endog_train.index,
    name='p_visit_total',
)

In [None]:
# Display the summary of the fitted Holt-Winters results object.
hw_res.summary()

In [None]:
# Fitted values as a line, with the observed values through 2019 drawn as
# markers on the same axes.
ax = hw_preds.plot(label='predicted')
data.loc[:'2019', 'visit_total'].plot(ax=ax, style='o', label='actual')
ax.set_title('Total mensual de visitas totales en España')
ax.legend()
plt.show()