# 3. Series de tiempo y ARIMA

### Finanzas Cuantitativas y Ciencia de Datos
#### Rodrigo Lugo Frias y León Berdichevsky Acosta
#### ITAM Primavera 2019


Con este notebook pueden ver, de principio a fin, cómo trabajar con series de tiempo e implementar un modelo de predicción basado en ARIMA.

---

_INSTRUCCIONES:_
* Todas las celdas se corren haciendo __Shift + Enter__ o __Ctrl + Enter__

_NOTAS:_
* _Notebook adaptado de distintas fuentes y proyectos_

In [None]:
%matplotlib inline

# Core libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(font_scale=1.5)  # apply seaborn's default theme with a 1.5x font scale
import datetime as dt

# Silence all warnings
# NOTE(review): this filter is global, so deprecation warnings raised by the
# libraries used further down are hidden as well.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Locations of the CSV files, one per ticker.
stocks = [
    'data/ALSEA MM Equity.csv',
    'data/AMXL MM Equity.csv',
    'data/BIMBOA MM Equity.csv',
    'data/PE&OLES MM Equity.csv',
]

# Read every file into its own DataFrame, in the same order as `stocks`.
alsea, amxl, bimbo, penoles = (pd.read_csv(csv_path) for csv_path in stocks)

In [None]:
# Print column dtypes and non-null counts of the PENOLES frame as loaded from CSV.
penoles.info()

In [None]:
def change_date( df ):
    """Return a copy of ``df`` indexed by its parsed ``Date`` column.

    The ``Date`` column is expected to hold values that read as ``YYYYMMDD``
    once converted to text (e.g. the integer ``20190102``). The input frame
    is left untouched, so the function can be called again on the same raw
    frame and always gives the same answer.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Date`` column. The first column is dropped from the
        result, which assumes ``Date`` is that first column.

    Returns
    -------
    pandas.DataFrame
        New frame whose index is named ``Date`` and holds the parsed
        timestamps, containing every column of ``df`` except the first.

    Raises
    ------
    ValueError
        If a date value does not match the ``%Y%m%d`` format.
    """
    # Work on a copy so the caller's frame is never modified.
    out = df.copy()

    # Parse the whole column in one vectorised call instead of row by row.
    out['Date'] = pd.to_datetime(out['Date'].astype(str), format = "%Y%m%d")

    # Passing the Series (not the label) keeps the index name 'Date'.
    out = out.set_index(out['Date'])

    # Drop the first column, which is the now-redundant 'Date' column.
    return out[out.columns[1:]]

# Re-index the PENOLES frame by parsed date; the name now refers to the converted frame.
penoles = change_date(penoles)

# Show the last rows to check the conversion.
penoles.tail()

In [None]:
# Inspect index type, dtypes and non-null counts after the date conversion.
penoles.info()

In [None]:
# Descriptive statistics of the PENOLES columns.
penoles.describe()

In [None]:
# Apply the same date re-indexing to the remaining three tickers.
alsea, amxl, bimbo = (change_date(raw_frame) for raw_frame in (alsea, amxl, bimbo))

In [None]:
# Column compared across tickers (presumably the last traded price - confirm
# against the CSV schema).
x = 'Last'

# Put the four series side by side, aligned on their date index.
price_series = [frame[x] for frame in (alsea, amxl, bimbo, penoles)]
df = pd.concat(price_series, axis=1)
df.columns = ['ALSEA', 'AMXL', 'BIMBO', 'PENOLES']

# Keep only the last 1000 rows.
df = df.copy().tail(1000)

In [None]:
# Price history of the four tickers on a single set of axes.
fig, ax = plt.subplots(figsize = (10,7))

# Plot first, label afterwards: pandas sets the x-axis label to the index
# name while plotting, which would overwrite a label set beforehand.
df.plot(ax = ax)

ax.set_xlabel(' ')
ax.set_ylabel('Price ($ MXN)')
ax.set_title('Mexican companies stocks')
plt.show()

In [None]:
# Yearly average number of shares, keyed by the year as a string.
# NOTE(review): the same yearly figure is later applied to every ticker -
# confirm that a single share count per year is intended.
shares = {
    '2019': 172e6,
    '2018': 168e6,
    '2017': 162e6,
    '2016': 144e6,
    '2015': 128e6,
}

# Add the calendar year of each row, move the dates from the index into a
# 'Date' column, and create zero-filled placeholders for the market caps.
df = (
    df.assign(Year=df.index.year)
      .reset_index(level=0)
      .assign(**dict.fromkeys(
          ['MktCap_ALSEA', 'MktCap_AMXL', 'MktCap_BIMBO', 'MktCap_PENOLES'], 0))
)

In [None]:
# Check the new columns and their dtypes before the market caps are filled in.
df.info()

In [None]:
# Preview the last rows of the combined frame.
df.tail()

In [None]:
# Calculate market cap for all years, in billions (shares * price / 1e9).
#
# The computation is vectorised: `DataFrame.ix` no longer exists in current
# pandas, and a row-by-row loop does four scalar writes per row.

# Share count matching each row's year (the keys of `shares` are strings).
shares_by_row = df['Year'].astype(str).map(shares)

# Fail loudly, as a plain dictionary lookup would, when a year has no entry.
missing_years = sorted(df.loc[shares_by_row.isna(), 'Year'].unique())
if missing_years:
    raise KeyError('No share count available for year(s): {}'.format(missing_years))

# Price column -> market-cap column it feeds.
cap_columns = {
    'ALSEA':   'MktCap_ALSEA',
    'AMXL':    'MktCap_AMXL',
    'BIMBO':   'MktCap_BIMBO',
    'PENOLES': 'MktCap_PENOLES',
}
for price_col, cap_col in cap_columns.items():
    df[cap_col] = (shares_by_row * df[price_col]) / 1e9

In [None]:
# Re-check dtypes and non-null counts now that the market-cap columns are filled.
df.info()

In [None]:
# Show five randomly chosen rows (no random_state is set, so the rows differ between runs).
df.sample(5)

In [None]:
# Market caps of ALSEA, AMXL and BIMBO, indexed by date and labelled by ticker.
market_cap = (
    df[['Date','MktCap_ALSEA','MktCap_AMXL','MktCap_BIMBO']]
    .rename(columns={
        'MktCap_ALSEA': 'ALSEA',
        'MktCap_AMXL': 'AMXL',
        'MktCap_BIMBO': 'BIMBO',
    })
    .set_index('Date')
)
market_cap.tail()

In [None]:
# Market capitalisation of the selected tickers over time.
fig, ax = plt.subplots(figsize = (10,7))

# Plot first, label afterwards: pandas sets the x-axis label to the index
# name while plotting, which would overwrite a label set beforehand.
market_cap.plot(ax = ax)

ax.set_xlabel(' ')
ax.set_ylabel('Market Cap ($ Bn)')
ax.set_title('Mexican companies stocks')
plt.show()

#### Under this analysis, is AMXL still an attractive company to invest in?

In [None]:
# AMXL price and market cap, indexed by date.
amxl_corp = df.copy()[['Date','AMXL','MktCap_AMXL']]
amxl_corp.set_index('Date',inplace=True)
amxl_corp.columns = ['Price','MktCap']

# Two y-axes sharing one x-axis: price on the left, market cap on the right.
fig = plt.figure(figsize = (10,7))
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()

amxl_corp.Price.plot(ax = ax1, legend=False, color='r')
amxl_corp.MktCap.plot(ax = ax2, legend=False, color='g')

# Label after plotting so pandas does not overwrite the x-axis label with the
# index name. The title goes on this figure's own axes (ax1), not on an `ax`
# left over from an earlier cell, and names the company actually plotted.
ax1.set_xlabel(' ')
ax1.set_ylabel('Price ($)')
ax2.set_ylabel('Market Cap ($ Bn)')
ax1.set_title('AMXL')

plt.show()

In [None]:
# Kernel density estimate of the AMXL price.
fig, ax = plt.subplots(figsize = (10,7))

# Plot first, label afterwards: the density plot applies its own y-axis
# label, which would replace one set beforehand.
amxl_corp.Price.plot.density(ax = ax)

ax.set_xlabel('Price ($)')
ax.set_ylabel('Prob. Density')
# The figure shows a single AMXL series, so the title names it.
ax.set_title('AMXL price distribution')
plt.show()

#### ARIMA

In [None]:
from pandas.plotting import autocorrelation_plot

# First 60 rows of the AMXL price series; this sample is what gets modelled.
amxl_sample = amxl_corp.copy()['Price'].head(60)

# Autocorrelation of the sample, drawn on explicitly created axes.
fig, ax = plt.subplots()
autocorrelation_plot(amxl_sample, ax=ax)
plt.show()

In [None]:
# NOTE(review): this import path is the legacy ARIMA implementation; newer
# statsmodels releases expose ARIMA elsewhere - confirm against the installed version.
from statsmodels.tsa.arima_model import ARIMA

# (p, d, q): two autoregressive lags, first differencing, no moving-average term.
arima_order = (2, 1, 0)

model = ARIMA(amxl_sample, order=arima_order)
model_fit = model.fit(disp=0)  # disp=0 is passed through to fit() unchanged
print(model_fit.summary())

In [None]:
# Residuals of the fitted model, wrapped in a DataFrame for plotting.
residuals = pd.DataFrame(model_fit.resid)

# Residuals as a line plot, then their kernel density estimate.
for plot_kind in ('line', 'kde'):
    residuals.plot(kind=plot_kind)
    plt.show()

# Summary statistics of the residuals.
print(residuals.describe())