What you can find here:
 - Time Series

## *Time Series*

Don't forget to convert the timestamp column from the `object` dtype to a pandas datetime dtype (for example with `pd.to_datetime`).

In [None]:
# Augmented Dickey-Fuller (ADF) test for stationarity.
# H0 (null hypothesis): the series has a unit root, i.e. it is non-stationary.
#   p-value >  0.05: fail to reject H0 -> treat the series as non-stationary.
#   p-value <= 0.05: reject H0 -> treat the series as stationary.
# NOTE(review): this cell needs `data` to be loaded already, with a
# `close_price` column -- confirm against the data-loading cell.

# `adfuller` was called here without being imported anywhere; import it in the
# same cell so the cell also runs on a fresh kernel.
from statsmodels.tsa.stattools import adfuller

X = data['close_price'].values

# Only three entries of the returned tuple are reported below:
# [0] the test statistic, [1] the p-value, [4] the dict of critical values.
result = adfuller(X)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

In [None]:
# Difference transform: first-order differencing, i.e. every value minus the
# value one row before it. The first row has no predecessor, so it becomes NaN.

NewData = data['close_price'].diff(periods=1)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error
from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error

from scipy.optimize import minimize
import statsmodels.tsa.api as smt
import statsmodels.api as sm

from tqdm import tqdm_notebook

from itertools import product

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Parameters
    ----------
    y_true : actual values (numpy array, pandas object, list or tuple).
    y_pred : predicted values, same length as ``y_true``.

    Notes
    -----
    Entries of ``y_true`` equal to zero are divided by as-is, so the result
    is then infinite or NaN.
    """
    # Plain Python sequences do not support element-wise arithmetic, so promote
    # them to arrays. numpy/pandas inputs are left untouched to keep their own
    # semantics (e.g. pandas index alignment).
    if isinstance(y_true, (list, tuple)):
        y_true = np.asarray(y_true)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

import warnings
# Install a blanket 'ignore' filter: no category or module is given, so it
# applies to every warning raised after this point.
warnings.filterwarnings('ignore')

# IPython magic (not plain Python): selects matplotlib's inline backend.
%matplotlib inline

# Sample stock-price CSV, given relative to the working directory.
DATAPATH = './data/stock_prices_sample.csv'

# Read the file with the DATE column parsed as dates and used as the index.
data = pd.read_csv(DATAPATH, parse_dates=['DATE'], index_col=['DATE'])
data.head(10)

# Columns removed from the frame once the rows have been filtered.
drop_cols = [
    'SPLIT_RATIO', 'EX_DIVIDEND', 'ADJ_FACTOR', 'ADJ_VOLUME',
    'ADJ_CLOSE', 'ADJ_LOW', 'ADJ_HIGH', 'ADJ_OPEN',
    'VOLUME', 'FREQUENCY', 'TYPE', 'FIGI',
]

# Discard the rows whose ticker is 'GEF' or whose type is 'Intraday', then
# remove the columns listed above.
data = data[(data.TICKER != 'GEF') & (data.TYPE != 'Intraday')].drop(columns=drop_cols)

data.head()

In [None]:
def plot_moving_average(series, window, plot_intervals=False, scale=1.96):
    """Plot a series together with its rolling mean on a new 17x8 figure.

    Parameters
    ----------
    series : values to smooth; must support ``.rolling(...)`` and slicing
        (presumably a pandas Series -- the calls in this notebook pass
        ``data.CLOSE``).
    window : rolling-window size. The first ``window`` observations are also
        left out of the "Actual values" line and of the error estimate.
    plot_intervals : when true, additionally draw dashed upper/lower bounds
        around the rolling mean.
    scale : multiplier applied to the standard deviation of the residuals
        when building the bounds.

    Returns nothing; the plot is drawn through ``matplotlib.pyplot``.
    """
    smoothed = series.rolling(window=window).mean()
    # Observations after the first `window` positions.
    observed = series[window:]

    plt.figure(figsize=(17, 8))
    plt.title('Moving average\n window size = {}'.format(window))
    plt.plot(smoothed, 'g', label='Rolling mean trend')

    # Optional band: rolling mean +/- (MAE + scale * std of the residuals).
    if plot_intervals:
        fitted = smoothed[window:]
        half_width = mean_absolute_error(observed, fitted) + scale * np.std(observed - fitted)
        plt.plot(smoothed + half_width, 'r--', label='Upper bound / Lower bound')
        plt.plot(smoothed - half_width, 'r--')

    plt.plot(observed, label='Actual values')
    plt.legend(loc='best')
    plt.grid(True)

# Weekly smoothing: rolling mean over the previous 5 days.
plot_moving_average(series=data.CLOSE, window=5)

# Monthly smoothing: rolling mean over the previous 30 days.
plot_moving_average(series=data.CLOSE, window=30)

# Quarterly smoothing: rolling mean over the previous 90 days, with the
# upper/lower bounds drawn as well.
plot_moving_average(series=data.CLOSE, window=90, plot_intervals=True)

In [None]:
# NOTE: this code came from another source - TODO: check whether anything in it is useful

import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from pyramid.arima import auto_arima

# Build the dataset.
# NOTE(review): machine-specific absolute path -- point it at your local copy
# of the CSV before running.
AirPassengers = '/home/salomao/Desktop/Data Science - Udemy/4 - Séries Temporais/Dados/AirPassengers.csv'


def dateparse(dates):
    """Convert 'YYYY-MM' text (one string or an array of them) to pandas datetimes."""
    # `pd.datetime` is not available in current pandas releases;
    # `pd.to_datetime` with an explicit format is the supported equivalent.
    return pd.to_datetime(dates, format='%Y-%m')


# Read with 'Month' as the index, then convert the index explicitly rather than
# going through read_csv's deprecated `date_parser` argument.
base = pd.read_csv(AirPassengers, index_col='Month')
base.index = dateparse(base.index)
ts = base['#Passengers']

# Display and slices.
# Yearly totals: resample with the 'A' (annual) rule and sum each bin.
ts_ano = ts.resample(rule='A').sum()
# Totals per calendar month number, summed across all years.
ts_mes = ts.groupby(by=[lambda stamp: stamp.month]).sum()
plt.plot(ts_mes)

# Decomposition: split the series into its trend, seasonal and residual parts.
decomposicao = seasonal_decompose(ts)
tendencia, sazonal, aleatorio = (
    decomposicao.trend,     # trend
    decomposicao.seasonal,  # seasonality
    decomposicao.resid,     # residual (random) part
)

# One panel per series, stacked in a 4-row, 1-column grid
# (411 is shorthand for rows=4, cols=1, index=1, and so on).
plt.subplot(411)
plt.plot(ts, label='Original')
plt.subplot(412)
plt.plot(tendencia, label='Tendencia')
plt.subplot(413)
plt.plot(sazonal, label='Sazonalidade')
plt.subplot(414)
plt.plot(aleatorio, label='Aleatório')
# Adjust the spacing between the panels.
plt.tight_layout()

# Forecasting (3 ways to do it)
# 1: mean
ts.mean() # high chance of error, because the series is not stationary
ts['1960-01-01':'1960-12-31'].mean() # mean of the last year only
# 2: moving average
media_movel = ts.rolling(window = 12).mean() # computes the moving average (window = subset size)
previsoes = []
for i in range(1,13): # puts the means of the moving averages into the forecast list
    # Each slice ends (exclusively) i positions before the end of the series
    # and spans 11 values.
    superior = len(media_movel) - i
    inferior = superior - 11
    previsoes.append(media_movel[inferior:superior].mean())
# The loop fills the list from the latest slice backwards; reverse it.
previsoes = previsoes[::-1]
plt.plot(previsoes)
# 3: ARIMA
# NOTE(review): `auto_arima` comes from `pyramid.arima` and `ARIMA` from
# `statsmodels.tsa.arima_model` (see this cell's imports); both locations are
# believed to be obsolete in current releases -- confirm against the installed
# versions before relying on this section.
modeo_auto = auto_arima(ts,m=12,seasonal = True, trace=True).summary() # gives the values to put in `order` below (they are listed under SARIMAX)
modelo = ARIMA(ts, order=(2,1,2)) # creates the model
modelo_treinado = modelo.fit() # trains the model
# `previsoes` is rebound here: it no longer holds the moving-average list.
previsoes = modelo_treinado.forecast(steps=12) # makes the forecasts (number of months forecast)
eixo = ts.plot()
start,end = '1960-01-01','1962-01-01'
modelo_treinado.plot_predict(start,end,ax=eixo,plot_insample=True) # plots and predicts (start, end of the forecast, draws on the same axes)
plt.show()