In [None]:
# Kernel used for the second interactive AMI class
# Import the libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, LSTM
import matplotlib.pyplot as plt

In [None]:
# pandas is already imported in the first cell; this repeated import is harmless.
import pandas as pd
# Read the Belém station CSV into a DataFrame.
df_belem = pd.read_csv("../input/temperature-timeseries-for-some-brazilian-cities/station_belem.csv")

In [None]:
# Overview of the raw table: columns, dtypes and non-null counts.
df_belem.info()

In [None]:
# First rows of the raw table.
df_belem.head()

In [None]:
# Last rows of the raw table.
df_belem.tail()

In [None]:
# Box plot of the 'JAN' column, drawn before any cleaning is applied.
df_belem.boxplot('JAN')

In [None]:
# Summary statistics of the numeric columns, still before cleaning.
df_belem.describe()

In [None]:
# Columns at positions 1..12 of the table; this selection is reused by later cells.
columns = df_belem.columns[1:13]

# Replace every 999.90 in those columns with NaN in a single vectorised call.
# NOTE(review): 999.90 looks like the dataset's missing-value sentinel — confirm.
df_belem[columns] = df_belem[columns].mask(df_belem[columns] == 999.90)
    

In [None]:
# Belém: the 999.90 values were already replaced by NaN in the previous cell, so only
# the linear interpolation remains. With limit_direction='backward', gaps followed by
# a valid value are filled; NaNs at the very end of a column are expected to stay NaN
# (see the pandas interpolate documentation).
df_clean_belem = df_belem.interpolate(method='linear',limit_direction='backward',axis=0)

# Curitiba: later cells (the July plot, the statistical tests and the LSTM series) use
# df_clean_curitiba, which this notebook never created. Build it here with the same
# steps applied to Belém.
# NOTE(review): assumes station_curitiba.csv sits next to station_belem.csv and has
# the same column layout — confirm against the dataset.
df_curitiba = pd.read_csv("../input/temperature-timeseries-for-some-brazilian-cities/station_curitiba.csv")
df_curitiba[columns] = df_curitiba[columns].mask(df_curitiba[columns] == 999.90)
df_clean_curitiba = df_curitiba.interpolate(method='linear',limit_direction='backward',axis=0)

# Last rows of the cleaned Belém table.
df_clean_belem.tail()

In [None]:
import matplotlib.ticker as ticker

# Plot the July series of both cities on a single axes (the "nicer" plot).
# figsize is given to the figure that is actually drawn on: calling
# plt.figure(figsize=(10,10)) and then plt.figure() again left an empty extra
# figure behind and the requested size never reached the plotted one.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_axes([0, 0, 1, 1])  # one axes covering the whole figure area
ax.plot(df_clean_curitiba['YEAR'], df_clean_curitiba['JUL'], color='r')
ax.plot(df_clean_belem['YEAR'], df_clean_belem['JUL'], color='b')

# y ticks every 0.75 across the automatically chosen limits, shown with one decimal
start, end = ax.get_ylim()
ax.yaxis.set_ticks(np.arange(start, end, 0.75))
ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))

# x ticks every 5 units of the YEAR axis, shown as integers
start, end = ax.get_xlim()
ax.xaxis.set_ticks(np.arange(start, end, 5.0))
ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%d'))

ax.set_xlabel('Anos')
ax.set_ylabel('Temperatura (ºC)')
ax.set_title('Temperaturas em Julho nas cidades de Curitiba e Belem')
# Legend entries follow the order in which the two lines were plotted.
ax.legend(["Curitiba", "Belém"])
plt.show()

In [None]:
# Paired Student's t-test on the July values of the two cities.
# The two arrays are paired element by element, so they must have the same length.
from scipy.stats import ttest_rel
data1 = df_clean_curitiba['JUL'].values
data2 = df_clean_belem['JUL'].values
stat, p = ttest_rel(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
# A p-value above 0.05 is reported as "same", anything else as "different".
print('Probably the same distribution' if p > 0.05 else 'Probably different distributions')

In [None]:
# One-way analysis of variance on the same two July samples
# (data1 and data2 come from the t-test cell).
from scipy.stats import f_oneway
stat, p = f_oneway(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
# Same 0.05 threshold as in the t-test cell.
print('Probably the same distribution' if p > 0.05 else 'Probably different distributions')

***Criando um modelo de previsão de séries temporais***

In [None]:
# Collect the time series for the forecasting model: the Curitiba values of the
# columns selected in `columns`, as a NumPy array (one row per table row).
df_out_new=df_clean_curitiba[columns].values

In [None]:
# Shape of the extracted array (rows, selected columns).
df_out_new.shape

In [None]:
# Normalise the data to the [0, 1] range.
# The array is flattened row by row into a single column and its last two values are
# dropped before scaling (Nov and Dec of 2019, per the original author's note).
# NOTE(review): the scaler is fitted on the whole series, before the train/test split
# done in a later cell, so test values influence the scaling.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
serie_out = scaler.fit_transform(df_out_new.reshape(-1, 1)[:-2]) # dropping Nov and Dec of 2019

In [None]:
# Shape of the scaled series (a single column).
serie_out.shape

In [None]:
# Chronological split: the first 70% of the series is used for training and the
# remainder for testing (no shuffling).
split_at = int(len(serie_out) * 0.7)
train, test = serie_out[:split_at], serie_out[split_at:]

In [None]:
# Shape of the training split.
train.shape

In [None]:
# Shape of the test split.
test.shape

In [None]:
# Number of samples in the training and test splits
print(len(train), len(test))

In [None]:
# Builds the sliding-window samples used as input to the LSTM network.
def create_dataset(dataset, look_back=1):
    """Turn a single-column series into (window, next value) training pairs.

    Parameters
    ----------
    dataset : 2-D array
        Only column 0 is read.
    look_back : int
        Number of consecutive values in each input window.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        X[k] holds dataset[k : k + look_back, 0] and y[k] holds
        dataset[k + look_back, 0].

    Notes
    -----
    Exactly len(dataset) - look_back - 1 pairs are produced, so the final window
    that would still fit in the series is not emitted. Code that positions the
    predictions for plotting depends on this count.
    """
    n_pairs = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_pairs)]
    targets = [dataset[start + look_back, 0] for start in range(n_pairs)]
    return np.array(windows), np.array(targets)

In [None]:
# Each sample uses the previous 12 values of the series to predict the next one.
look_back = 12
# The windows are built separately for each split, so no window spans the
# train/test boundary.
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [None]:
# Shape of the training inputs (samples, look_back).
trainX.shape

In [None]:
# Shape of the training targets (one value per sample).
trainY.shape

In [None]:
# Put the inputs in the 3-D layout used for the LSTM network:
# [samples, time steps, features], with a single feature per time step.
trainX = trainX.reshape((trainX.shape[0], look_back, 1))
testX = testX.reshape((testX.shape[0], look_back, 1))

Definindo o modelo LSTM

In [None]:
from tensorflow.keras.models import Sequential

In [None]:
# Fix TensorFlow's global seed before any layer is created so that the random weight
# initialisation (and any TensorFlow-driven shuffling during training) is repeatable
# when the notebook is restarted and run from the top. Low-level op nondeterminism
# may still cause small differences between runs.
tf.random.set_seed(42)

# Small recurrent model: one LSTM layer with 4 units reading windows of
# (look_back, 1), followed by a single linear output unit.
model = Sequential()
model.add(LSTM(4, input_shape=(look_back,1)))
model.add(Dense(1))
# Regression setup: mean squared error loss, Adam optimiser.
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Show the model: layers, output shapes and parameter counts
model.summary()

In [None]:
# Train the model: 15 passes over the training windows, one sample per batch,
# with verbose=2 logging.
model.fit(trainX, trainY, epochs=15, batch_size=1, verbose=2)

In [None]:
# Run the predictions on both splits (still in the scaled [0, 1] range)
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)


In [None]:
# Undo the min-max scaling on predictions and targets.
# trainY / testY are wrapped in a list, i.e. passed as a single row, so the results
# are 2-D arrays with one row; the scoring cell reads that row with [0].
# The scaled arrays are overwritten under the same names, so this cell is meant to be
# executed only once per kernel run.
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])


In [None]:
import math
from sklearn.metrics import mean_squared_error

In [None]:
# Compute the prediction errors: root mean squared error on each split, in the
# original (unscaled) units. trainY[0] / testY[0] select the single row produced by
# the inverse transform; [:,0] selects the only column of the predictions.
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Pontuação para o treinamento: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Pontuação para o teste: %.2f RMSE' % (testScore))

In [None]:
# Shift the training predictions so they line up with the full series when plotted:
# start from an all-NaN array shaped like serie_out and write the predictions from
# index look_back onwards (the first look_back points have no prediction).
trainPredictPlot = np.full_like(serie_out, np.nan)
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict


In [None]:
# Shift the test predictions so they line up with the full series when plotted.
# create_dataset yields len(split) - look_back - 1 samples, so
# len(trainPredict) + 2 * look_back + 1 equals len(train) + look_back: the position
# of the first test target. The slice stops one element before the end of the series.
testPredictPlot = np.full_like(serie_out, np.nan)
test_start = len(trainPredict) + 2 * look_back + 1
test_stop = len(serie_out) - 1
testPredictPlot[test_start:test_stop, :] = testPredict


In [None]:
# Final comparison plot: the real series (converted back to the original units)
# together with the shifted training and test predictions.
plt.figure(figsize=(16,8))
plt.plot(scaler.inverse_transform(serie_out))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
# Legend entries follow the order of the three plot calls above.
plt.legend(["Real","Previsao Treinamento","Previsão Teste"])
plt.show()