In [2]:
import torch; torch.manual_seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torch.distributions
import numpy as np
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200
from torch.autograd import Variable


In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
import pandas as pd
df = pd.read_csv("data/imputed/data_imp1.csv")

In [101]:
# Group data by sitename
sites = df['sitename'].unique()
sites_df = [df[df['sitename'] == site] for site in sites]

for i in range(len(sites_df)):
    sites_df[i]['date'] = pd.to_datetime(sites_df[i]['date'], format="%Y-%m-%d")
    sites_df[i] = sites_df[i].set_index("date")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sites_df[i]['date'] = pd.to_datetime(sites_df[i]['date'], format="%Y-%m-%d")


In [110]:
X = df[df.sitename != sites[65]].drop(columns=["Unnamed: 0","sitename","date"]).values
X_test = df[df.sitename == sites[65]].drop(columns=["Unnamed: 0","sitename","date"]).values

In [111]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_test =  scaler.transform(X_test)

In [112]:
def sliding_windows(data, seq_length):
    x = []
    y = []

    for i in range(len(data)-seq_length-1):
        _x = data[i:(i+seq_length)]
        _y = data[i+seq_length]
        x.append(_x)
        y.append(_y)

    return np.array(x),np.array(y)

In [113]:
x_train,y_train = sliding_windows(X,32)
x_test,y_test = sliding_windows(X_test,32)

In [114]:
trainX = Variable(torch.Tensor(np.array(x_train)))
trainY = Variable(torch.Tensor(np.array(y_train[:,9].reshape(-1,1))))

testX = Variable(torch.Tensor(np.array(x_test)))
testY = Variable(torch.Tensor(np.array(y_test[:,9].reshape(-1,1))))

In [115]:
class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        h_out = h_out.view(-1, self.hidden_size)
        
        out = self.fc(h_out)
        
        return out

In [153]:
num_epochs = 100
learning_rate = 0.01

input_size = 10
hidden_size = 5
num_layers = 1

num_classes = 1
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

In [154]:
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

n_batches=32
# Train the model
for epoch in range(num_epochs):
    for i in range(n_batches):
        # Local batches and labels
        local_X, local_y = trainX[i*n_batches:(i+1)*n_batches,], trainY[i*n_batches:(i+1)*n_batches,]
        outputs = lstm(local_X)
        optimizer.zero_grad()
    
        # obtain the loss function
        loss = criterion(outputs, local_y)
        
        loss.backward()
    
        optimizer.step()
    if epoch % 20 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))


Epoch: 0, loss: 5.16136
Epoch: 20, loss: 1.11405
Epoch: 40, loss: 0.82107
Epoch: 60, loss: 1.00603
Epoch: 80, loss: 0.89819


In [155]:
criterion(lstm(testX),testY)

tensor(0.5584, grad_fn=<MseLossBackward>)

In [156]:
%matplotlib widget
index = [i for i in range(testY.shape[0])]
plt.ion()
plt.scatter(index, lstm(testX).detach().numpy(), label="Predictions", s=1)
plt.scatter(index, testY.detach().numpy(), label="Ground truth"+" "+sites_df[0]['sitename'].unique(), s=1)
plt.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …