In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from models.LSTMReg import LSTM
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the Jena climate recordings and parse the timestamp column.
# The published Jena dataset stores timestamps day-first ("dd.mm.yyyy HH:MM:SS").
# Without an explicit format pandas can read ambiguous dates such as "01.02.2009"
# month-first (or reject days > 12, depending on the pandas version), so the
# format is pinned here.
df = pd.read_csv('data/jena_climate_2009_2016.csv')
df["Date Time"] = pd.to_datetime(df["Date Time"], format="%d.%m.%Y %H:%M:%S")

In [3]:
# Columns used to build the lagged model inputs; 'T (degC)' is also the value being predicted.
important_features = [
    'VPmax (mbar)',
    'wv (m/s)',
    'T (degC)',
]

In [4]:
# Independent working frame holding only the three columns the model uses
# (same columns, same order as `important_features`).
TrainFrame = df[['VPmax (mbar)', 'wv (m/s)', 'T (degC)']].copy()

In [5]:
def create_features(df, shift, cols, target='T (degC)'):
    """Build a lag-feature frame for one-step-ahead regression.

    For the j-th column in ``cols`` (1-based) the result holds ``shift`` lagged
    copies named ``'var {j} (t-{shift})' ... 'var {j} (t-1)'``, and the raw
    column itself is removed. The un-lagged ``target`` values are appended as
    the last column, ``'var(t)'``.

    The input frame is left untouched; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame containing every column in ``cols`` and ``target``.
    shift : int
        Number of past steps to expose per column.
    cols : iterable of str
        Columns to turn into lag features (each is dropped after lagging).
    target : str, optional
        Column whose current value becomes ``'var(t)'``. Defaults to
        ``'T (degC)'``, the column that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Lagged features followed by ``'var(t)'``. The first ``shift`` rows
        contain NaN in the lag columns because no history exists for them.

    Raises
    ------
    KeyError
        If ``target`` or any entry of ``cols`` is not a column of ``df``.
    """
    # Capture the target before any column is dropped: it is usually also in `cols`.
    target_values = df[target].to_numpy()

    # Work on a copy so the caller's frame keeps its original columns.
    lagged = df.copy()
    for j, col in enumerate(cols, start=1):
        # Oldest lag first, so columns read left-to-right as t-shift ... t-1.
        for i in range(shift, 0, -1):
            lagged[f'var {j} (t-{i})'] = lagged[col].shift(i)
        lagged = lagged.drop(columns=[col])

    lagged["var(t)"] = target_values
    return lagged

def split_data(df, day_start, length, test_day, samples_per_day=144):
    """Cut a contiguous training window and the test window that follows it.

    Rows are selected by position. The last column of ``df`` is treated as the
    target and all preceding columns as inputs.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose last column is the target.
    day_start : int
        Day offset (from row 0) at which the training window begins.
    length : int
        Training window length in days.
    test_day : int
        Test window length in days; it starts right after the training window.
    samples_per_day : int, optional
        Rows per day. Defaults to 144, the value that was previously
        hard-coded (presumably one row every 10 minutes — confirm against
        the data).

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``; the X parts are DataFrames and
        the y parts are Series. Windows running past the end of ``df`` are
        silently truncated by positional slicing.
    """
    train_begin = day_start * samples_per_day
    train_end = (day_start + length) * samples_per_day
    # The test window must be measured from the end of the training window.
    # Leaving out `day_start` (as before) shrank or emptied it whenever day_start > 0.
    test_end = (day_start + length + test_day) * samples_per_day

    X_train = df.iloc[train_begin:train_end, :-1]
    y_train = df.iloc[train_begin:train_end, -1]

    X_test = df.iloc[train_end:test_end, :-1]
    y_test = df.iloc[train_end:test_end, -1]

    return X_train, y_train, X_test, y_test

In [6]:
# Swap the raw columns for three lagged copies of each feature plus the target column.
# NOTE: re-running this cell on the already transformed frame fails, because the raw columns are gone.
TrainFrame = create_features(TrainFrame, shift=3, cols=important_features)

In [7]:
# The leading rows have no history, so their lag columns are NaN; replace those with 0.
# NOTE(review): the zeros are indistinguishable from genuine readings — consider
# dropping the incomplete rows instead.
TrainFrame = TrainFrame.fillna(0)
TrainFrame

Unnamed: 0,var 1 (t-3),var 1 (t-2),var 1 (t-1),var 2 (t-3),var 2 (t-2),var 2 (t-1),var 3 (t-3),var 3 (t-2),var 3 (t-1),var(t)
0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,-8.02
1,0.00,0.00,3.33,0.00,0.00,1.03,0.00,0.00,-8.02,-8.41
2,0.00,3.33,3.23,0.00,1.03,0.72,0.00,-8.02,-8.41,-8.51
3,3.33,3.23,3.21,1.03,0.72,0.19,-8.02,-8.41,-8.51,-8.31
4,3.23,3.21,3.26,0.72,0.19,0.34,-8.41,-8.51,-8.31,-8.27
...,...,...,...,...,...,...,...,...,...,...
420546,4.51,4.62,4.56,1.25,0.89,0.56,-4.09,-3.76,-3.93,-4.05
420547,4.62,4.56,4.52,0.89,0.56,0.67,-3.76,-3.93,-4.05,-3.35
420548,4.56,4.52,4.77,0.56,0.67,1.14,-3.93,-4.05,-3.35,-3.16
420549,4.52,4.77,4.84,0.67,1.14,1.08,-4.05,-3.35,-3.16,-4.23


In [8]:
# Rescale every column (lag features and the target alike) into the [0, 1] range.
# NOTE(review): the scaler is fitted on the whole frame before the train/test split,
# so statistics from the test period leak into the training inputs — consider fitting
# on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
# Wrap the scaled values back into a DataFrame so positional slicing still works downstream.
TrainFrame = pd.DataFrame(scaler.fit_transform(TrainFrame))

In [9]:
# Train on the first 30 days of data and evaluate on the single day that follows.
X_train, y_train, X_test, y_test = split_data(
    TrainFrame,
    day_start=0,
    length=30,
    test_day=1,
)

In [10]:
# Convert the pandas splits to torch's default tensor type (float32 unless the default was changed).
# NOTE: re-running this cell fails, since the names then hold tensors rather than pandas objects.
X_train, X_test = (
    torch.from_numpy(part.to_numpy()).type(torch.Tensor)
    for part in (X_train, X_test)
)
# Targets are additionally flattened to 1-D.
y_train, y_test = (
    torch.from_numpy(part.to_numpy()).type(torch.Tensor).view(-1)
    for part in (y_train, y_test)
)

In [11]:
# Pair inputs with targets so each loader yields (features, target) batches.
train_ds, test_ds = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)

# Batches of 16: training rows are drawn in random order, test rows in stored order.
train_dl = DataLoader(dataset=train_ds, batch_size=16, sampler=RandomSampler(train_ds))
test_dl = DataLoader(dataset=test_ds, batch_size=16, sampler=SequentialSampler(test_ds))

In [14]:
# Use the first GPU when one is present; otherwise fall back to the CPU instead of
# crashing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # Make GPU 0 the current CUDA device, as the notebook did originally.
    torch.cuda.set_device(0)

In [15]:
# Not used in this cell; kept in case other cells rely on the name.
from torch.nn import functional as F

# ---- model / optimisation settings ----
input_size = 9        # feature columns per sample (3 variables x 3 lags)
hidden_size = 100
num_layer = 2
output_dim = 1


lr = 0.003
n_epochs = 25
iterations_per_epoch = len(train_dl)
best_error = 1000000  # sentinel larger than any validation error expected here
# NOTE(review): patience (100) exceeds n_epochs (25), so the early-stopping branch
# below cannot trigger with these settings.
patience, trials = 100, 0

model = LSTM(input_size, hidden_size, num_layer, output_dim, device)
model = model.to(device)

loss_fn = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=lr)


print('Start model training')

for epoch in range(1, n_epochs + 1):

    # ---- training pass ----
    # Switch to training mode once per epoch rather than once per batch.
    model.train()
    for x_batch, y_batch in train_dl:
        # Move the batch to the device the model was placed on.
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        optimiser.zero_grad()
        # Add a leading axis of size 1. The -1 keeps this working for a batch that
        # is smaller than 16 rows (a fixed [1, 16, 9] view would raise there).
        out = model(x_batch.view(1, -1, input_size))
        # NOTE(review): assumes the model output is shaped compatibly with the 1-D
        # target; confirm against models.LSTMReg to rule out silent broadcasting.
        loss = loss_fn(out, y_batch)
        loss.backward()
        optimiser.step()

    # ---- validation pass ----
    model.eval()
    error = 0.0
    # No gradients are needed for evaluation. Accumulating plain floats avoids
    # keeping autograd graphs (and GPU tensors) alive in `error` / `best_error`.
    with torch.no_grad():
        for x_val, y_val in test_dl:
            x_val, y_val = x_val.to(device), y_val.to(device)
            out = model(x_val.view(1, -1, input_size))
            # Sum of per-batch MSE values over the whole test loader.
            error += loss_fn(out, y_val).item()

    if epoch % 2 == 0:
        # `loss` is the loss of the last training batch of this epoch.
        print(f'Epoch: {epoch:3d}. Loss: {loss.item():.4f}. Error.: {error:2.2}')

    if error < best_error:
        # New best validation error: reset the patience counter and checkpoint.
        trials = 0
        best_error = error
        torch.save(model.state_dict(), 'best.pth')
        print(f'Epoch {epoch} best model saved with Error: {best_error:2.2}')
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break

Start model training
Epoch 1 best model saved with Error: 0.025
Epoch:   2. Loss: 0.0070. Error.: 0.0011
Epoch 2 best model saved with Error: 0.0011
Epoch:   4. Loss: 0.0108. Error.: 0.0012
Epoch:   6. Loss: 0.0081. Error.: 0.00038
Epoch 6 best model saved with Error: 0.00038
Epoch:   8. Loss: 0.0060. Error.: 0.00013
Epoch 8 best model saved with Error: 0.00013
Epoch:  10. Loss: 0.0032. Error.: 0.0053
Epoch 11 best model saved with Error: 6.8e-05
Epoch:  12. Loss: 0.0107. Error.: 0.00018
Epoch:  14. Loss: 0.0110. Error.: 0.0015
Epoch:  16. Loss: 0.0063. Error.: 0.00019
Epoch:  18. Loss: 0.0094. Error.: 0.0042
Epoch:  20. Loss: 0.0098. Error.: 7.6e-05
Epoch:  22. Loss: 0.0069. Error.: 0.00068
Epoch:  24. Loss: 0.0117. Error.: 7.1e-05


In [16]:
import decimal

# Spell out the best validation error from the training log (6.8e-05) in plain decimal notation.
decimal.Decimal('6.8e-05')

Decimal('0.000068')