In [1]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from torch.utils.data import TensorDataset, DataLoader

In [2]:
# load the data
_dir = os.path.abspath('')
data_path = os.path.join(_dir, "../data/cleaned_data.csv")
df = pd.read_csv(data_path)
df = df.drop(df.columns[0], axis=1)
df = df.drop(df.columns[0], axis=1)
print(df)

      val   val.1  val.2  val.3  val.4  val.5        mu
0   69.05   43.50 -23.12   1.00    1.1   -3.7  0.122147
1   60.75   34.33 -15.00   1.67    1.1   -5.8  0.050202
2   55.60   44.00  -8.05   2.85    1.1   -8.5  0.058697
3   63.50   53.75  -1.43   3.95    1.2   -9.1  0.182528
4   62.33   82.57   6.03   6.33    1.5   76.4  0.234987
..    ...     ...    ...    ...    ...    ...       ...
79  61.93  109.15  16.28  13.98    1.8   86.4  0.148817
80  74.20  259.08   9.73  10.52    1.5   61.0  0.175548
81  74.93   77.93   2.95   6.53    1.2    0.7  0.172814
82  77.00   57.15  -3.83   4.57    1.1  -18.1  0.169309
83  73.93   62.28 -11.73   2.62    1.1   -7.5  0.163561

[84 rows x 7 columns]


In [3]:
# preprocess the data
# split it into train, validation, test sets
train_df = df.iloc[:36]
print(train_df)
valid_df = df.iloc[36:60]
print(valid_df)
test_df = df.iloc[60:]
print(test_df)

      val   val.1  val.2  val.3  val.4  val.5        mu
0   69.05   43.50 -23.12   1.00    1.1   -3.7  0.122147
1   60.75   34.33 -15.00   1.67    1.1   -5.8  0.050202
2   55.60   44.00  -8.05   2.85    1.1   -8.5  0.058697
3   63.50   53.75  -1.43   3.95    1.2   -9.1  0.182528
4   62.33   82.57   6.03   6.33    1.5   76.4  0.234987
5   64.72  127.23  12.05  10.05    2.1  116.3  0.188156
6   62.20   67.75  15.95  13.98    2.2  117.1  0.191477
7   68.15   75.32  13.53  12.73    2.2   86.2  0.138820
8   62.55  103.60  11.23  10.55    1.8   57.9  0.178267
9   68.75   56.33   3.75   6.93    1.4  -13.1  0.200559
10  79.28   66.75  -3.93   4.20    1.1  -13.0  0.192960
11  59.08   62.15 -16.88   1.77    1.1   -6.0  0.161768
12  60.00   29.35 -20.00   1.20    1.1   -6.8  0.121903
13  59.08   36.08 -14.62   1.70    1.1   -4.9  0.063480
14  63.40   47.03 -10.12   2.35    1.1   -8.9  0.054093
15  65.12   64.43   1.57   4.65    1.1   -7.6  0.156610
16  61.88   37.67   7.38   7.28    1.4   64.0  0

In [11]:
# setup train data
train_x, train_y = train_df.iloc[:, :-1].to_numpy(), train_df.iloc[:, [-1]].to_numpy() # raw numpy

# setup validation data
valid_x, valid_y = valid_df.iloc[:, :-1].to_numpy(), valid_df.iloc[:, [-1]].to_numpy()

# setup test data
test_x, test_y = test_df.iloc[:, :-1].to_numpy(), test_df.iloc[:, [-1]].to_numpy()

#print(train_x)
#print(train_y)

# convert to torch datasets
# train data
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
# validation data
valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
# test data
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

batch_size = 6 # hyperparam for batch size

# setup DataLoaders
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)


print(train_x.shape)

# setup pytorch to use cuda (gpu training) if possible
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

(36, 6)


In [12]:
# setup architecture of LSTM model
class SoilNet(nn.Module):
    def __init__(self, feature_size, output_size, hidden_dim, n_layers):
        super(SoilNet, self).__init__()
        self.output_size = output_size
        self.hidden_dim = hidden_dim
        
        self.lstm = nn.LSTM(feature_size, hidden_dim, batch_first=True) # LSTM layer
        #self.dropout = nn.Dropout(drop_prob) # dropout layer, probably not necessary with only 6 features...
        self.predict = nn.Linear(hidden_dim, output_size) # output layer
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x.view(len(x), 1, -1))
        pred = self.predict(lstm_out.view(len(x), -1))
        return pred

In [17]:
# define arguments and instantiate model
feature_size = 6 # 6 features
output_size = 1 # just output a number
hidden_dim = 128 # size of hidden state and cell state at each time step
n_layers = 2

print(device)

model = SoilNet(feature_size, output_size, hidden_dim, n_layers)
model = model.double()
model.to(device)

# hyperparams
lr=0.005
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

print(model)

cuda
SoilNet(
  (lstm): LSTM(6, 128, batch_first=True)
  (predict): Linear(in_features=128, out_features=1, bias=True)
)


In [20]:
# now start the training
epochs = 2
counter = 0
valid_loss_min = np.Inf

model.train()
for i in range(epochs):    
    for inputs, labels in train_loader:
        counter += 1
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)
        #print(inputs)
        y_pred = model(inputs.cuda())
        
        loss = criterion(y_pred, labels.cuda())
        loss.backward()
        optimizer.step()
        
        # do validation here
        val_losses = []
        model.eval()
        for inp, lab in valid_loader:
            inp, lab = inp.to(device), lab.to(device)
            val_out = model(inp.cuda())
            val_loss = criterion(val_out, lab.cuda())
            val_losses.append(val_loss.item())
        
        model.train()
        
        # print stuff here
        print("Epoch: {}/{}...".format(i+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Validation Loss: {:.6f}".format(np.mean(val_losses)))

Epoch: 1/2... Step: 1... Loss: 0.003653... Validation Loss: 0.003925
Epoch: 1/2... Step: 2... Loss: 0.003605... Validation Loss: 0.003228
Epoch: 1/2... Step: 3... Loss: 0.005022... Validation Loss: 0.002572
Epoch: 1/2... Step: 4... Loss: 0.006836... Validation Loss: 0.002319
Epoch: 1/2... Step: 5... Loss: 0.001898... Validation Loss: 0.002865
Epoch: 1/2... Step: 6... Loss: 0.002595... Validation Loss: 0.004136
Epoch: 2/2... Step: 7... Loss: 0.002395... Validation Loss: 0.004514
Epoch: 2/2... Step: 8... Loss: 0.000795... Validation Loss: 0.004128
Epoch: 2/2... Step: 9... Loss: 0.004753... Validation Loss: 0.003280
Epoch: 2/2... Step: 10... Loss: 0.003354... Validation Loss: 0.002320
Epoch: 2/2... Step: 11... Loss: 0.003814... Validation Loss: 0.002187
Epoch: 2/2... Step: 12... Loss: 0.004402... Validation Loss: 0.002233


In [35]:
# now check accuracy for test set...
test_losses = []
num_correct = 0

predictions = []
actuals = []

model.eval()
for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)
    y_pred = model(inputs.cuda())
    
    test_loss = criterion(y_pred, labels.cuda())
    test_losses.append(test_loss.item())
    pred_cpu = y_pred.cpu()
    pred_cpu = pred_cpu.detach().numpy()[:, 0]
    pred_cpu = pred_cpu.tolist()
    
    lab_cpu = labels.cpu()
    lab_cpu = lab_cpu.numpy()[:, 0]
    lab_cpu = lab_cpu.tolist()
    
    predictions.extend(pred_cpu)
    actuals.extend(lab_cpu)
    

print("Test loss: ", test_losses)
print("Predictions:")
print(predictions)
print()
print("Actual values:")
print(actuals)

Test loss:  [0.0011746644763321126, 0.0024528105013722004, 0.0010338094452213488, 0.0015597434763214948]
[0.10146143505201931, 0.07371485993516688, 0.11916405107177873, 0.11979740720896144, 0.04267764307009221, 0.11928014513908854, 0.07347810599997297, 0.11519151081771702, 0.14518082763932497, 0.08736716412244107, 0.14081182071463705, 0.17988216867677226, 0.08721132081601539, 0.13364888113389806, 0.20163440014586095, 0.11486611667701606, 0.14787361112987893, 0.1291463218034714, 0.13569190645350993, 0.04948930149314386, 0.12899652856152263, 0.06008494139953598, 0.12178414341073683, 0.2339041964698658]
[0.0993261088709677, 0.103205309139785, 0.12638180901143198, 0.179852501737318, 0.0461615750169722, 0.169309059233449, 0.0881411290322581, 0.190332661290323, 0.15846438172042998, 0.16356132392473097, 0.161292534722222, 0.130342251563586, 0.0364564648729447, 0.132499652294854, 0.15219724462365603, 0.148817126269956, 0.153092361111111, 0.130387936827957, 0.17281412037037, 0.0865833333333333,