In [None]:
# Adapted from Robert Guthrie https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html
# And: https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/
import sklearn
from sklearn.linear_model import LinearRegression
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import json
import zoib

# Fix the NumPy and PyTorch global RNG seeds so repeated runs start from the same random state
np.random.seed(55)
torch.manual_seed(25)

In [None]:
def split_train_test_val(df, train_end_year=2010, val_end_year=2014):
    """Split a frame chronologically into train, validation and test parts.

    Rows are assigned by the value of the index level named ``'year'``:

    * train: ``year <= train_end_year``
    * validation: ``train_end_year < year <= val_end_year``
    * test: ``year > val_end_year``

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has a level named ``'year'`` (at any position).
    train_end_year : int, default 2010
        Last year (inclusive) that belongs to the training split.
    val_end_year : int, default 2014
        Last year (inclusive) that belongs to the validation split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, val_df, test_df)`` -- note the order differs from the
        function name. Row order within each part follows ``df``.

    Raises
    ------
    KeyError
        If the index has no level named ``'year'``.
    """
    # Look the level up by name once instead of locating its position and
    # re-reading the level values for every comparison.
    years = df.index.get_level_values('year')

    in_train = years <= train_end_year
    in_val = (years > train_end_year) & (years <= val_end_year)
    in_test = years > val_end_year

    return df.loc[in_train], df.loc[in_val], df.loc[in_test]

def split_by_playa(x, y, seq_length):
    """Cut row-stacked data into consecutive, equal-length sequences.

    Rows ``0 .. seq_length-1`` form sequence 0, the next ``seq_length`` rows
    form sequence 1, and so on. The caller is responsible for ordering the
    rows so that each run of ``seq_length`` rows belongs to one playa.

    Parameters
    ----------
    x : array-like, shape (n_rows, ...)
        Features, one row per timestep.
    y : array-like, shape (n_rows, ...)
        Targets aligned row-for-row with ``x``.
    seq_length : int
        Number of rows per sequence; must be positive and divide ``n_rows``.

    Returns
    -------
    x_arr : numpy.ndarray, shape (n_seq, seq_length, ...)
    ids_arr : numpy.ndarray of int, shape (n_seq, seq_length)
        Row ``j`` is filled with the sequence number ``j``.
    y_arr : numpy.ndarray, shape (n_seq, seq_length, ...)

    Raises
    ------
    ValueError
        If ``seq_length`` is not positive, if ``x`` and ``y`` have different
        numbers of rows, or if the row count is not a multiple of
        ``seq_length`` (which would otherwise yield a ragged final sequence
        and one sequence id more than intended).
    """
    # np.array copies, so the returned arrays never alias the inputs
    x_data = np.array(x)
    y_data = np.array(y)
    n_rows = x_data.shape[0]

    if seq_length <= 0:
        raise ValueError("seq_length must be a positive integer, got %r" % (seq_length,))
    if y_data.shape[0] != n_rows:
        raise ValueError(
            "x and y must have the same number of rows, got %d and %d"
            % (n_rows, y_data.shape[0])
        )
    if n_rows % seq_length != 0:
        raise ValueError(
            "number of rows (%d) is not a multiple of seq_length (%d)"
            % (n_rows, seq_length)
        )

    n_seq = n_rows // seq_length
    x_arr = x_data.reshape((n_seq, seq_length) + x_data.shape[1:])
    y_arr = y_data.reshape((n_seq, seq_length) + y_data.shape[1:])
    ids_arr = np.repeat(np.arange(n_seq), seq_length).reshape(n_seq, seq_length)
    return x_arr, ids_arr, y_arr

# Load data

In [None]:
# Set the number of playas from 1 to 8.
# The load cell keeps only rows whose new_id is in range(num_playas), and the
# same value sizes the model's embedding table and the per-playa sequence split.
num_playas = 8

In [None]:
# Load the prepared playa table.
# NOTE(review): the following cell reads this same file again before filtering,
# so this read looks redundant -- confirm and remove one of the two.
traj = pd.read_csv('./prepped_8.csv')

In [None]:
# Read csv and keep only the playas whose new_id is in range(num_playas)
traj = pd.read_csv('./prepped_8.csv')
traj = traj.loc[traj['new_id'].isin(range(num_playas))]

# Set indices (split_train_test_val later looks up the 'year' level).
# Re-binding instead of inplace=True avoids mutating a filtered frame in place.
traj = traj.set_index(['year','month','id'])

# Drop new_id: it was only needed for the filter above. The ids fed to the
# model's embedding are generated later by split_by_playa.
traj = traj.drop(columns=['new_id'])

# Here's a chance to drop everything else, if you'd like
# traj = traj[['precip', 'temp', 'vpd', 'acres', 'new_id', 'inundation']]

# Drop every column whose values are all zero (missing values count as zero).
# Testing the values themselves rather than the column sum keeps columns whose
# positive and negative entries merely cancel out.
all_zero_cols = traj.columns[traj.fillna(0).eq(0).all(axis=0)]
traj = traj.drop(columns=all_zero_cols)

# Plot inundation
traj['inundation'].plot()

# Prep data

In [None]:
# Params to set

# -- Model size --
hidden_dim = 64          # LSTM hidden units
embedding_dim = 4        # width of the playa-id embedding
num_layers = 1           # stacked LSTM layers

# -- Optimisation --
batch_size = 8           # sequences per DataLoader batch
num_epochs = 300
learning_rate = 0.05     # initial Adam learning rate
weight_decay = 0         # Adam weight decay
regularization_weight = 0

# -- Learning-rate schedule (StepLR) --
lr_decay_step_size = 75  # Set this high if not needed
lr_gamma = 0.25          # multiplicative LR factor applied every lr_decay_step_size epochs

# -- Early stopping --
early_stopping = 100     # Set high if not needed

In [None]:
scaler = StandardScaler()
train, val, test = split_train_test_val(traj)

# The last column is treated as the target; every other column is a feature
train_X, train_y = train.values[:, :-1], train.values[:, -1]
val_X, val_y = val.values[:, :-1], val.values[:, -1]
test_X, test_y = test.values[:, :-1],  test.values[:, -1]

# Run scaler: fit on the training features only, then apply to val/test.
# The results are bound to new arrays instead of being written back into the
# slices of `.values`: an in-place write would truncate the scaled floats if
# the frame held integers, and fails outright when pandas hands out a
# read-only array (Copy-on-Write).
train_X = scaler.fit_transform(train_X)
val_X = scaler.transform(val_X)
test_X = scaler.transform(test_X)

# Number of features per timestep fed to the LSTM (before the embedding is appended)
lstm_input_size = train_X.shape[1]

In [None]:
# Split into arrays by id: one sequence per playa, each spanning that split's
# rows divided evenly among the playas
train_seq_length = train.shape[0] // num_playas
val_seq_length = val.shape[0] // num_playas
test_seq_length = test.shape[0] // num_playas

train_X_array, train_ids, train_y_array = split_by_playa(
    train_X, train_y, seq_length=train_seq_length)
val_X_array, val_ids, val_y_array = split_by_playa(
    val_X, val_y, seq_length=val_seq_length)
test_X_array, test_ids, test_y_array = split_by_playa(
    test_X, test_y, seq_length=test_seq_length)


In [None]:
# Train and validation tensors share one dataset, so every batch yields the
# training sequences together with the validation sequences at the same positions.
train_tensors = (
    torch.Tensor(train_X_array),
    torch.Tensor(train_ids).long(),
    torch.Tensor(train_y_array),
)
val_tensors = (
    torch.Tensor(val_X_array),
    torch.Tensor(val_ids).long(),
    torch.Tensor(val_y_array),
)
train_val_ds = torch.utils.data.TensorDataset(*train_tensors, *val_tensors)
train_val_loader = torch.utils.data.DataLoader(
    train_val_ds,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)


In [None]:
# Here we define our model as a class
class LSTM(nn.Module):
    """LSTM over feature sequences with a learned per-playa embedding.

    At every timestep the input features are concatenated with the embedding
    of that timestep's playa id, run through an LSTM, and projected by a
    linear layer. ``forward`` emits four values per timestep: the first two
    are squashed with a sigmoid and the last two are exponentiated (the
    notebook later labels them ``p``, ``q``, ``conc1``, ``conc0``).

    The recurrent state is kept on the module as ``self.h`` / ``self.c``.
    Call :meth:`init_hidden` with the batch size before each ``forward`` that
    should start from a zero state; ``forward`` overwrites both with the
    state after the last timestep.

    Parameters
    ----------
    input_dim : int
        Number of input features per timestep (embedding excluded).
    hidden_dim : int
        Size of the LSTM hidden state.
    embedding_dim : int
        Width of the playa-id embedding.
    num_playas : int
        Number of distinct playa ids (rows in the embedding table).
    output_dim : int, default 1
        Output width of the linear layer. NOTE: ``forward`` slices and checks
        for exactly four outputs, so only ``output_dim=4`` passes its
        assertions; the default is kept for interface compatibility.
    num_layers : int, default 1
        Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, embedding_dim, num_playas, output_dim=1,
                    num_layers=1):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.num_playas = num_playas

        # Define embedding layer
        self.embedding = nn.Embedding(self.num_playas, self.embedding_dim)

        # Define the LSTM layer; its input is the features with the embedding appended
        self.lstm = nn.LSTM(self.input_dim + self.embedding_dim, self.hidden_dim, self.num_layers, batch_first=True)

        # Define output projection and activations
        self.linear = nn.Linear(self.hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()
        self.exp = torch.exp

    def init_hidden(self, batch_size):
        """Reset ``self.h`` and ``self.c`` to zeros for a batch of ``batch_size``.

        The state is allocated with the same device and dtype as the model's
        weights, so it stays usable after ``model.to(device)`` or
        ``model.double()``.
        """
        state_shape = (self.num_layers, batch_size, self.hidden_dim)
        reference = self.linear.weight
        self.h = reference.new_zeros(state_shape)
        self.c = reference.new_zeros(state_shape)
        return

    def forward(self, input, playa_ids):
        """Run one batch of sequences through the network.

        Parameters
        ----------
        input : torch.Tensor, shape [batch_size, timesteps, input_dim]
        playa_ids : torch.LongTensor, shape [batch_size, timesteps]
            Embedding index for every timestep.

        Returns
        -------
        torch.Tensor, shape [batch_size, timesteps, 4]
            Channels 0-1 are sigmoid outputs, channels 2-3 are exp outputs.

        Raises
        ------
        AssertionError
            If the stored hidden state does not match the batch size, or the
            linear layer does not produce exactly four outputs.
        """
        expected_state_shape = torch.Size([self.num_layers, input.size(0), self.hidden_dim])

        # Check that the stored state (h, c) has the expected shape
        # (num_layers, batch_size, hidden_dim)
        assert self.h.shape == expected_state_shape
        assert self.c.shape == expected_state_shape

        # Run ids through embedding
        self.emb_layer = self.embedding(playa_ids)

        # Concat embedding and inputs along the feature axis and run through LSTM
        lstm_out, (self.h, self.c) = self.lstm(torch.cat((input, self.emb_layer), 2), (self.h, self.c))

        # Assert that shapes are still as expected
        assert self.h.shape == expected_state_shape
        assert self.c.shape == expected_state_shape
        assert lstm_out.shape == torch.Size([input.size(0), input.size(1), self.hidden_dim]) # batch, seq_len, hidden

        # Project to the output head; it must have exactly four channels
        # because the activations below split it as [0:2] and [2:4]
        lin_act = self.linear(lstm_out)
        assert lin_act.shape == torch.Size([input.size(0), input.size(1), 4]), (
            "forward expects 4 outputs per timestep (construct with output_dim=4), "
            "got shape %s" % (tuple(lin_act.shape),)
        )

        # Sigmoid on the first two channels, exp on the last two
        y_pred = torch.cat((self.sigmoid(lin_act[:,:,0:2]), self.exp(lin_act[:,:,2:4])), 2)

        # Outputs keep the shape [batch_size, seq_len, 4]
        assert lin_act.shape == y_pred.shape

        return y_pred


# Build the network. output_dim is 4 because LSTM.forward emits four values
# per timestep.
model_kwargs = dict(
    input_dim=lstm_input_size,
    hidden_dim=hidden_dim,
    embedding_dim=embedding_dim,
    num_playas=num_playas,
    output_dim=4,
    num_layers=num_layers,
)
model = LSTM(**model_kwargs)

In [None]:
   
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.StepLR(optimiser, step_size=lr_decay_step_size, gamma=lr_gamma)
#####################---------------------------------------------------------------------------
# Train model
#####################

# Per-epoch mean losses
loss_history = []
val_loss_history = []

# Start from +inf so the first epoch with a finite loss always records a
# "best" prediction, whatever the scale of the loss.
best_loss = float('inf')
# Number of epochs since the training loss last improved; drives early stopping
epochs_since_improvement = 0
train_y_ordered = np.array([])
val_y_ordered = np.array([])
for t in range(num_epochs):
    epoch_loss = 0
    val_epoch_loss = 0
    total_items = 0
    val_total_items = 0
    # Lists to store this epoch's predictions, one (n, 4) tensor per batch
    all_train_pred = []
    all_val_pred = []
    for x_batch, ids_batch, y_batch, val_x_batch, val_ids_batch, val_y_batch in train_val_loader:

        # Record the ground truth once, in the order the loader yields it,
        # so it lines up with the stored predictions
        if t == 0:
            train_y_ordered = np.concatenate([train_y_ordered, y_batch.view(-1).detach().numpy()])
            val_y_ordered = np.concatenate([val_y_ordered, val_y_batch.view(-1).detach().numpy()])

        # Clear stored gradient
        optimiser.zero_grad()

        # Init hidden state
        model.init_hidden(batch_size=x_batch.size(0))

        # Training: Predict and calc loss
        train_pred = model(x_batch, ids_batch)

        loss = zoib.zoib_loss(
            train_pred,
            y_batch
        )

        # Store a detached copy so kept predictions do not hold the autograd graph
        all_train_pred.append(train_pred.detach().view(-1, 4))

        # Tracking mean loss across batches (weighted by sequences in the batch)
        epoch_loss += train_pred.shape[0]*loss.item()
        total_items += train_pred.shape[0]

        # Validation: predict and calc loss. No gradients are needed here.
        # NOTE(review): the hidden state is not reset, so validation continues
        # from the state left by the training sequences -- presumably intended,
        # since the validation years follow the training years; confirm.
        with torch.no_grad():
            val_pred = model(val_x_batch, val_ids_batch)

            val_loss = zoib.zoib_loss(
                val_pred,
                val_y_batch
            ).float()
        all_val_pred.append(val_pred.view(-1, 4))

        # Tracking mean loss across batches
        val_epoch_loss += val_pred.shape[0]*val_loss.item()
        val_total_items += val_pred.shape[0]

        # Backward pass
        loss.backward()

        # Update parameters
        optimiser.step()

    # LR decay
    scheduler.step()

    epoch_loss = epoch_loss/total_items
    val_epoch_loss = val_epoch_loss/val_total_items
    loss_history.append(epoch_loss)
    val_loss_history.append(val_epoch_loss)
    if t%10==0:
        print("Epoch ", t, "Train Loss: ", epoch_loss, "Val Loss:", val_epoch_loss, "LR: ", optimiser.param_groups[0]["lr"])

    # Stop immediately if training diverged
    if np.isnan(epoch_loss):
        break

    # Early stopping on the training loss
    if epoch_loss < best_loss:
        epochs_since_improvement = 0
        # Save best loss and the predictions that produced it
        best_loss = epoch_loss
        best_train_pred = torch.cat(all_train_pred, dim=0)
        best_val_pred = torch.cat(all_val_pred, dim=0)
    else:
        epochs_since_improvement += 1
    if epochs_since_improvement > early_stopping:
        break


# View results

In [None]:
# Best (lowest) mean training loss seen during training
print(best_loss)

# Per-epoch training and validation loss on shared axes
plt.plot(loss_history, label='Train')
plt.plot(val_loss_history, label='Val')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
def zoib_expected(t):
    """Expected value implied by rows of four ZOIB-style parameters.

    Columns of ``t`` are read as ``p, q, conc1, conc0`` (the labels the
    notebook uses for these predictions); going by the original comment,
    ``p`` is the probability of 0 and ``q`` the probability of 1 given not 0.
    Per row the result is

        q*(1 - p) + (1 - p)*(1 - q) * conc1/(conc0 + conc1)

    i.e. the mass at one plus the remaining mass times the beta mean.

    Parameters
    ----------
    t : torch.Tensor, shape (n, >=4)
        Detached and converted to NumPy before use.

    Returns
    -------
    numpy.ndarray, shape (n,)
    """
    params = t.detach().numpy()
    p, q, conc1, conc0 = (params[:, k] for k in range(4))

    mass_at_one = q*(1 - p)
    mass_in_beta = (1 - p)*(1 - q)
    beta_mean = conc1/(conc0 + conc1)
    return mass_at_one + mass_in_beta*beta_mean

In [None]:
# Training set: predicted expectation against ground truth, with a 1:1 reference line
fig, ax = plt.subplots()
ax.scatter(train_y_ordered, zoib_expected(best_train_pred))
ax.plot([0, 1], [0, 1])
ax.set_xlabel('True')
ax.set_ylabel('Pred')
plt.show()

In [None]:
# Predicted vs. true training values as lines, zoomed to positions 300-450
train_comparison = pd.DataFrame({
    'Pred': zoib_expected(best_train_pred),
    'True': train_y_ordered,
})
train_comparison.plot(xlim=[300,450])

In [None]:
# Validation set: predicted expectation against ground truth.
# The 1:1 line marks perfect prediction, matching the training scatter plot.
plt.scatter(val_y_ordered, zoib_expected(best_val_pred))
plt.plot([0,1],[0,1])
plt.xlabel('True')
plt.ylabel('Pred')
plt.show()

In [None]:
# Predicted vs. true validation values as lines. The truth comes from
# val_y_ordered, recorded in the same loader order as the predictions,
# so the two columns stay aligned however the loader orders its batches.
pd.DataFrame({'Pred':zoib_expected(best_val_pred), 'True':val_y_ordered}).plot()

In [None]:
# Pairwise scatter plots of the four predicted parameters on the training set
param_df = pd.DataFrame(
    best_train_pred.detach().numpy(),
    columns=['p','q','conc1','conc0'],
)
pd.plotting.scatter_matrix(param_df)
plt.show()