<a href="https://colab.research.google.com/github/shouvikcirca/WaterPrediction/blob/main/RNN_Succesful.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# IPython shell commands: unpack the archive stored on Drive, then the data archive
# under "Water Quality"/DataSet (presumably extracted by the first command).
! unzip gdrive/MyDrive/Water_Prediction/WaterQuality_1.zip
! unzip "Water Quality"/DataSet/WaterQualityData.zip

In [None]:
import pandas as pd

In [None]:
def getData(data_dir='WaterQualityData'):
  """Load the three water-quality CSV tables into DataFrames.

  Args:
    data_dir: Directory containing 'devicemetadata-tng.csv',
      'flowmeterdata-tng.csv' and 'sensordata-tng.csv'. Defaults to the
      'WaterQualityData' folder relative to the current working directory.

  Returns:
    A tuple (devicemetadata, flowmeterdata, sensordata) of DataFrames, in
    that order.

  Raises:
    FileNotFoundError: If one of the CSV files does not exist (raised by
      pandas.read_csv).
  """
  import os  # local import keeps this cell self-contained

  file_names = (
      'devicemetadata-tng.csv',
      'flowmeterdata-tng.csv',
      'sensordata-tng.csv',
  )
  devicemetadata, flowmeterdata, sensordata = (
      pd.read_csv(os.path.join(data_dir, file_name)) for file_name in file_names
  )
  return devicemetadata, flowmeterdata, sensordata

In [None]:
# Load the three raw tables. Only devicemetadata and flowmeterdata are used in the
# cells that follow; sensordata is not referenced again in the cells shown here.
devicemetadata, flowmeterdata, sensordata = getData()

In [None]:
# Doing a Full Outer Join on devicemetadata and flowmeterdata on the 'deviceid' column
devicemetadata_flowmeterdata_merged = pd.merge(devicemetadata, flowmeterdata,  on='deviceid', how='outer')

In [None]:
# Work on a copy so the merged frame itself stays untouched.
newFrame = devicemetadata_flowmeterdata_merged.copy()

In [None]:
newFrame.duplicated().sum() # No duplicates

0

In [None]:
# Keep only the rows for village 'Agling', ordered by 'datetime', with just the
# timestamp and the measured quantity.
# NOTE(review): 'datetime' is not parsed when the CSVs are read, so if it is stored as
# text this sort is lexicographic — confirm the format sorts chronologically.
datetimeFrame = newFrame.copy()[newFrame['villagename'] == 'Agling'].sort_values('datetime')[['datetime','quantity']]

In [None]:
datetimeFrame.head(5)

In [None]:
# Index by timestamp so 'quantity' is the only remaining column.
timeseriesPlotterFrame = datetimeFrame.set_index(['datetime'])

In [None]:
# Drop rows with a missing quantity.
timeseriesPlotterFrame = timeseriesPlotterFrame.dropna()

In [None]:
timeseriesPlotterFrame

In [None]:
timeseriesPlotterFrame.plot()

In [None]:
trainTimeSeries = timeseriesPlotterFrame.copy().iloc[:round(0.8*timeseriesPlotterFrame.shape[0]),:] # Train set formed by first 80% of the original frame
testTimeSeries = timeseriesPlotterFrame.copy().iloc[trainTimeSeries.shape[0]:,:] # Train set formed by the last 20% of the original frame
valTimeSeries = trainTimeSeries.copy().iloc[round(0.8*trainTimeSeries.shape[0]):,:] # Val Set formed by the last 20% of the train frame
trainTimeSeries = trainTimeSeries.copy().iloc[:round(0.8*trainTimeSeries.shape[0]),:] # Train frame left with first 80% of its previous version

In [None]:
assert trainTimeSeries.shape[0] + testTimeSeries.shape[0] + valTimeSeries.shape[0] == timeseriesPlotterFrame.shape[0] # Verifying the size

In [None]:
assert ( (pd.concat([trainTimeSeries, valTimeSeries, testTimeSeries]) == timeseriesPlotterFrame).sum() == timeseriesPlotterFrame.shape[0] )[0]# Verifying that split has happened properly by comparing the combined with the original frame

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Turn each split into a float tensor (one row per time step)
torch_train, torch_test, torch_val = (
    torch.Tensor(split_frame.to_numpy())
    for split_frame in (trainTimeSeries, testTimeSeries, valTimeSeries)
)

In [None]:
# Function to get features
def getFeaturesAndLabels(inp, window_size = 5):
  """Build sliding-window features and next-step labels from a series.

  Window i is inp[i:i + window_size] (transposed) and its label is the
  element that immediately follows it, inp[i + window_size].

  Args:
    inp: Tensor whose dim 0 indexes the time steps. For an (N, 1) input the
      features have shape (N - window_size, window_size) and the labels
      have shape (N - window_size, 1).
    window_size: Number of consecutive steps in each feature window.

  Returns:
    (featureTensor, labelTensor). If inp has no more than window_size rows,
    both are empty 1-D tensors.
  """
  n_steps = inp.shape[0]

  # The last `window_size` rows cannot start a window: nothing follows them to
  # serve as the label.
  windows = [inp[start:start + window_size].t() for start in range(n_steps - window_size)]
  targets = [inp[idx].unsqueeze(0) for idx in range(window_size, n_steps)]

  # Concatenate once instead of growing a tensor inside the loop (which copies
  # everything on every iteration). The empty float seed keeps the result an
  # empty tensor when there are no windows.
  seed = torch.Tensor([])
  featureTensor = torch.cat([seed, *windows])
  labelTensor = torch.cat([seed, *targets])

  return featureTensor, labelTensor

In [None]:
# Number of past steps used to predict the next one.
window_size = 4

In [None]:
# Windows are built separately for each split, so no window spans two splits.
train_features, trainLabels = getFeaturesAndLabels(torch_train, window_size)
train_features.shape, trainLabels.shape

(torch.Size([21, 4]), torch.Size([21, 1]))

In [None]:
val_features, valLabels = getFeaturesAndLabels(torch_val, window_size)
val_features.shape, valLabels.shape

(torch.Size([2, 4]), torch.Size([2, 1]))

In [None]:
test_features, testLabels = getFeaturesAndLabels(torch_test, window_size)
test_features.shape, testLabels.shape

(torch.Size([4, 4]), torch.Size([4, 1]))

In [None]:
# Pair every feature window with its label.
# NOTE: the name `train` is rebound to a function further down in this notebook; the
# loaders created below keep their own reference to this dataset.
train = TensorDataset(train_features, trainLabels)
val = TensorDataset(val_features, valLabels)
test = TensorDataset(test_features, testLabels)

In [None]:
batch_size = 5

In [None]:
# shuffle=False keeps the windows in their original order in every loader.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)
# Same test data, served one sample per batch.
test_loader_one = DataLoader(test, batch_size=1, shuffle=False)

In [None]:
class RNNModel(torch.nn.Module):
    """Multi-layer torch.nn.RNN followed by a linear layer on the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the recurrent hidden state.
        layer_dim: Number of stacked recurrent layers.
        output_dim: Size of the prediction produced for each sample.
        dropout_prob: Dropout probability passed to torch.nn.RNN.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super().__init__()

        # Kept as attributes because forward() needs them to build the initial state
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # Recurrent layers; batch_first=True means inputs are (batch, seq_len, input_dim)
        self.rnn = torch.nn.RNN(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        # Fully connected layer mapping the last hidden output to the prediction
        self.fc = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return predictions of shape (batch, output_dim) for x of shape (batch, seq_len, input_dim)."""
        # Zero initial hidden state, created on the same device and with the same dtype
        # as the input so the model also works after .to(device) / .double().
        h0 = torch.zeros(
            self.layer_dim, x.size(0), self.hidden_dim, device=x.device, dtype=x.dtype
        )

        # out holds the top layer's output for every time step
        out, _ = self.rnn(x, h0)

        # Keep only the last time step: (batch, hidden_dim)
        last_step = out[:, -1, :]

        # Project to the desired output shape (batch, output_dim)
        return self.fc(last_step)

In [None]:
def get_model(model, model_params):
    """Instantiate a model by name.

    Args:
        model: Case-insensitive model name. Only "rnn" is registered.
        model_params: Keyword arguments forwarded to the model constructor.

    Returns:
        A new instance of the requested model class.

    Raises:
        ValueError: If `model` is not a registered name.
    """
    models = {
        "rnn": RNNModel,
        # TODO: register "lstm" / "gru" once LSTMModel / GRUModel exist.
    }
    key = model.lower()
    if key not in models:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(models)}"
        )
    return models[key](**model_params)

In [None]:
class Optimization:
    """Training and evaluation helper around a model, a loss and an optimizer.

    Attributes are documented inline in __init__.

    NOTE: `train` uses `np` (numpy) and `torch`, which must be importable names in the
    module scope by the time the methods are called.
    """

    def __init__(self, model, loss_fn, optimizer):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        # Mean training / validation loss per epoch, appended by train()
        self.train_losses = []
        self.val_losses = []

    @staticmethod
    def _to_sequence(x, n_features):
        """Reshape a batch to (batch, seq_len, n_features), one row per sample.

        The batch dimension is taken from the tensor itself, so samples are never
        folded into the sequence axis, whatever batch size the loader uses.
        """
        return x.view(x.shape[0], -1, n_features)

    def train_step(self, x, y):
        """Run one optimisation step on a batch and return its loss as a float."""
        # Sets model to train mode
        self.model.train()

        # Start from clean gradients
        self.optimizer.zero_grad()

        # Makes predictions
        yhat = self.model(x)

        # Computes loss; PyTorch losses take (prediction, target) in that order
        loss = self.loss_fn(yhat, y)

        # Computes gradients and updates parameters
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def train(self, train_loader, val_loader, batch_size=64, n_epochs=50, n_features=1):
        """Train for n_epochs, recording mean train/validation loss per epoch.

        Args:
            train_loader: Iterable of (features, labels) training batches.
            val_loader: Iterable of (features, labels) validation batches.
            batch_size: Kept for backward compatibility; the batch dimension is
                read from each batch instead.
            n_epochs: Number of passes over train_loader.
            n_features: Size of the last dimension the features are reshaped to.

        Progress is printed for the first 10 epochs and then every 50th epoch.
        """
        for epoch in range(1, n_epochs + 1):
            batch_losses = []
            for x_batch, y_batch in train_loader:
                x_batch = self._to_sequence(x_batch, n_features)
                batch_losses.append(self.train_step(x_batch, y_batch))
            training_loss = np.mean(batch_losses)
            self.train_losses.append(training_loss)

            # Validation: eval mode, no gradient tracking
            # (train_step switches back to train mode on the next epoch)
            self.model.eval()
            batch_val_losses = []
            with torch.no_grad():
                for x_val, y_val in val_loader:
                    yhat = self.model(self._to_sequence(x_val, n_features))
                    batch_val_losses.append(self.loss_fn(yhat, y_val).item())
            validation_loss = np.mean(batch_val_losses)
            self.val_losses.append(validation_loss)

            if epoch <= 10 or epoch % 50 == 0:
                print(
                    f"[{epoch}/{n_epochs}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}"
                )

    def evaluate(self, test_loader, batch_size=1, n_features=1):
        """Predict every batch of test_loader.

        Args:
            test_loader: Iterable of (features, labels) batches.
            batch_size: Kept for backward compatibility; the batch dimension is
                read from each batch instead.
            n_features: Size of the last dimension the features are reshaped to.

        Returns:
            (predictions, values): two lists with one numpy array per batch, holding
            the model outputs and the corresponding labels.
        """
        predictions = []
        values = []
        self.model.eval()
        with torch.no_grad():
            for x_test, y_test in test_loader:
                yhat = self.model(self._to_sequence(x_test, n_features))
                # .cpu() makes the numpy conversion work for tensors on any device
                predictions.append(yhat.detach().cpu().numpy())
                values.append(y_test.detach().cpu().numpy())

        return predictions, values

    def get_Model(self):
        """Return the wrapped model."""
        return self.model


In [None]:
import torch.optim as optim
import numpy as np

# ---- Hyperparameters ----
# The whole window is passed as one feature vector, so after the reshape done inside
# Optimization every sample is a sequence of length 1 with `window_size` features.
input_dim = window_size
print(input_dim)
output_dim, hidden_dim, layer_dim = 1, 64, 3
dropout = 0.2
n_epochs = 300
learning_rate, weight_decay = 1e-3, 1e-6

# ---- Model, loss and optimizer ----
model_params = {
    'input_dim': input_dim,
    'hidden_dim': hidden_dim,
    'layer_dim': layer_dim,
    'output_dim': output_dim,
    'dropout_prob': dropout,
}
model = get_model('rnn', model_params)
loss_fn = torch.nn.MSELoss(reduction="mean")
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# ---- Training ----
opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)
opt.train(
    train_loader,
    val_loader,
    batch_size=batch_size,
    n_epochs=n_epochs,
    n_features=input_dim,
)
# Per-epoch losses are available afterwards in opt.train_losses / opt.val_losses.

# ---- Per-sample predictions on the test windows ----
predictions, values = opt.evaluate(test_loader_one, batch_size=1, n_features=input_dim)

5
[1/300] Training loss: 211.6105	 Validation loss: 336.1044
[2/300] Training loss: 201.9517	 Validation loss: 313.9707
[3/300] Training loss: 191.8343	 Validation loss: 291.7420
[4/300] Training loss: 179.9974	 Validation loss: 270.7157
[5/300] Training loss: 169.3514	 Validation loss: 251.0869
[6/300] Training loss: 160.5145	 Validation loss: 233.1469
[7/300] Training loss: 149.5741	 Validation loss: 217.2370
[8/300] Training loss: 141.4016	 Validation loss: 203.3471
[9/300] Training loss: 134.6356	 Validation loss: 191.3038
[10/300] Training loss: 127.6016	 Validation loss: 180.6290
[50/300] Training loss: 38.9992	 Validation loss: 28.3063
[100/300] Training loss: 17.3936	 Validation loss: 1.4026
[150/300] Training loss: 15.7879	 Validation loss: 0.0001
[200/300] Training loss: 22.6197	 Validation loss: 0.0000
[250/300] Training loss: 20.2719	 Validation loss: 0.0149
[300/300] Training loss: 19.3204	 Validation loss: 0.0201


In [None]:
trained_rnn_model = opt.get_Model()
with torch.no_grad():
  rnn_preds = trained_rnn_model(test_features.view([min(batch_size, test_features.shape[0]), -1, window_size]))


In [None]:
rnn_preds.shape, testLabels.shape
torch.nn.L1Loss()(rnn_preds.squeeze(1), testLabels.squeeze(1))

tensor(10.5038)

In [None]:
# Shape check of the windowed tensors.
# NOTE(review): the output recorded below shows 5-wide windows while window_size is set
# to 4 earlier in this notebook — it looks stale from an earlier run; re-run to confirm.
test_features.shape, train_features.shape, trainLabels.shape

(torch.Size([3, 5]), torch.Size([20, 5]), torch.Size([20, 1]))

## Averaging (Baseline)

In [None]:
# Mean Absolute Error of the naive baseline: each label is predicted as the mean of its window
mean_abs_error = torch.nn.L1Loss()
window_means = test_features.mean(axis = 1)
mean_abs_error(window_means, testLabels.squeeze(1))

tensor(3.8195)

## Linear Regression

In [None]:
class LinearRegressionModel(torch.nn.Module):
    """Linear regression: a single affine layer from `input_dim` features to one output."""

    def __init__(self, input_dim):
        super().__init__()
        # The layer lives in a Sequential stored as `Linear`, so its parameters are
        # named `Linear.0.weight` and `Linear.0.bias`.
        affine = torch.nn.Linear(input_dim, 1)
        self.Linear = torch.nn.Sequential(affine)

    def forward(self, x):
        """Apply the affine layer to the last dimension of x."""
        return self.Linear(x)

In [None]:
def train(x_train, y_train, time_series_window_size = 5, learning_rate = 1e-3, weight_decay = 1e-6, bs = 64, epochs = 10):
    """Fit a LinearRegressionModel with Adam on mean-squared error.

    Args:
        x_train: Feature rows, one sample per row, `time_series_window_size` columns.
        y_train: Targets, row-aligned with x_train.
        time_series_window_size: Number of input features of the model.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.
        bs: Mini-batch size (must be positive).
        epochs: Number of passes over the data.

    Returns:
        The trained LinearRegressionModel (left in train mode).
    """
    model = LinearRegressionModel(time_series_window_size)

    criterion = torch.nn.MSELoss(reduction="mean")
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay = weight_decay)

    # Convert once, outside the loops
    features = torch.as_tensor(x_train, dtype=torch.float32)
    targets = torch.as_tensor(y_train, dtype=torch.float32)
    n_samples = features.shape[0]

    model.train()
    for epoch in range(epochs):
        print('epoch: {}'.format(epoch))
        # Stepping by `bs` visits every sample, including a final partial batch
        # (rounding the batch count could silently drop the tail of the data).
        for start in range(0, n_samples, bs):
            batch_x = features[start:start + bs]
            batch_y = targets[start:start + bs]
            optimizer.zero_grad()
            loss_train = criterion(model(batch_x), batch_y)
            loss_train.backward()
            optimizer.step()
    return model

In [None]:
# Optimizer settings for the linear-regression baseline
linreg_learning_rate = 1e-3
linreg_weight_decay = 1e-6

In [None]:
train_features.shape

torch.Size([21, 4])

In [None]:
# Positional arguments map to (x_train, y_train, time_series_window_size, learning_rate,
# weight_decay); bs and epochs are left at their defaults (64 and 10).
trainedModel = train(train_features, trainLabels, window_size,  linreg_learning_rate, linreg_weight_decay)

epoch: 0
epoch: 1
epoch: 2
epoch: 3
epoch: 4
epoch: 5
epoch: 6
epoch: 7
epoch: 8
epoch: 9


In [None]:
# Inference only: gradients are not needed for the test predictions
with torch.no_grad():
  linreg_preds = trainedModel(test_features)


In [None]:
# Mean absolute error of the linear-regression predictions against the test labels
torch.nn.L1Loss()(linreg_preds, testLabels)

tensor(38.5045)