In [1]:
!pip install mlflow
import mlflow

You should consider upgrading via the '/Users/thota/sid/thesis/env/bin/python3 -m pip install --upgrade pip' command.


In [2]:
import pandas as pd

In [13]:
# Load the raw dataset from CSV into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
data = pd.read_csv('Data/Data-set.csv')

In [14]:
data.head()

Unnamed: 0,value
0,22976
1,251288
2,239840
3,87320
4,38352


In [15]:
# Keep only the `value` column as a NumPy array (`.values` returns an ndarray, not a list).
# NOTE(review): this rebinds `data` from a DataFrame to an array, so re-running this
# cell without re-running the read_csv cell fails (an ndarray has no `.value` attribute).
data = data.value.values

In [16]:
# 80% for training and 20% for testing.
# `train_index` is the split position: samples before it (in file order, no shuffling)
# become the training set and the rest become the test set.
train_index = int(len(data)*0.8)

In [17]:
# Training split: the first `train_index` samples, reshaped into a single column (n, 1).
train = data[:train_index].reshape(-1,1)

In [18]:
# Test split: every sample from `train_index` onwards, reshaped into a single column (n, 1).
test = data[train_index:].reshape(-1,1)

In [19]:
# Standardize data: the scaler is fitted on the training split only, so the
# mean/variance used for scaling never include test samples.
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(train)

In [20]:
# Scale the training split with the fitted scaler.
# NOTE(review): `train` is overwritten, so re-running this cell scales the data twice.
train = scaler.transform(train)

In [21]:
# Scale the test split with the statistics learned from the training split.
# NOTE(review): `test` is overwritten, so re-running this cell scales the data twice.
test = scaler.transform(test)

In [22]:
import numpy as np
# create data with a sliding window size 10 and step 1
def create_sliding_window_data(arr, window_size=10):
    """Build (window, next value) training pairs with a sliding window of step 1.

    Parameters
    ----------
    arr : array-like
        The series, either 1-D ``(n,)`` or a single column ``(n, 1)``. A 2-D
        array with more than one column is also accepted; its feature axis is
        kept as-is.
    window_size : int, default 10
        Number of consecutive samples in each input window.

    Returns
    -------
    X : np.ndarray
        Shape ``(n - window_size, window_size)`` for a 1-D / single-column
        series (an extra trailing feature axis is kept for multi-column input).
    y : np.ndarray
        Shape ``(n - window_size,)``; ``y[i]`` is the sample that immediately
        follows window ``X[i]``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.

    Notes
    -----
    The batch axis is always preserved: a series that yields a single window
    returns ``X`` of shape ``(1, window_size)``, and a series that is too short
    returns empty arrays of shape ``(0, window_size)`` and ``(0,)``.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    series = np.asarray(arr)
    # Remove only the singleton feature axis of a column vector. A blanket
    # squeeze() would also drop the batch axis when there is exactly one
    # window, or the window axis when window_size == 1.
    if series.ndim == 2 and series.shape[1] == 1:
        series = series[:, 0]

    n_windows = max(len(series) - window_size, 0)

    # Row i of `index` holds the positions i, i+1, ..., i+window_size-1.
    index = np.arange(n_windows)[:, None] + np.arange(window_size)[None, :]
    X = series[index]
    # The target of window i is the element right after it.
    y = series[window_size:window_size + n_windows].copy()

    return X, y

In [23]:
# NOTE(review): disabled linear-regression baseline, kept as a bare string literal so
# it never executes. `X_train` / `y_train` are not defined at notebook level in the
# cells shown here, so the code would need them created first if it is re-enabled.
"""#Linear regression model
from sklearn.linear_model import LinearRegression

model = LinearRegression()

# Train the model on the data
model.fit(X_train, y_train)"""

'#Linear regression model\nfrom sklearn.linear_model import LinearRegression\n\nmodel = LinearRegression()\n\n# Train the model on the data\nmodel.fit(X_train, y_train)'

In [24]:
# NOTE(review): disabled evaluation of the linear-regression baseline, kept as a bare
# string literal so it never executes; it relies on a `model` and `X_test` that no
# notebook-level cell shown here defines.
"""#testing the linear regression model
y_pred = model.predict(X_test)"""

'#testing the linear regression model\ny_pred = model.predict(X_test)'

In [25]:
def log_scalar(name, value, step):
    """Log a scalar metric to the active MLflow run.

    Parameters
    ----------
    name : str
        Metric name.
    value : float-like
        Metric value. Anything convertible with ``float()`` is accepted
        (Python numbers, NumPy scalars, single-element torch tensors).
    step : int
        Step (e.g. epoch index) the value belongs to.
    """
    # Coerce to a plain Python float so tensors / NumPy scalars are never
    # handed to MLflow as-is.
    mlflow.log_metric(name, float(value), step=step)

In [26]:
import torch
from torch.utils.data import Dataset

# Minimal Dataset wrapper so (input, target) pairs can be served by a DataLoader
class Data(Dataset):
    """Pairs inputs with targets index by index."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The length is taken from the inputs only; `y` is not checked here.
        return len(self.x)

    def __getitem__(self, i):
        return self.x[i], self.y[i]

In [27]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from collections import namedtuple

from torchvision.models import resnet18

# Fully connected regression network (three stacked linear layers)
class RegressionModel(nn.Module):
    """Maps the last axis of the input from `input_size` features to one output.

    NOTE(review): no activation is applied between the layers, so the stack is
    a composition of three affine maps.
    """

    def __init__(self, input_size = 10):
        super().__init__()
        width = 256
        self.linear1 = nn.Linear(input_size, width)
        self.linear2 = nn.Linear(width, width)
        self.linear3 = nn.Linear(width, 1)

    def forward(self, x):
        # Apply the layers in declaration order.
        for layer in (self.linear1, self.linear2, self.linear3):
            x = layer(x)
        return x

# LSTM model
class MyLSTM(nn.Module):
    """Single-layer LSTM followed by a linear head producing one value per sample.

    Parameters
    ----------
    hidden_size : int, default 64
        Size of the LSTM hidden state.
    input_size : int, default 10
        Number of features per time step.
    """

    def __init__(self, hidden_size=64, input_size = 10):
        super(MyLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return predictions of shape (batch_size, 1).

        `x` is expected as (batch_size, sequence_length, input_size) because the
        LSTM is built with ``batch_first=True``.
        """
        # hidden.shape = (num_layers, batch_size, hidden_size)
        _, (hidden, _) = self.lstm(x)
        # Index the layer axis instead of calling squeeze(): squeeze() would
        # also remove the batch axis when batch_size == 1. No dtype cast is
        # applied, so the head runs in whatever precision the module uses.
        output = self.linear(hidden[-1])
        # output.shape = (batch_size, 1)
        return output

#RNN model
class RNN(nn.Module):
    """Single-layer RNN whose last time-step output feeds a linear head.

    Parameters
    ----------
    input_size : int, default 10
        Number of features per time step.
    hidden_size : int, default 64
        Size of the recurrent hidden state.
    output_size : int, default 1
        Number of values predicted per sample.
    """

    def __init__(self, input_size = 10, hidden_size=64, output_size=1):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions of shape (batch_size, output_size).

        `x` is expected as (batch_size, sequence_length, input_size) because the
        RNN is built with ``batch_first=True``.
        """
        # Zero initial state created with the input's dtype and device, so the
        # model works in float32/float64 and on CPU/GPU alike.
        h0 = x.new_zeros(1, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, h0)
        # Only the output of the last time step is used for the prediction.
        out = self.fc(out[:, -1, :])
        return out

class TransformerModel(nn.Module):
    """Linear embedding -> Transformer encoder stack -> linear head.

    The head is applied to the encoding of the last sequence position only.
    """

    def __init__(self, input_dim=10, output_dim=1, hidden_dim=32, num_layers=2, num_heads=2, dropout=0.3):
        super().__init__()

        # Project the raw features to the encoder width.
        self.embedding = nn.Linear(input_dim, hidden_dim)

        # The feed-forward width is set to the same value as the model width.
        layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim,
            dropout=dropout,
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Swap the first two axes before and after the encoder: the input is
        # indexed batch-first below, while the encoder layer is built with its
        # default (sequence-first) layout.
        embedded = self.embedding(x).permute(1, 0, 2)
        encoded = self.encoder(embedded).permute(1, 0, 2)
        last_step = encoded[:, -1, :]
        return self.fc(last_step)


class ResLSTM(nn.Module):
    """ResNet-18 feature extractor followed by an LSTM and a linear head.

    Parameters
    ----------
    input_size : int, default 10
        Number of features fed to the LSTM; the 512 ResNet features are
        compressed to this width first.
    hidden_size : int, default 64
        Size of the LSTM hidden state.
    num_layers : int, default 2
        Number of stacked LSTM layers.
    dropout : float, default 0.3
        Dropout applied between LSTM layers.
    """

    def __init__(self, input_size=10, hidden_size=64, num_layers=2, dropout=0.3):
        super(ResLSTM, self).__init__()

        # ResNet block as feature extractor.
        # NOTE(review): `pretrained=True` fetches ImageNet weights; newer
        # torchvision releases deprecate this flag in favour of `weights=`.
        self.resnet = resnet18(pretrained=True)
        # Drop the classification head so the network emits its 512 pooled features.
        self.resnet.fc = nn.Identity()
        # Replace the stem with a single-channel convolution; this layer is
        # freshly initialised, so it does not carry pretrained weights.
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

        # LSTM to process the extracted features
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)

        # Fully connected layer for prediction
        self.fc = nn.Linear(hidden_size, 1)
        # The compressor must emit exactly what the LSTM consumes; a fixed width
        # of 10 only matched the default `input_size`.
        self.compressor = nn.Linear(512, input_size)

    def forward(self, x):
        """Return one prediction per sample, shape (batch_size, 1)."""
        # Insert a channel axis at position 1 to match the single-channel stem.
        # assumes x arrives as (batch, 1, window) — TODO confirm against caller
        x = x.unsqueeze(1)
        x = self.resnet(x)
        # Compress the ResNet features to the LSTM input width and present them
        # as a sequence of length 1.
        x = self.compressor(x)
        x = x.view(x.size(0), 1, -1)
        x, _ = self.lstm(x)

        # Get the last output from LSTM and pass it through a fully connected layer for prediction
        x = self.fc(x[:, -1, :])

        return x


#Define the model, loss function, and optimizer
#model = RNN(input_size=params.window_size, hidden_size=10, output_size=1).double()


In [28]:
!pip install livelossplot
import livelossplot

You should consider upgrading via the '/Users/thota/sid/thesis/env/bin/python3 -m pip install --upgrade pip' command.


In [31]:
from livelossplot import PlotLosses
from sklearn.metrics import mean_squared_error

def trainer(params):
    """Train the selected model, save its weights and log the test loss.

    Reads the notebook-level `train` and `test` arrays, turns them into
    sliding-window samples, trains for `params['epochs']` epochs inside an
    MLflow run, saves the state dict to
    ``<model>_<epochs>_<batch_size>_<window_size>.pth`` and finally evaluates
    on the test windows.

    Parameters
    ----------
    params : dict
        Required keys:
        - 'lr': learning rate for Adam.
        - 'batch_size': batch size for both loaders.
        - 'epochs': number of training epochs.
        - 'window_size': length of each input window.
        - 'model': one of "resnet", "transformer", "rnn", "lstm"; any other
          value selects RegressionModel.

    Raises
    ------
    ValueError
        If the train or test split is too short to yield a single window.
    """
    window_size = params['window_size']

    X_train, y_train = create_sliding_window_data(train, window_size = window_size)
    X_test, y_test = create_sliding_window_data(test, window_size = window_size)

    X_train = torch.from_numpy(X_train).double()
    y_train = torch.from_numpy(y_train).double()
    X_test = torch.from_numpy(X_test).double()
    y_test = torch.from_numpy(y_test).double()

    train_dataset = Data(X_train, y_train)
    test_dataset = Data(X_test, y_test)
    if len(train_dataset) == 0 or len(test_dataset) == 0:
        raise ValueError("not enough samples to build a window of size %s" % window_size)

    train_loader = DataLoader(train_dataset, batch_size = params['batch_size'], shuffle=True)
    # Evaluation order does not affect the averaged loss, so no shuffling here.
    test_loader = DataLoader(test_dataset, batch_size = params['batch_size'], shuffle=False)

    # Size each model's input to the window actually used. ResLSTM keeps its
    # defaults: its ResNet front end accepts any window width.
    builders = {
        "resnet": lambda: ResLSTM(),
        "transformer": lambda: TransformerModel(input_dim=window_size),
        "rnn": lambda: RNN(input_size=window_size),
        "lstm": lambda: MyLSTM(input_size=window_size),
    }
    build = builders.get(params["model"], lambda: RegressionModel(input_size=window_size))
    model = build().double()

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr= params['lr'])

    with mlflow.start_run() as run:
        # Log our parameters into mlflow
        for key, value in params.items():
            mlflow.log_param(key, value)

        print('Deep Learning models')
        print('====================')

        print('Training phase')

        liveloss = PlotLosses()
        for epoch in range(params['epochs']):

            print("Active Run ID: %s, Epoch: %s \n" % (run.info.run_uuid, epoch+1))
            model.train()
            total_loss = 0.0
            # Batch variables get their own names so the full datasets above
            # are not shadowed.
            for X_batch, y_batch in train_loader:
                optimizer.zero_grad()

                # Each window is presented as a sequence of length 1.
                outputs = model(X_batch.view(X_batch.shape[0], 1, X_batch.shape[-1]))
                # Match the target shape explicitly; a bare squeeze() would also
                # drop the batch axis for a batch of one.
                loss = criterion(outputs.reshape(y_batch.shape), y_batch)
                total_loss += loss.item()
                loss.backward()
                optimizer.step()

            epoch_loss = total_loss / len(train_loader)
            print('Epoch {}, Loss: {:.4f}'.format(epoch+1, epoch_loss))
            liveloss.update({'MSE loss': epoch_loss})
            liveloss.send()
            log_scalar('train_loss', epoch_loss, epoch)

        torch.save(model.state_dict(), f"{params['model']}_{params['epochs']}_{params['batch_size']}_{params['window_size']}.pth")

        print('Test Phase')

        # Disable dropout for evaluation.
        model.eval()
        with torch.no_grad():
            total_loss = 0.0
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch.view(X_batch.shape[0], 1, X_batch.shape[-1]))
                # Same shape alignment as in training: comparing (B, 1) outputs
                # with (B,) targets would broadcast to (B, B) and distort the loss.
                total_loss += criterion(outputs.reshape(y_batch.shape), y_batch).item()

        test_loss = total_loss / len(test_loader)
        print('Loss: {:.4f}'.format(test_loss))
        # A plain float is logged, never a tensor.
        log_scalar('test_loss', test_loss, step=1)

In [32]:
# Hyper-parameters for this run; 'model' selects the architecture inside trainer().
params = {
    'lr': 0.001,
    'batch_size': 32,
    'epochs': 100,
    'window_size': 10,
    'model': "transformer",
}
trainer(params)

RestException: INVALID_PARAMETER_VALUE: Response: {'error_code': 'INVALID_PARAMETER_VALUE'}