Here we examine an MLP (multilayer perceptron) for processing PL-based temperature data.

In [46]:
#import modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import itertools


In [47]:
# Import the data matrix.
# The Windows path is written as a raw string: in a normal string literal the
# backslash pairs it contains are invalid escape sequences, which newer Python
# versions warn about and which silently change meaning if a folder name ever
# starts with a letter such as "n" or "t".
fpath = r'C:\processed_data_sensor_2\data_matricies\svd_data_matrix\cycle1_week1\demeaned_data_matrix_first_cycle_week1'
df = pd.read_csv(fpath, sep=',', header=0)

In [48]:
# The first CSV column has no header, so pandas labels it 'Unnamed: 0';
# give it the name 'temperature' and preview the first two rows.
df = df.rename({'Unnamed: 0': 'temperature'}, axis='columns')
df.head(2)

Unnamed: 0,temperature,613.1581342695605,613.2882643171293,613.4183935430451,613.5485219466715,613.6786495273719,613.8087762845105,613.9389022174508,614.0690273255567,614.1991516081921,...,785.257345426425,785.3858247710828,785.5143024593286,785.6427784905485,785.7712528641274,785.8997255794505,786.0281966359041,786.1566660328739,786.2851337697449,786.413599845903
0,-30.0,-98.646169,-53.342284,-118.585942,-211.861645,-145.965698,-187.985642,-186.366585,-314.417365,-294.625502,...,-1469.783244,-1359.474947,-1294.192464,-1471.931681,-1309.164901,-1320.80611,-1364.259771,-1519.678929,-1440.182644,-1145.276082
1,-30.0,-147.646169,-148.342284,-195.585942,-234.861645,-238.965698,-231.985642,-272.366585,-134.417365,-248.625502,...,-1344.783244,-1442.474947,-1320.192464,-1424.931681,-1411.164901,-1250.80611,-1490.259771,-1271.678929,-1652.182644,-1286.276082


In [49]:
# Slice df into X and y: the first column is the target, every other column is a feature.
col_list = df.columns
target_col, feature_cols = col_list[0], col_list[1:]
X = df[feature_cols]
y = df[target_col]

# Perform the train-test split: 20% of the rows are held out, shuffled with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

In [50]:
# Implement a custom dataset class

class MyDataset(Dataset):
    """Map-style dataset pairing each entry of ``X`` with the matching entry of ``y``."""

    def __init__(self, X, y):
        """Keep references to the feature container ``X`` and the label container ``y``."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the length of ``y``."""
        return len(self.y)

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]


In [51]:
# Convert the train/test splits into float32 PyTorch tensors and wrap them in datasets.

def _as_float32_tensor(frame):
    """Return the values of a pandas object as a float32 torch tensor."""
    return torch.tensor(frame.values.astype('float32'))

# Training split
X_train_tensor = _as_float32_tensor(X_train)
y_train_tensor = _as_float32_tensor(y_train)
train_dataset = MyDataset(X_train_tensor, y_train_tensor)

# Test split
X_test_tensor = _as_float32_tensor(X_test)
y_test_tensor = _as_float32_tensor(y_test)
test_dataset = MyDataset(X_test_tensor, y_test_tensor)

In [52]:
# Both loaders use the same batch size; only the training loader is shuffled.
loader_batch_size = 60000
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=loader_batch_size)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=loader_batch_size)


In [53]:
# Define layer sizes

# The input width is the number of feature columns in X; the network emits one value.
input_layer, output_layer = X.shape[1], 1

# Hidden-layer widths, first to last.
h1_layer, h2_layer, h3_layer, h4_layer, h5_layer, h6_layer = 1000, 500, 250, 150, 75, 30

In [54]:
# define a neural network 

class PL_nn_regressor(nn.Module):
    """Fully connected regressor: six ReLU hidden layers followed by a linear output layer.

    Args:
        input_layer: Number of input features per sample (after flattening).
        h1_layer, h2_layer, h3_layer, h4_layer, h5_layer, h6_layer: Widths of the
            six hidden layers, in order.
        output_layer: Number of output values per sample.
    """

    def __init__(self, input_layer, h1_layer, h2_layer, h3_layer, h4_layer, h5_layer, h6_layer, output_layer):
        super().__init__()
        # Collapse everything after the batch dimension into one feature axis.
        self.flatten = nn.Flatten(start_dim=1)
        # Attribute names fc1..fc7 are kept so previously saved state_dicts still load.
        self.fc1 = nn.Linear(input_layer, h1_layer)
        self.fc2 = nn.Linear(h1_layer, h2_layer)
        self.fc3 = nn.Linear(h2_layer, h3_layer)
        self.fc4 = nn.Linear(h3_layer, h4_layer)
        self.fc5 = nn.Linear(h4_layer, h5_layer)
        self.fc6 = nn.Linear(h5_layer, h6_layer)
        self.fc7 = nn.Linear(h6_layer, output_layer)

    def feature_extractor(self, x):
        """Return the ReLU activations of the last hidden layer (fc6).

        Args:
            x: Input tensor whose first dimension is the batch; the remaining
                dimensions are flattened before the first linear layer.

        Returns:
            Tensor of shape (batch, h6_layer).
        """
        x = self.flatten(x)
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6):
            x = F.relu(hidden(x))
        return x

    def forward(self, x):
        """Return the network output of shape (batch, output_layer).

        The hidden stack is shared with ``feature_extractor`` so the two code
        paths cannot drift apart; only the final linear layer (no activation)
        is applied on top.
        """
        return self.fc7(self.feature_extractor(x))

    
        

In [55]:
# Instantiate the model, optimizer and define the loss function.
# Seed torch's global RNG first so the weight initialisation (and any later draw from
# the global generator) is repeatable across "Restart Kernel -> Run All", matching the
# fixed random_state already used for the train/test split.
torch.manual_seed(42)
model = PL_nn_regressor(input_layer, h1_layer, h2_layer, h3_layer, h4_layer, h5_layer, h6_layer, output_layer)
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.91, 0.99))
criterion = nn.MSELoss()

In [56]:
# Number of feature columns in X.
X.shape[1]


1340

In [57]:
# Shape of the target vector y.
y.shape

(342952,)

In [None]:
# Training configuration
num_epochs = 30  # Maximum number of epochs
early_stopping_threshold = 0.1  # Stop once the training loss moves by no more than this over 3 epochs
epoch_losses = []  # Average training loss of every completed epoch

# Run every batch on whatever device the model's parameters currently live on.
model_device = next(model.parameters()).device

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    train_loss = 0.0

    for batch_x, batch_y in train_dataloader:
        batch_x, batch_y = batch_x.to(model_device), batch_y.to(model_device)
        optimizer.zero_grad()  # Clear gradients left over from the previous step

        outputs = model(batch_x)
        # Targets are 1-D; add a trailing axis so they line up with the (batch, 1) outputs.
        loss = criterion(outputs, batch_y.unsqueeze(1))
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_train_loss = train_loss / len(train_dataloader)
    epoch_losses.append(avg_train_loss)

    # Early stopping: as soon as at least 3 epoch losses exist, stop when the spread
    # (max - min) of the most recent 3 is within the threshold.
    if len(epoch_losses) >= 3:
        last_three_losses = epoch_losses[-3:]
        max_loss = max(last_three_losses)
        min_loss = min(last_three_losses)

        if max_loss - min_loss <= early_stopping_threshold:
            print(f"Early stopping triggered at epoch {epoch+1}.")
            break

    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation on the held-out split (no gradient tracking needed).
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for batch_x, batch_y in test_dataloader:
            batch_x, batch_y = batch_x.to(model_device), batch_y.to(model_device)
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y.unsqueeze(1))
            val_loss += loss.item()

    val_loss /= len(test_dataloader)  # Mean of the per-batch validation losses

    print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}')


Epoch 1/30, Training Loss: 906.2576
Epoch 1/30, Validation Loss: 143.0416
Epoch 2/30, Training Loss: 130.1379
Epoch 2/30, Validation Loss: 312.1274
Epoch 3/30, Training Loss: 166.7900
Epoch 3/30, Validation Loss: 132.7069


In [None]:
### Testing loop
# Evaluate on the held-out split with gradients turned off and the model in evaluation mode.
# test_loss collects one value per test batch: the square root of that batch's MSE.
test_loss = []

model.eval()
with torch.no_grad():
    for batch_x, batch_y in test_dataloader:
        batch_pred = model(batch_x)
        pred_loss = criterion(batch_pred, batch_y.unsqueeze(1))
        test_loss.append(np.sqrt(pred_loss.item()))
    


In [None]:
# Predict on the full training split. This is inference only, so disable autograd:
# otherwise the intermediate activations of every layer are kept for a backward
# pass that never happens.
model.eval()
with torch.no_grad():
    y_pred = model(X_train_tensor)

In [None]:
# Detach from autograd, convert to NumPy and drop the singleton axes.
y_train_pred = np.squeeze(y_pred.detach().numpy())

In [None]:
#y_train.shape

In [None]:
# Predicted vs. true temperature on the training split; labelled so the figure stands alone.
fig, ax = plt.subplots()
ax.plot(y_train, y_train_pred, 'x')
ax.set(xlabel='True temperature', ylabel='Predicted temperature', title='Training set: predicted vs. true');

In [None]:
# NumPy copy of the training predictions, detached from autograd (singleton axes are kept).
a = y_pred.detach().numpy()


In [None]:
# Root-mean-square error of the training predictions.
# NOTE(review): the previous expression subtracted sqrt(mean(y_train)) from sqrt(mean(a)),
# which is NaN whenever a mean is negative and does not measure prediction error;
# RMSE is assumed to be the intended metric - confirm.
# `a` is flattened first so it is compared element-wise with the 1-D targets.
np.sqrt(np.mean((a.ravel() - y_train.to_numpy()) ** 2))

In [None]:

# Predict on the held-out test split in evaluation mode, without building an autograd graph.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor)

In [None]:
# Predicted vs. true temperature on the test split; labelled so the figure stands alone.
fig, ax = plt.subplots()
ax.plot(y_test, y_test_pred.detach().numpy(), 'x')
ax.set(xlabel='True temperature', ylabel='Predicted temperature', title='Test set: predicted vs. true');

In [None]:
# Save the model's state dictionary
#torch.save(model.state_dict(), 'model.pth')


In [None]:
#plt.plot(y_test,test_loss, 'x')

In [None]:
# Save the model and optimizer states.
# The training hyperparameters are read back from the live objects instead of from
# separately maintained variables, so the checkpoint records what was actually used
# and the cell does not fail on names that were never assigned.
learning_rate = optimizer.param_groups[0]['lr']  # learning rate of the (single) parameter group
batch_size = train_dataloader.batch_size
epochs = num_epochs

# Save the model, optimizer, and training state
torch.save({
    'epoch': epoch,  # Last value of the training-loop counter
    'model_state_dict': model.state_dict(),  # Model weights
    'optimizer_state_dict': optimizer.state_dict(),  # Optimizer state
    'loss': avg_train_loss,  # Last training loss (optional)
    'input_layer': input_layer,  # Model architecture parameters
    'h1_layer': h1_layer,
    'h2_layer': h2_layer,
    'h3_layer': h3_layer,
    'h4_layer': h4_layer,
    'h5_layer': h5_layer,
    'h6_layer': h6_layer,
    'output_layer': output_layer,
    'learning_rate': learning_rate,  # Training parameters
    'batch_size': batch_size,
    'epochs': epochs,
}, 'checkpoint_2_ml_pl_cycle_1.pth')


In [None]:
# Choose the device first so the checkpoint tensors can be mapped straight onto it
# (this also lets a checkpoint written on a GPU machine load on a CPU-only one).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the checkpoint
checkpoint = torch.load('checkpoint_2_ml_pl_cycle_1.pth', map_location=device)

# Retrieve hyperparameters and training state from the checkpoint
input_layer = checkpoint['input_layer']
h1_layer = checkpoint['h1_layer']
h2_layer = checkpoint['h2_layer']
h3_layer = checkpoint['h3_layer']
h4_layer = checkpoint['h4_layer']
h5_layer = checkpoint['h5_layer']
h6_layer = checkpoint['h6_layer']
output_layer = checkpoint['output_layer']
learning_rate = checkpoint['learning_rate']
batch_size = checkpoint['batch_size']
epochs = checkpoint['epochs']
epoch = checkpoint['epoch']  # Current epoch at the time of saving
avg_train_loss = checkpoint['loss']  # Last training loss

# Re-initialize the model with the saved architecture hyperparameters and restore its weights
model = PL_nn_regressor(input_layer, h1_layer, h2_layer, h3_layer, h4_layer, h5_layer, h6_layer, output_layer)
model.load_state_dict(checkpoint['model_state_dict'])

# Move the model BEFORE building the optimizer: the optimizer state is placed on the
# device of the parameters it is loaded for, so moving the model afterwards would leave
# the Adam buffers on a different device than the weights.
model.to(device)

# Re-initialize the optimizer with the saved learning rate and restore its state
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Set the model to training mode; training can now resume from the loaded state
model.train()

# Now, you can resume training using the loaded model, optimizer, and training state


In [None]:
# Training configuration
num_epochs = 30  # Maximum number of epochs
early_stopping_threshold = 0.1  # Stop once the training loss moves by no more than this over 3 epochs
epoch_losses = []  # Average training loss of every completed epoch

# The model may have been moved to another device after loading; send every batch
# to wherever its parameters live so inputs and weights always agree.
model_device = next(model.parameters()).device

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    train_loss = 0.0

    for batch_x, batch_y in train_dataloader:
        batch_x, batch_y = batch_x.to(model_device), batch_y.to(model_device)
        optimizer.zero_grad()  # Clear gradients left over from the previous step

        outputs = model(batch_x)
        # Targets are 1-D; add a trailing axis so they line up with the model outputs.
        loss = criterion(outputs, batch_y.unsqueeze(1))
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_train_loss = train_loss / len(train_dataloader)
    epoch_losses.append(avg_train_loss)

    # Early stopping: as soon as at least 3 epoch losses exist, stop when the spread
    # (max - min) of the most recent 3 is within the threshold.
    if len(epoch_losses) >= 3:
        last_three_losses = epoch_losses[-3:]
        max_loss = max(last_three_losses)
        min_loss = min(last_three_losses)

        if max_loss - min_loss <= early_stopping_threshold:
            print(f"Early stopping triggered at epoch {epoch+1}.")
            break

    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}")

    # Validation on the held-out split (no gradient tracking needed).
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for batch_x, batch_y in test_dataloader:
            batch_x, batch_y = batch_x.to(model_device), batch_y.to(model_device)
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y.unsqueeze(1))
            val_loss += loss.item()

    val_loss /= len(test_dataloader)  # Mean of the per-batch validation losses

    print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}')



In [None]:
# Load the checkpoint first; map to CPU so it loads regardless of where it was written.
checkpoint = torch.load('checkpoint_ml_pl_cycle_1.pth', map_location='cpu')

# PL_nn_regressor requires all eight layer sizes. Prefer sizes stored in the checkpoint
# and fall back to the notebook's current values when the checkpoint does not carry them.
layer_keys = ('input_layer', 'h1_layer', 'h2_layer', 'h3_layer',
              'h4_layer', 'h5_layer', 'h6_layer', 'output_layer')
current_sizes = (input_layer, h1_layer, h2_layer, h3_layer,
                 h4_layer, h5_layer, h6_layer, output_layer)
layer_sizes = [checkpoint.get(key, current) for key, current in zip(layer_keys, current_sizes)]

# Create the model and optimizer again
model = PL_nn_regressor(*layer_sizes)
optimizer = torch.optim.Adam(model.parameters())

# Load the model state dict
model.load_state_dict(checkpoint['model_state_dict'])

# Load the optimizer state dict (this also restores the saved learning rate and betas)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Optionally, resume training from the saved epoch and loss
epoch = checkpoint['epoch']
loss = checkpoint['loss']


In [None]:
# Load the model to the correct device (CPU or GPU)
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#model.load_state_dict(torch.load('model.pth', map_location=device))
