# Gaussian Process Regression

## Import Libraries

In [1]:
import numpy as np
import torch
from sklearn.metrics import precision_score, recall_score
import gpytorch
from scipy.io import loadmat
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

## Load the Dataset

In [2]:
# Load the training dataset
train_data = loadmat('../data/sarcos_inv.mat')['sarcos_inv'].astype(np.float32)
val_data, train_data = train_data[:4448], train_data[4484:].astype(np.float32)
test_data = loadmat('../data/sarcos_inv_test.mat')['sarcos_inv_test'].astype(np.float32)

X_train, Y_train = train_data[:, :21], train_data[:, 21:]
X_val, Y_val = val_data[:, :21], val_data[:, 21:]
X_test, Y_test = test_data[:, :21], test_data[:, 21:]

# Print the shape of the training and testing datasets
print(f"Shape of the training dataset: {X_train.shape}, {Y_train.shape}")
print(f"Shape of the validation dataset: {X_val.shape}, {Y_val.shape}")
print(f"Shape of the testing dataset: {X_test.shape}, {Y_test.shape}")

Shape of the training dataset: (40000, 21), (40000, 7)
Shape of the validation dataset: (4448, 21), (4448, 7)
Shape of the testing dataset: (4449, 21), (4449, 7)


### Create PyTorch Dataset

In [3]:
class DatasetGenerator(Dataset):
    def __init__(self, features, targets):
        self.features = torch.tensor(features, dtype=torch.float64)
        self.targets = torch.tensor(targets, dtype=torch.float64)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

### Data Preparation

In [4]:
# Create dataset instances
train_dataset = DatasetGenerator(X_train, Y_train)
val_dataset = DatasetGenerator(X_val, Y_val)
test_dataset = DatasetGenerator(X_test, Y_test)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

## Define the GP Model

In [5]:
# class ExactGPModel(gpytorch.models.ExactGP):
#     def __init__(self, train_x, train_y, likelihood):
#         super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
#         # Using MultitaskMean for multi-output
#         self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=train_y.size(1))
#         self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
#         
#     def forward(self, x):
#         mean_x = self.mean_module(x)
#         covar_x = self.covar_module(x)
#         return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


## Train the GP Model

In [6]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize likelihood and model without passing train_x and train_y
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
model = ExactGPModel(None, None, likelihood).to(device)

# Set the model in training mode
model.train()
likelihood.train()

# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# Parameters for training
num_epochs = 100

# Lists to store metrics
losses = []
accuracies = []
precisions = []
recalls = []

# Training loop with DataLoader
for epoch in range(num_epochs):
    model.train()
    likelihood.train()
    
    total_loss = 0.0
    
    # Iterate over batches using DataLoader
    for batch_x, batch_y in train_loader:
        # Set the model's training inputs
        model.set_train_data(inputs=batch_x, targets=batch_y) 
        
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)  # Move to GPU
        
        optimizer.zero_grad()  # Zero gradients from previous step
        
         # Set new training data for each batch
        
        output = model(batch_x)  # Forward pass (using only training inputs)
        loss = -mll(output, batch_y)  # Compute loss
        loss.backward()  # Backward pass
        
        total_loss += loss.item()
        
        optimizer.step()  # Update parameters
    
    # Average loss for the epoch
    avg_loss = total_loss / len(train_loader)
    losses.append(avg_loss)

    print(f"Epoch {epoch + 1}: Loss: {avg_loss:.4f}")

print("Training completed.")

RuntimeError: Cannot modify dtype of inputs (expected None, found torch.float64).

## Make Predictions

In [None]:
# Set the model in evaluation mode
model.eval()
likelihood.eval()

with torch.no_grad(), gpytorch.settings.fast_pred_var():
    observed_pred = likelihood(model(torch.tensor(X_test, dtype=torch.float32)))

mean = observed_pred.mean
lower, upper = observed_pred.confidence_region()

# Visualize results (assuming you're interested in plotting a specific feature)
plt.figure(figsize=(10, 6))
plt.plot(Y_test[:100], label='True Values', color='blue')  # Plot true values for first 100 samples
plt.plot(mean.numpy()[:100], 'k*', label='Predicted Mean')  # Predicted mean for first 100 samples
plt.fill_between(range(100), lower.numpy()[:100], upper.numpy()[:100], alpha=0.5,
                 label='Confidence Interval')
plt.title('Gaussian Process Regression Predictions')
plt.xlabel('Sample Index')
plt.ylabel('Torque')
plt.legend()
plt.show()