In [1]:
import pandas as pd
import numpy as np
import math
from matplotlib import pyplot as plt

import torch
import tqdm
from tqdm import notebook
import gpytorch
from gpytorch.means import ConstantMean, LinearMean
from gpytorch.kernels import RBFKernel, ScaleKernel
from gpytorch.variational import VariationalStrategy, CholeskyVariationalDistribution
from gpytorch.distributions import MultivariateNormal
from gpytorch.models import ApproximateGP, GP
from gpytorch.mlls import VariationalELBO, AddedLossTerm
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
from gpytorch.mlls import DeepApproximateMLL

In [2]:
# Column labels applied to the CSV, in file order.
# NOTE(review): `names=` is given without `header=0`, so if the file starts with
# its own header line pandas reads that line as the first data row (label 0).
# The following cell drops label 0 — presumably for this reason; confirm against
# the file and consider `header=0` instead.
column_names = [
    'Index_within_Experiment',
    'Time',
    'C_X',
    'C_N',
    'C_L',
    'C_x0',
    'C_N0',
    'F_in',
    'C_N_in',
    'I0',
]

df = pd.read_csv("STEMVisualsSynthData.csv", names=column_names)
    

In [3]:
# Remove the row labelled 0 (presumably the CSV's own header line that was read
# in as data — verify against the file).
df = df.drop(index=0)

In [4]:
# Display the loaded frame to eyeball its columns and values.
df

Unnamed: 0,Index_within_Experiment,Time,C_X,C_N,C_L,C_x0,C_N0,F_in,C_N_in,I0
1,0,0,1.5,1.6,0,1.5,1.6,0.006,7,103
2,1,0.753768844,1.524642929,1.622073457,7.45E-05,1.5,1.6,0.006,7,103
3,2,1.507537688,1.548986453,1.644204727,0.000148165,1.5,1.6,0.006,7,103
4,3,2.261306533,1.573020863,1.666397183,0.000220839,1.5,1.6,0.006,7,103
5,4,3.015075377,1.596735073,1.688654616,0.000292554,1.5,1.6,0.006,7,103
...,...,...,...,...,...,...,...,...,...,...
19996,195,146.9849246,2.937322964,11.1514571,0.006985099,0.3,0.6,0.009,9,116
19997,196,147.7386935,2.939438747,11.20784764,0.006985492,0.3,0.6,0.009,9,116
19998,197,148.4924623,2.941519636,11.26424595,0.006985804,0.3,0.6,0.009,9,116
19999,198,149.2462312,2.943566238,11.32065189,0.006986038,0.3,0.6,0.009,9,116


## Split and Format Data

In [5]:
from sklearn.model_selection import train_test_split

# Fixed seed so the random split — and everything trained on it — is identical
# on every Restart & Run All.
SPLIT_SEED = 42

# train_size=0.2 keeps 20% of the rows for training; the remaining rows become
# the test set.
df_train, df_test = train_test_split(df, train_size=0.2, random_state=SPLIT_SEED)

In [6]:
# Positional column indices into the frame (see the `names` list used on load):
#   inputs  -> Time, C_x0, C_N0, F_in, C_N_in, I0
#   targets -> C_X, C_N, C_L
feature_cols = [1, 5, 6, 7, 8, 9]
target_cols = [2, 3, 4]

x_train, y_train = df_train.iloc[:, feature_cols], df_train.iloc[:, target_cols]

x_test, y_test = df_test.iloc[:, feature_cols], df_test.iloc[:, target_cols]

all_x, all_y = df.iloc[:, feature_cols], df.iloc[:, target_cols]

In [7]:
# Display the training inputs selected above.
x_train

Unnamed: 0,Time,C_x0,C_N0,F_in,C_N_in,I0
19794,145.4773869,0.6,2,0.014,14,130
6620,14.32160804,1.3,1.8,0.004,6,166
14110,82.16080402,1.7,1.1,0.014,7,174
17238,27.88944724,1.6,1.4,0.01,9,185
5944,107.7889447,1.8,1.1,0.007,10,187
...,...,...,...,...,...,...
17658,42.96482412,0.2,1.9,0.011,6,180
1128,95.72864322,0.7,1.1,0.005,8,167
4427,19.59798995,1.4,0.5,0.002,8,165
7372,128.8944724,0.4,1.8,0.004,13,197


In [8]:
# Cast every column to float, then build torch tensors from the underlying
# NumPy arrays.
# NOTE: x_train / y_train are rebound from DataFrames to tensors here, so this
# cell cannot be re-run on its own — re-run the column-selection cell first.
x_train_float = x_train.astype(float)
y_train_float = y_train.astype(float)

x_train = torch.tensor(x_train_float.to_numpy())
y_train = torch.tensor(y_train_float.to_numpy())

In [9]:
# Sanity check: (number of training rows, number of input columns).
x_train.shape

torch.Size([4000, 6])

In [16]:
# Display the training targets tensor (one column per target variable).
y_train

tensor([[3.0738e+00, 2.9128e+01, 6.5656e-03],
        [1.9649e+00, 1.8982e+00, 1.9682e-03],
        [3.2098e+00, 8.2837e+00, 4.7573e-03],
        ...,
        [2.1898e+00, 5.0786e-01, 2.3999e-03],
        [3.3815e+00, 7.0243e+00, 7.8001e-03],
        [3.3918e+00, 8.5133e+00, 6.9322e-03]], dtype=torch.float64)

In [12]:
# We will use the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, x_train, y_train, likelihood):
        super(ExactGPModel, self).__init__(x_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(x_train, y_train, likelihood)

In [17]:
# this is for running the notebook in our testing framework
import os
smoke_test = ('CI' in os.environ)
training_iter = 2 if smoke_test else 50


# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)


In [18]:
for i in range(training_iter):
    # Zero gradients from previous iteration
    optimizer.zero_grad()
    # Output from model
    output = model(x_train)
    # Calc loss and backprop gradients
    loss = -mll(output, y_train)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
        i + 1, training_iter, loss.item(),
        model.covar_module.base_kernel.lengthscale.item(),
        model.likelihood.noise.item()
    ))
    optimizer.step()

RuntimeError: The size of tensor a (3) must match the size of tensor b (4000) at non-singleton dimension 1