# Imports/Intro
This file's going to take the models created in neuralnet.ipynb and attempt to generate a set of 'experimental data' from their predictions.

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import joblib
import pathlib as path
import copy
import random
# Check PyTorch version
torch.__version__

'2.0.1+cu117'

# Data & Model loadin
Much like previous notebook, we need access to the data and models. 

In [15]:
rawData = pd.read_csv('STEMVisualsSynthData.csv', header=0)
#remove unneeded column
rawData.drop('Index_within_Experiment', axis = 1, inplace = True)
#X is inputs--the three Concentrations, F_in, I0 (light intensity), and c_N_in (6)
X = rawData[['Time', 'C_X', 'C_N', 'C_L', 'F_in', 'C_N_in', 'I0']]
Y = X.copy(deep=True)
#drop unnecessary rows in Y
Y.drop('F_in', axis = 1, inplace = True)
Y.drop('C_N_in', axis = 1, inplace = True)
Y.drop('I0', axis = 1, inplace = True)
Y.drop('Time', axis = 1, inplace = True)
#Y vals should be X concentrations one timestep ahead, so remove the first index
Y.drop(index=0, inplace=True)
#To keep the two consistent, remove the last index of X
X.drop(index=19999, inplace=True)
#separate the times out into their own little thing for later use

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop(index=19999, inplace=True)


In [16]:
model = nn.Sequential(
    nn.Linear(6, 50),
    nn.LeakyReLU(),
    nn.Linear(50, 25),
    nn.LeakyReLU(),
    nn.Linear(25, 12),
    nn.LeakyReLU(),
    nn.Linear(12, 3)
)
#loss/optimizer
loss_fn = nn.MSELoss() #Mean Squared Error
optimizer = optim.Adam(model.parameters(), lr=0.0005)
model.load_state_dict(torch.load('models/model.pt'))
#scalers
mmscalerX = joblib.load('models/mmscalerX.pkl')
mmscalerY = joblib.load('models/mmscalerY.pkl')

# Experiment Isolation
- there are 100 experiments with 200 datapoints each making up our 20k datapoint set
- randomly pick a number 0-99, then select the 200 matching datapoints to create a subset representing 1 experiment
- can set random seed if you want same run every time at the top

In [17]:
random.seed(42) #comment out if you want different random numbers
#randomly select an experiment
expNum = random.randint(0, 99)
indexStart = expNum*200
indexEnd = indexStart + 200
#select the experiment
X_exp = X.iloc[indexStart:indexEnd]
Y_exp = Y.iloc[indexStart:indexEnd]
#to match model shape, pop time column
XTimes = X_exp.pop('Time')


# Generating Model Output
- the model needs the initial conditions of the experiment; our outputs will be the 3 concentrations
- lets create a copy of the x dataframe's first row to be our initial input
- we'll generate our model's experiment one row at a time! 

In [38]:
XIn = X_exp.iloc[0]
#scale the input
XIn = mmscalerX.transform([XIn])
XIn = torch.tensor(XIn, dtype=torch.float32)
# 1. Set the model in evaluation mode
model.eval()

# 2. Setup the inference mode context manager
with torch.inference_mode():
  # 3. Make sure the calculations are done with the model and data on the same device
  y_preds = model(XIn)
y_preds 
#scale the output back up
y_preds = y_preds.detach().numpy()
y_preds = mmscalerY.inverse_transform(y_preds)
y_preds = pd.DataFrame(y_preds, columns=['C_X', 'C_N', 'C_L'])
y_preds 



Unnamed: 0,C_X,C_N,C_L
0,1.199901,1.966507,0.000435
