# Intro/Imports
This is going to be a more granular, customizable neural net.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import joblib
import pathlib as path
import copy

# Data Read-in
unchanged from neuralnet.ipynb; this is non-negotiable. 
For any of the user changes to work, I need to know the shape of the data; so this split will be universal. 

In [2]:
rawData = pd.read_csv('STEMVisualsSynthData.csv', header=0)
#remove unneeded column
rawData.drop('Index_within_Experiment', axis = 1, inplace = True)
#X is inputs--the three Concentrations, F_in, I0 (light intensity), and c_N_in (6)
X = rawData[['Time', 'C_X', 'C_N', 'C_L', 'F_in', 'C_N_in', 'I0']]
Y = X.copy(deep=True)
#drop unnecessary rows in Y
Y.drop('F_in', axis = 1, inplace = True)
Y.drop('C_N_in', axis = 1, inplace = True)
Y.drop('I0', axis = 1, inplace = True)
Y.drop('Time', axis = 1, inplace = True)
#Y vals should be X concentrations one timestep ahead, so remove the first index
Y.drop(index=0, inplace=True)
#To keep the two consistent, remove the last index of X
X.drop(index=19999, inplace=True)
#separate the times out into their own little thing for later use

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop(index=19999, inplace=True)


# Train/Test Split
Keeping it simple stupid, there's only 1 split happening here. User chooses the percentage of data to hold out for testing.

In [6]:
#defaults
train_ratio = 0.2
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1 - train_ratio)

#function to call whenever wanting to change the train/test split
def trainSplit():
    train_ratio = float(input("What percentage of data to holdout for testing? (default 0.2) "))
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1 - train_ratio)

#Separate Time out--we don't want this as a feature the model learns (since it's already incorporated in by how X and Y are structured!)
#however, having the time values will be useful for plotting later
XTrainTime = X_train.pop('Time')
XTestTime = X_test.pop('Time')

# Preprocessing
Another thing I'm going to just have to have happen backend without the user's input.

In [8]:
#for both x and y, create a min max scaler
mmscalerX = preprocessing.MinMaxScaler()
#we'll fit and transform based on training data
X_train_minmax = mmscalerX.fit_transform(X_train)
#transform testing data based on training data
X_test_minmax = mmscalerX.transform(X_test)
#same process as above for Y
mmscalerY = preprocessing.MinMaxScaler()
Y_train_minmax = mmscalerY.fit_transform(Y_train)
Y_test_minmax = mmscalerY.transform(Y_test)

# Tensor Creation

In [9]:
#put our data into tensors
X_train = torch.tensor(X_train_minmax, dtype=torch.float32)
y_train = torch.tensor(Y_train_minmax, dtype=torch.float32)
X_test = torch.tensor(X_test_minmax, dtype=torch.float32)
y_test = torch.tensor(Y_test_minmax, dtype=torch.float32)