### Initialise

In [1]:
## Import necessary packages
import time;
import numpy as np; 
import matplotlib.pyplot as plt; 
import pandas as pd; 
from sklearn.preprocessing import MinMaxScaler; 
from sklearn.model_selection import train_test_split

import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

In [2]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Using {} device'.format(device))

Using cpu device


### Preprocess Data

In [3]:
# Read the cleaned dataset from a path relative to the notebook.
data = pd.read_csv('data/clean_data.csv')

# Print the column labels and the (rows, columns) shape, then preview the first rows.
print(data.columns, "\n\n", data.shape)
data.head()

Index(['Time (h)', 'Aeration rate(Fg:L/h)', 'Sugar feed rate(Fs:L/h)',
       'Acid flow rate(Fa:L/h)', 'Base flow rate(Fb:L/h)',
       'Heating/cooling water flow rate(Fc:L/h)',
       'Heating water flow rate(Fh:L/h)',
       'Water for injection/dilution(Fw:L/h)',
       'Air head pressure(pressure:bar)', 'Substrate concentration(S:g/L)',
       'Penicillin concentration(P:g/L)', 'Vessel Volume(V:L)', 'pH(pH:pH)',
       'Temperature(T:K)', 'PAA flow(Fpaa:PAA flow (L/h))',
       'Oil flow(Foil:L/hr)', 'Oxygen Uptake Rate(OUR:(g min^{-1}))',
       '0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)'],
      dtype='object') 

 (113935, 18)


Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Air head pressure(pressure:bar),Substrate concentration(S:g/L),Penicillin concentration(P:g/L),Vessel Volume(V:L),pH(pH:pH),Temperature(T:K),PAA flow(Fpaa:PAA flow (L/h)),Oil flow(Foil:L/hr),Oxygen Uptake Rate(OUR:(g min^{-1})),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)
0,0.2,30.0,8.0,0.0,30.118,9.8335,0.0001,0.0,0.6,0.95749,1.0178e-25,58479.0,6.4472,298.22,5.0,22.0,0.48051,0.0
1,0.4,30.0,8.0,0.0,51.221,18.155,0.0001,0.0,0.6,1.0046,0.001,58487.0,6.4932,298.17,5.0,22.0,0.058147,0.0
2,0.6,30.0,8.0,0.0,54.302,9.5982,0.0001,0.0,0.6,1.0498,0.00099934,58495.0,6.5425,298.14,5.0,22.0,-0.041505,0.0
3,0.8,30.0,8.0,0.0,37.816,4.3395,0.0001,0.0,0.6,1.0942,0.00099874,58499.0,6.5753,298.11,5.0,22.0,-0.056737,0.0
4,1.0,30.0,8.0,0.5181,18.908,1.1045,0.0001,0.0,0.6,1.137,0.00099821,58501.0,6.5825,298.09,5.0,22.0,-0.049975,0.0


In [4]:
# Min-max scale every column to the [0, 1] range.
# The scaler is fitted on the full frame; only afterwards is the result truncated.
scaler = MinMaxScaler()

d_scaled = (
    pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
    .iloc[:1000]        # temporary: keep only the first 1000 rows
    .astype('float32')  # downcast from float64 (original note: to increase speed)
)
d_scaled.head()

Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Air head pressure(pressure:bar),Substrate concentration(S:g/L),Penicillin concentration(P:g/L),Vessel Volume(V:L),pH(pH:pH),Temperature(T:K),PAA flow(Fpaa:PAA flow (L/h)),Oil flow(Foil:L/hr),Oxygen Uptake Rate(OUR:(g min^{-1})),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)
0,0.0,0.181818,0.040541,0.0,0.133858,0.006556,0.0,0.0,0.0,0.008306,1.791808e-27,0.049276,0.767126,0.258427,0.333333,0.0,0.21523,0.0
1,0.00069,0.181818,0.040541,0.0,0.227649,0.012103,0.0,0.0,0.0,0.008715,2.763729e-05,0.04948,0.800686,0.249064,0.333333,0.0,0.162455,0.0
2,0.00138,0.181818,0.040541,0.0,0.241342,0.006399,0.0,0.0,0.0,0.009107,2.761905e-05,0.049685,0.836653,0.243446,0.333333,0.0,0.150004,0.0
3,0.00207,0.181818,0.040541,0.0,0.168071,0.002893,0.0,0.0,0.0,0.009492,2.760247e-05,0.049787,0.860582,0.237828,0.333333,0.0,0.1481,0.0
4,0.002761,0.181818,0.040541,0.039866,0.084036,0.000736,0.0,0.0,0.0,0.009864,2.758782e-05,0.049838,0.865835,0.234082,0.333333,0.0,0.148945,0.0


In [5]:
# Input (feature) columns, selected from the scaled frame in this exact order.
x_keys = [
    "Time (h)",
    "Aeration rate(Fg:L/h)",
    "Sugar feed rate(Fs:L/h)",
    "Acid flow rate(Fa:L/h)",
    "Base flow rate(Fb:L/h)",
    "Heating/cooling water flow rate(Fc:L/h)",
    "Heating water flow rate(Fh:L/h)",
    "Water for injection/dilution(Fw:L/h)",
    "Substrate concentration(S:g/L)",
    "PAA flow(Fpaa:PAA flow (L/h))",
    "Oil flow(Foil:L/hr)",
    "0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)",
    "Oxygen Uptake Rate(OUR:(g min^{-1}))",
    # Converted variables
    "Air head pressure(pressure:bar)",
    "Temperature(T:K)",
    "pH(pH:pH)",
    "Vessel Volume(V:L)",
]
x = d_scaled[x_keys]

# Sanity check: the number of requested keys should equal the number of selected columns.
print(len(x_keys)," keys in ", x.shape[1], "cols")
x.head()

17  keys in  17 cols


Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Substrate concentration(S:g/L),PAA flow(Fpaa:PAA flow (L/h)),Oil flow(Foil:L/hr),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref),Oxygen Uptake Rate(OUR:(g min^{-1})),Air head pressure(pressure:bar),Temperature(T:K),pH(pH:pH),Vessel Volume(V:L)
0,0.0,0.181818,0.040541,0.0,0.133858,0.006556,0.0,0.0,0.008306,0.333333,0.0,0.0,0.21523,0.0,0.258427,0.767126,0.049276
1,0.00069,0.181818,0.040541,0.0,0.227649,0.012103,0.0,0.0,0.008715,0.333333,0.0,0.0,0.162455,0.0,0.249064,0.800686,0.04948
2,0.00138,0.181818,0.040541,0.0,0.241342,0.006399,0.0,0.0,0.009107,0.333333,0.0,0.0,0.150004,0.0,0.243446,0.836653,0.049685
3,0.00207,0.181818,0.040541,0.0,0.168071,0.002893,0.0,0.0,0.009492,0.333333,0.0,0.0,0.1481,0.0,0.237828,0.860582,0.049787
4,0.002761,0.181818,0.040541,0.039866,0.084036,0.000736,0.0,0.0,0.009864,0.333333,0.0,0.0,0.148945,0.0,0.234082,0.865835,0.049838


In [6]:
# Target columns: every scaled column that is not used as an input feature.
# Filtering d_scaled.columns (instead of taking a set difference) keeps the
# original column order, so the layout of y is the same on every run; set
# iteration order for strings can change between interpreter sessions.
y_keys = [col for col in d_scaled.columns if col not in x_keys]
y = d_scaled[y_keys]

# Sanity check: the number of target keys should equal the number of selected columns.
print(len(y_keys)," keys in ", len(y.columns), "cols")
y.head()

1  keys in  1 cols


Unnamed: 0,Penicillin concentration(P:g/L)
0,1.791808e-27
1,2.763729e-05
2,2.761905e-05
3,2.760247e-05
4,2.758782e-05


In [8]:
# Number of input features the linear model expects.
# NOTE(review): x has 17 feature columns but n is 2 — confirm the intended value.
n = 2

# Trainable parameters: weight matrix A of shape (1, n) and bias b of shape (1,),
# both drawn from a standard normal distribution and tracked by autograd.
A = torch.randn(1, n, requires_grad=True)
b = torch.randn(1, requires_grad=True)


def model(x_input):
    """Return the affine prediction ``A @ x_input + b``.

    ``x_input`` must be a 2-D tensor whose first dimension equals ``n``
    (one column per sample); the result has shape ``(1, num_samples)``.
    """
    weighted = torch.mm(A, x_input)
    return weighted + b


### Loss function definition ###

def loss(y_predicted, y_target):
    """Return the sum of squared differences between prediction and target."""
    squared_error = (y_predicted - y_target) ** 2
    return squared_error.sum()

In [None]:
### Training the model ###

# NOTE(review): x_dataset and y_dataset are not defined in any visible cell.
# They must exist before this cell runs — presumably tensors built from x and y,
# with x_dataset shaped (n, num_samples) to match A; TODO confirm and add the cell.

# Set up the optimizer so it updates A and b.
# The optimizer is reached through the already-imported `torch` package: the bare
# name `optim` is never imported in this notebook and would raise NameError.
optimizer = torch.optim.Adam([A, b], lr=0.1)

# Main optimization loop
for t in range(2000):
    # Reset the gradients accumulated by the previous step.
    optimizer.zero_grad()
    # Compute the current predicted y's from x_dataset.
    y_predicted = model(x_dataset)
    # Measure how far off the prediction is.
    current_loss = loss(y_predicted, y_dataset)
    # Compute the gradient of the loss with respect to A and b.
    current_loss.backward()
    # Update A and b accordingly.
    optimizer.step()
    print(f"t = {t}, loss = {current_loss}, A = {A.detach().numpy()}, b = {b.item()}")