### Initialise

In [73]:
## Import necessary packages
import time;
import numpy as np; 
import matplotlib.pyplot as plt; 
import pandas as pd; 
from sklearn.preprocessing import MinMaxScaler; 
from sklearn.model_selection import train_test_split

import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

In [74]:
# Basic Initialisations
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cpu device


### Preprocess Data

In [75]:
data = pd.read_csv('data/clean_data.csv'); 
print(data.columns, "\n\n" ,data.shape); 
data.head()

Index(['Time (h)', 'Aeration rate(Fg:L/h)', 'Sugar feed rate(Fs:L/h)',
       'Acid flow rate(Fa:L/h)', 'Base flow rate(Fb:L/h)',
       'Heating/cooling water flow rate(Fc:L/h)',
       'Heating water flow rate(Fh:L/h)',
       'Water for injection/dilution(Fw:L/h)',
       'Air head pressure(pressure:bar)', 'Dumped broth flow(Fremoved:L/h)',
       'Substrate concentration(S:g/L)',
       'Dissolved oxygen concentration(DO2:mg/L)',
       'Penicillin concentration(P:g/L)', 'Vessel Volume(V:L)', 'pH(pH:pH)',
       'Temperature(T:K)', 'Generated heat(Q:kJ)',
       'carbon dioxide percent in off-gas(CO2outgas:%)',
       'PAA flow(Fpaa:PAA flow (L/h))', 'Oil flow(Foil:L/hr)',
       'Oxygen Uptake Rate(OUR:(g min^{-1}))',
       'Oxygen in percent in off-gas(O2:O2  (%))',
       'Fault reference(Fault_ref:Fault ref)',
       '0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)'],
      dtype='object') 

 (113935, 24)


Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Air head pressure(pressure:bar),Dumped broth flow(Fremoved:L/h),...,pH(pH:pH),Temperature(T:K),Generated heat(Q:kJ),carbon dioxide percent in off-gas(CO2outgas:%),PAA flow(Fpaa:PAA flow (L/h)),Oil flow(Foil:L/hr),Oxygen Uptake Rate(OUR:(g min^{-1})),Oxygen in percent in off-gas(O2:O2 (%)),Fault reference(Fault_ref:Fault ref),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)
0,0.2,30.0,8.0,0.0,30.118,9.8335,0.0001,0.0,0.6,0.0,...,6.4472,298.22,1e-06,0.089514,5.0,22.0,0.48051,0.19595,0.0,0.0
1,0.4,30.0,8.0,0.0,51.221,18.155,0.0001,0.0,0.6,0.0,...,6.4932,298.17,1e-06,0.10176,5.0,22.0,0.058147,0.2039,0.0,0.0
2,0.6,30.0,8.0,0.0,54.302,9.5982,0.0001,0.0,0.6,0.0,...,6.5425,298.14,1e-06,0.1058,5.0,22.0,-0.041505,0.20575,0.0,0.0
3,0.8,30.0,8.0,0.0,37.816,4.3395,0.0001,0.0,0.6,0.0,...,6.5753,298.11,1e-06,0.10819,5.0,22.0,-0.056737,0.20602,0.0,0.0
4,1.0,30.0,8.0,0.5181,18.908,1.1045,0.0001,0.0,0.6,0.0,...,6.5825,298.09,1e-06,0.1103,5.0,22.0,-0.049975,0.20589,0.0,0.0


In [76]:
# Scaling from 0-1
scaler = MinMaxScaler(); 

d_scaled = pd.DataFrame(scaler.fit_transform(data), columns=data.columns); 
d_scaled = d_scaled[:10000] # Temporary only 1000
d_scaled = d_scaled.astype('float32') # Convert to float32 from 64 to increase speed
d_scaled.head()

Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Air head pressure(pressure:bar),Dumped broth flow(Fremoved:L/h),...,pH(pH:pH),Temperature(T:K),Generated heat(Q:kJ),carbon dioxide percent in off-gas(CO2outgas:%),PAA flow(Fpaa:PAA flow (L/h)),Oil flow(Foil:L/hr),Oxygen Uptake Rate(OUR:(g min^{-1})),Oxygen in percent in off-gas(O2:O2 (%)),Fault reference(Fault_ref:Fault ref),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)
0,0.0,0.181818,0.040541,0.0,0.133858,0.006556,0.0,0.0,0.0,1.0,...,0.767126,0.258427,0.0,0.002,0.333333,0.0,0.21523,0.677992,0.0,0.0
1,0.00069,0.181818,0.040541,0.0,0.227649,0.012103,0.0,0.0,0.0,1.0,...,0.800686,0.249064,0.0,0.003738,0.333333,0.0,0.162455,0.759106,0.0,0.0
2,0.00138,0.181818,0.040541,0.0,0.241342,0.006399,0.0,0.0,0.0,1.0,...,0.836653,0.243446,0.0,0.004311,0.333333,0.0,0.150004,0.777982,0.0,0.0
3,0.00207,0.181818,0.040541,0.0,0.168071,0.002893,0.0,0.0,0.0,1.0,...,0.860582,0.237828,0.0,0.00465,0.333333,0.0,0.1481,0.780737,0.0,0.0
4,0.002761,0.181818,0.040541,0.039866,0.084036,0.000736,0.0,0.0,0.0,1.0,...,0.865835,0.234082,0.0,0.00495,0.333333,0.0,0.148945,0.77941,0.0,0.0


In [77]:
# Split into x and y
x_keys = [
    "Time (h)", "Aeration rate(Fg:L/h)", "Sugar feed rate(Fs:L/h)","Acid flow rate(Fa:L/h)",
    "Base flow rate(Fb:L/h)","Heating/cooling water flow rate(Fc:L/h)","Heating water flow rate(Fh:L/h)",
    "Water for injection/dilution(Fw:L/h)","Substrate concentration(S:g/L)","PAA flow(Fpaa:PAA flow (L/h))",
    "Oil flow(Foil:L/hr)", "0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref)",
    "Oxygen Uptake Rate(OUR:(g min^{-1}))"
]
x = d_scaled[x_keys]
print(len(x_keys)," keys in ", len(x.columns), "cols")
x.head()

13  keys in  13 cols


Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Substrate concentration(S:g/L),PAA flow(Fpaa:PAA flow (L/h)),Oil flow(Foil:L/hr),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref),Oxygen Uptake Rate(OUR:(g min^{-1}))
0,0.0,0.181818,0.040541,0.0,0.133858,0.006556,0.0,0.0,0.008306,0.333333,0.0,0.0,0.21523
1,0.00069,0.181818,0.040541,0.0,0.227649,0.012103,0.0,0.0,0.008715,0.333333,0.0,0.0,0.162455
2,0.00138,0.181818,0.040541,0.0,0.241342,0.006399,0.0,0.0,0.009107,0.333333,0.0,0.0,0.150004
3,0.00207,0.181818,0.040541,0.0,0.168071,0.002893,0.0,0.0,0.009492,0.333333,0.0,0.0,0.1481
4,0.002761,0.181818,0.040541,0.039866,0.084036,0.000736,0.0,0.0,0.009864,0.333333,0.0,0.0,0.148945


In [78]:
y_keys = list(set(d_scaled.columns) - set(x_keys))
y = d_scaled[y_keys]
print(len(y_keys)," keys in ", len(y.columns), "cols")
y.head()

11  keys in  11 cols


Unnamed: 0,carbon dioxide percent in off-gas(CO2outgas:%),Air head pressure(pressure:bar),Dissolved oxygen concentration(DO2:mg/L),Oxygen in percent in off-gas(O2:O2 (%)),Generated heat(Q:kJ),Dumped broth flow(Fremoved:L/h),Temperature(T:K),Penicillin concentration(P:g/L),pH(pH:pH),Vessel Volume(V:L),Fault reference(Fault_ref:Fault ref)
0,0.002,0.0,0.884124,0.677992,0.0,1.0,0.258427,1.791808e-27,0.767126,0.049276,0.0
1,0.003738,0.0,0.883351,0.759106,0.0,1.0,0.249064,2.763729e-05,0.800686,0.04948,0.0
2,0.004311,0.0,0.882512,0.777982,0.0,1.0,0.243446,2.761905e-05,0.836653,0.049685,0.0
3,0.00465,0.0,0.8809,0.780737,0.0,1.0,0.237828,2.760247e-05,0.860582,0.049787,0.0
4,0.00495,0.0,0.879095,0.77941,0.0,1.0,0.234082,2.758782e-05,0.865835,0.049838,0.0


In [79]:
rand = round(time.time() * 1000) % 100; 
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.91, random_state=rand)

print ("x_train: ", x_train.shape)
print ("x_test: ", x_test.shape)

print ("y_train: ", y_train.shape)
print ("y_test: ", y_test.shape)

x_train:  (9100, 13)
x_test:  (900, 13)
y_train:  (9100, 11)
y_test:  (900, 11)


In [80]:
# Convert data to torch tensors
class Data(Dataset):
    def __init__(self, x, y):
        self.x = torch.from_numpy(x.astype(np.float32))
        self.y = torch.from_numpy(y.astype(np.float32))
        self.len = self.x.shape[0]
       
    def __getitem__(self, index):
        return self.x[index], self.y[index]
   
    def __len__(self):
        return self.len
   
batch_size = 32

# Instantiate training and test data
# .to_numpy is to Convert Dataframe to numpy object which will then be converted to tensor
train_data = Data(x_train.to_numpy(), y_train.to_numpy())
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

test_data = Data(x_test.to_numpy(), y_test.to_numpy())
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

In [81]:
# Just checking if it worked
for batch, (x, y) in enumerate(train_dataloader):
    print(f"Batch: {batch+1}")
    print(f"X shape: {x.shape}")
    print(f"y shape: {y.shape}")
    break

Batch: 1
X shape: torch.Size([32, 13])
y shape: torch.Size([32, 11])


In [82]:
input_dim = 13
hidden_dim = 949
output_dim = 11

class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(NeuralNetwork, self).__init__()
        self.layer_1 = nn.Linear(input_dim, hidden_dim)
        nn.init.kaiming_uniform_(self.layer_1.weight, nonlinearity="relu")
        self.layer_2 = nn.Linear(hidden_dim, hidden_dim)
        nn.init.kaiming_uniform_(self.layer_2.weight, nonlinearity="relu")
        self.layer_3 = nn.Linear(hidden_dim, output_dim)
       
    def forward(self, x):
        x = torch.nn.functional.relu(self.layer_1(x))
        x = torch.nn.functional.relu(self.layer_2(x))
        x = torch.sigmoid(self.layer_3(x))

        return x
       
model = NeuralNetwork(input_dim, hidden_dim, output_dim)
print(model)

NeuralNetwork(
  (layer_1): Linear(in_features=13, out_features=949, bias=True)
  (layer_2): Linear(in_features=949, out_features=949, bias=True)
  (layer_3): Linear(in_features=949, out_features=11, bias=True)
)


In [83]:
learning_rate = 0.1
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [84]:
num_epochs = 100
loss_values = []


for epoch in range(num_epochs):
    for x, y in train_dataloader:
        # zero the parameter gradients
        optimizer.zero_grad()
       
        # forward + backward + optimize
        pred = model(x)
        loss = loss_fn(pred, y)
        loss_values.append(loss.item())
        loss.backward()
        optimizer.step()

print("Training Complete")

Training Complete


'\nTraining Complete\n'

In [89]:
import itertools    

total = 0.0
correct = 0.0
y_pred = []

with torch.no_grad():
    for x, y in test_dataloader:
        outputs = model(x)
        predicted = np.where(outputs < 0.5, 0, 1)
        predicted = list(itertools.chain(*predicted))
        np.append(y_pred, predicted)
        np.append(y_test, y)
        # y_pred.append(predicted)
        # y_test.append(y)
        total += y.size(0)
        correct += np.sum(np.argmax(predicted, axis=0) == np.argmax(y.numpy(), axis=0)) # (predicted == y.numpy())

print(f'Accuracy out of {total} test instances: {100 * correct // total}%')

Accuracy out of 900.0 test instances: 2.0%
