In [167]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, recall_score, precision_score
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
import seaborn as sns


In [168]:
#Load the csv into a dataframe
df = pd.read_csv('/notebooks/datacsv.csv')
df = df.set_index('sn') #index using serial number
columns = ['status', 'test key']
df = df[columns] #drop everything except test key and status column
df

Unnamed: 0_level_0,status,test key
sn,Unnamed: 1_level_1,Unnamed: 2_level_1
1447-PP004,pass,1447-PP004pLcgC6HpDdb47tAZ
1447-PP006,pass,1447-PP006aaEwMofBbS0Tfw0U
1447-PP003,pass,1447-PP003AhdIAlmrBsNGqubU
1447-PP002,pass,1447-PP002OTY4gXt7a7gYwkNG
1447-PP005,pass,1447-PP005JVBdbv5OYVl6OOsy
...,...,...
1545-13546,PASS,1545-13546K@vc0=bebjE-miU:
1545-13546,pass,939d1422-28d8-40d1-90e7-6301718bba95
1545-13545,pass,c414da5b-aaa5-464d-96a4-778ae35532f7
1545-13546,PASS,1545-13546#)A(02y*bQtJk6*%


In [169]:
# Load the CSV file
df = pd.read_csv('/notebooks/datacsv.csv')

# Set serial number as the index
df = df.set_index('sn')

# Select specific columns to keep
df = df[['status', 'test procedure']]

# Pivot the DataFrame
df_pivot = df.pivot_table(index=df.index, columns='test procedure', values='status', aggfunc='first')
df_pivot

testIdDropped = "1016100-02_PRE"

df_pivot = df_pivot.dropna(subset=[testIdDropped])

In [170]:
x = df_pivot.drop(testIdDropped, axis=1) 
#need to map non-numerical values to binary labels
x = x.map(lambda x: 1 if x == 'pass' else (0 if x == 'fail' else np.nan))
x

test procedure,1016100-02,1016100-02-ESS,1016100-02_FRM,1016100-02_POST
sn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1447-PP101,,0.0,,0.0
1447-PP102,0.0,0.0,,
1447-PP103,0.0,0.0,,
1447-PP104,0.0,0.0,,
1447-PP201,0.0,1.0,,
1447-PP202,,1.0,,
1447-PP203,,1.0,,
1447-PP204,,1.0,,
1447-PP205,,0.0,,
1447-PP206,,1.0,,


In [171]:
y = df_pivot[testIdDropped]
y = y.map(lambda x: 1 if x == 'pass' else (0 if x == 'fail' else np.nan))

y

sn
1447-PP101     0
1447-PP102     0
1447-PP103     0
1447-PP104     0
1447-PP201     0
1447-PP202     0
1447-PP203     1
1447-PP204     1
1447-PP205     0
1447-PP206     1
1545-10026     0
1545-10027     1
1545-10030     1
1545-10098     1
1545-10166     1
1545-10178     1
1545-10181     0
1545-10183     1
1545-10188     0
1545-10196     1
1545-10276     1
1545-10299     0
1545-103       1
1545-10323     1
1545-10364     0
1545-10372     1
1545-10406     0
1545-10407     1
1545-10408     1
1545-10423     1
1545-10428     0
1545-10437     1
1545-10449     1
1545-10455     1
1545-10836     1
1545-10866     1
1545-10XXX     1
1545-10xxx     0
1545-10xxxx    0
1545-11032     0
1545-11074     1
1545-11080     0
1545-11171     1
1545-11185     1
1545-11216     0
1545-11498     0
1545-11582     1
1545-11660     1
1545-11728     1
1545-12290     1
1545-13097     0
1545-13287     0
1545-13320     1
1545-13412     1
1545-13425     1
1545-13436     1
1545-13447     1
1545-13XXX     1
1545-test  

In [172]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 24)
x_train = x_train.fillna(x_train.mean()) #fill in Nan values with placeholders. Ideally would not 
                                         #need to do this but data is not very good.
x_test = x_test.fillna(x_train.mean())

#convert to tensors
x_train_tensor = torch.tensor(x_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)   

In [173]:
#create binary neural network. Additional linear layers can be added as neccesary
class BinaryMLP(nn.Module):
    def __init__(self, input_size):
        super(BinaryMLP, self).__init__()
        self.layer_1 = nn.Linear(input_size, 32)
        self.layer_2 = nn.Linear(32, 16)
        self.layer_out = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.layer_1(x))
        x = torch.relu(self.layer_2(x))
        x = self.layer_out(x)
        return self.sigmoid(x)


num_tests = x_train.shape[1]
model = BinaryMLP(input_size=num_tests)


# Binary Cross-Entropy Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 200

In [175]:
#training loop
for epoch in range(num_epochs):
    y_pred = model(x_train_tensor)
    
    y_pred = y_pred.squeeze()  #squeeze to match target shape
    
    #compute loss
    loss = criterion(y_pred, y_train_tensor)
    
    #backward pass & optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 10 == 0:
        predicted = (y_pred >= 0.5).float()  # Binarize predictions
        acc = (predicted == y_train_tensor).float().mean()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {acc.item():.4f}')


Epoch [10/200], Loss: 0.6695, Accuracy: 0.6667
Epoch [20/200], Loss: 0.6595, Accuracy: 0.6667
Epoch [30/200], Loss: 0.6490, Accuracy: 0.6667
Epoch [40/200], Loss: 0.6395, Accuracy: 0.6667
Epoch [50/200], Loss: 0.6304, Accuracy: 0.6667
Epoch [60/200], Loss: 0.6207, Accuracy: 0.6667
Epoch [70/200], Loss: 0.6119, Accuracy: 0.6667
Epoch [80/200], Loss: 0.6040, Accuracy: 0.6667
Epoch [90/200], Loss: 0.5962, Accuracy: 0.6667
Epoch [100/200], Loss: 0.5879, Accuracy: 0.6667
Epoch [110/200], Loss: 0.5789, Accuracy: 0.7708
Epoch [120/200], Loss: 0.5697, Accuracy: 0.7708
Epoch [130/200], Loss: 0.5604, Accuracy: 0.7708
Epoch [140/200], Loss: 0.5516, Accuracy: 0.7708
Epoch [150/200], Loss: 0.5434, Accuracy: 0.7708
Epoch [160/200], Loss: 0.5362, Accuracy: 0.7708
Epoch [170/200], Loss: 0.5296, Accuracy: 0.7708
Epoch [180/200], Loss: 0.5239, Accuracy: 0.7708
Epoch [190/200], Loss: 0.5188, Accuracy: 0.7708
Epoch [200/200], Loss: 0.5142, Accuracy: 0.7708


In [176]:
#test model
with torch.no_grad():
    y_predicted = model(x_test_tensor)
    y_predicted_cls = y_predicted.argmax(dim = 1)
    acc = (y_predicted_cls == y_test_tensor).float().mean()
    print(f'accuracy: {acc.item():.4f}')
#not going to have a very good acuracy. Not very good data. 

accuracy: 0.5833
