EM 538: Practical Machine Learning for Enginnering Analytics (Spring 2025)  
Instructor: Fred Livingston (fjliving@ncsu.edu)  
Student: Mike Keating


## Drug Classification Modeling


In [3]:
import pandas as pd
import torch
from sklearn import preprocessing
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [4]:
df = pd.read_csv("drug200.csv")
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


#### Data Preprocessing

This section is complete for this data set


In [None]:
# Data Encoding
from sklearn.preprocessing import LabelEncoder

data_tobe_encoded = ["Sex", "BP", "Cholesterol", "Drug"]
df_copy = df.copy()
label_encoder = LabelEncoder()
for column in data_tobe_encoded:
    df_copy[column] = label_encoder.fit_transform(df_copy[column])

df_copy.head()


array([0, 3, 4, 1, 2])

In [None]:
X = df_copy.drop("Drug", axis=1).values
y = df_copy["Drug"].values

In [None]:
# Data Scaling
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Data Splitting
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1, stratify=y)

### Torch DataSet and Loader

Task 1. Design a Data Loader for a batch size of 32 with suffling [10/100 pts]


In [None]:
# Dataset
class DrugDataset(Dataset):
    def __init__(self, X, y):
        self.features = torch.tensor(X, dtype=torch.float32)
        self.labels = torch.tensor(y, dtype=torch.int64)

    def __getitem__(self, index):
        return self.features[index], self.labels[index]

    def __len__(self):
        return self.labels.shape[0]


# DataLoader
train_ds = DrugDataset(X_train, y_train)
test_ds = DrugDataset(X_test, y_test)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)


#### MLP Model

Task 2: Design a Feed Foward Neural Network that has a test accuracy greater than 90 % [90/100 pts ]


In [None]:
class PyTorchMLP(torch.nn.Module):
    def __init__(self, num_features, num_classes):
        super().__init__()

        self.all_layers = torch.nn.Sequential(
            # input layer
            torch.nn.Linear(num_features, 64),
            torch.nn.ReLU(),
            # output layer
            torch.nn.Linear(64, num_classes),
            torch.nn.Softmax(),
        )

    def forward(self, x):
        z = self.all_layers(x)
        return z


num_features = X.shape[1]
num_classes = len(df_copy["Drug"].unique())

learning_rate = 0.05  # Just selecting a small learning rate for now
model = PyTorchMLP(num_features=num_features, num_classes=num_classes)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  # Stochastic gradient descent
num_epochs = 20  # Number of epochs to train the model

#### Train Model


In [None]:
def train_model(num_epochs, model, optimizer, train_loader):
    for epoch in range(num_epochs):
        model = model.train()
        for batch_idx, (features, labels) in enumerate(train_loader):
            z = model(features)

            loss = F.cross_entropy(z, labels)  # Loss function

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("Epoch [{}/{}], Loss: {:.4f}".format(epoch + 1, num_epochs, loss.item()))


train_model(num_epochs, model, optimizer, train_loader)

Epoch [1/20], Loss: 1.5785
Epoch [2/20], Loss: 1.5403
Epoch [3/20], Loss: 1.5589
Epoch [4/20], Loss: 1.5699
Epoch [5/20], Loss: 1.5106
Epoch [6/20], Loss: 1.5040
Epoch [7/20], Loss: 1.5222
Epoch [8/20], Loss: 1.4946
Epoch [9/20], Loss: 1.4586
Epoch [10/20], Loss: 1.4074
Epoch [11/20], Loss: 1.4589
Epoch [12/20], Loss: 1.4287
Epoch [13/20], Loss: 1.3809
Epoch [14/20], Loss: 1.3845
Epoch [15/20], Loss: 1.3905
Epoch [16/20], Loss: 1.3591
Epoch [17/20], Loss: 1.3459
Epoch [18/20], Loss: 1.3397
Epoch [19/20], Loss: 1.3510
Epoch [20/20], Loss: 1.3593


  return self._call_impl(*args, **kwargs)


In [None]:
# Adjust lr first, and then epoch until we reach a good accuracy

#### Model Accuracy


In [None]:
def test_model(model, test_loader):
    model = model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for features, labels in test_loader:
            outputs = model(features)
            _, predicted = torch.max(outputs, 1)  # Choose the best class from the output: The class with the best score
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print("Accuracy: {} %".format(100 * correct / total))


test_model(model, test_loader)

Accuracy: 67.5 %


#### Saving & Loading Entire Model


In [None]:
# TODO Save the model in the following format and submit it
torch.save(model, "hw4_fred_livingston.mdl")  # Save the model