In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

In [2]:
import pandas as pd

df = pd.read_csv('selected_features_training.csv')

# Step 3: Preprocess the Data

# Separate features and target variable
X = df.drop('label', axis=1)
y = df['label']

In [4]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Convert DataFrame to NumPy and then to PyTorch tensor
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.long)


In [7]:
X_train

tensor([[5.0000e-01, 9.0000e-01, 1.5508e-07,  ..., 1.0000e+00, 0.0000e+00,
         0.0000e+00],
        [5.0000e-01, 5.0000e-01, 0.0000e+00,  ..., 7.8431e-03, 1.0000e+00,
         0.0000e+00],
        [5.0000e-01, 1.0000e-01, 0.0000e+00,  ..., 3.1373e-02, 0.0000e+00,
         1.0000e+00],
        ...,
        [5.0000e-01, 1.0000e-01, 0.0000e+00,  ..., 2.3529e-02, 0.0000e+00,
         1.0000e+00],
        [5.0000e-01, 9.0000e-01, 2.2392e-07,  ..., 1.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.0000e+00, 9.0000e-01, 3.1160e-08,  ..., 9.9608e-01, 0.0000e+00,
         0.0000e+00]])

In [8]:
print(X_train.shape)  # Expected: (num_samples, num_features)

torch.Size([100778, 22])


In [9]:
from tensorflow.keras.models import load_model
teacher_model = load_model("pnn_model.h5")




In [10]:
teacher_logits = torch.tensor(teacher_model.predict(X_train), dtype=torch.float32)
teacher_logits

[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 795us/step


tensor([[8.3746e-19, 5.5388e-24, 3.4760e-21,  ..., 1.1500e-37, 2.5883e-16,
         3.2402e-30],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [9.6598e-36, 2.4998e-31, 0.0000e+00,  ..., 1.5795e-28, 0.0000e+00,
         0.0000e+00],
        ...,
        [7.6000e-36, 2.7694e-31, 0.0000e+00,  ..., 8.9771e-29, 0.0000e+00,
         0.0000e+00],
        [1.3558e-16, 6.8396e-21, 1.0208e-14,  ..., 3.8193e-36, 8.5625e-14,
         4.3104e-22],
        [7.7824e-28, 4.7065e-35, 6.5327e-26,  ..., 0.0000e+00, 2.2098e-19,
         0.0000e+00]])

In [33]:
import torch.nn as nn

class StudentModel(nn.Module):
    def __init__(self):
        super(StudentModel, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 64)  # Input layer to a smaller hidden layer
        self.fc2 = nn.Linear(64, len(torch.unique(y_train)))  # Hidden layer to output layer

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [34]:
student_model = StudentModel()

In [35]:
student_model

StudentModel(
  (fc1): Linear(in_features=22, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=23, bias=True)
)

In [43]:
alpha = 0.5         # Weight for soft vs hard loss
temperature = 3.0   # Temperature for softening the teacher's outputs
learning_rate = 0.001  # Learning rate for the optimizer
num_epochs = 100     # Number of training epochs

In [37]:
optimizer = torch.optim.Adam(student_model.parameters(), lr=learning_rate)


In [44]:
def distillation_loss(student_logits, teacher_logits, true_labels, alpha=0.5, temperature=3.0):
    # Compute teacher probabilities (softened outputs)
    teacher_probs = torch.softmax(teacher_logits / temperature, dim=1)
    # Compute student probabilities
    student_probs = torch.log_softmax(student_logits / temperature, dim=1)

    # Compute the soft loss (KL Divergence between student and teacher)
    soft_loss = nn.KLDivLoss(reduction="batchmean")(student_probs, teacher_probs)

    # Compute the hard loss (CrossEntropy with true labels)
    hard_loss = nn.CrossEntropyLoss()(student_logits, true_labels)

    # Combine soft and hard losses
    return alpha * soft_loss + (1 - alpha) * hard_loss


In [45]:
for epoch in range(num_epochs):
    student_model.train()
    optimizer.zero_grad()

    # Get student's predictions
    student_logits = student_model(X_train)

    # Calculate the distillation loss
    loss = distillation_loss(student_logits, teacher_logits, y_train, alpha, temperature)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [10/100], Loss: 0.4186
Epoch [20/100], Loss: 0.3939
Epoch [30/100], Loss: 0.3740
Epoch [40/100], Loss: 0.3583
Epoch [50/100], Loss: 0.3458
Epoch [60/100], Loss: 0.3354
Epoch [70/100], Loss: 0.3268
Epoch [80/100], Loss: 0.3193
Epoch [90/100], Loss: 0.3125
Epoch [100/100], Loss: 0.3064


In [46]:
student_model.eval()
with torch.no_grad():
    test_logits = student_model(X_test)
    predictions = torch.argmax(test_logits, dim=1)

# Calculate accuracy
accuracy = accuracy_score(y_test.numpy(), predictions.numpy())
print(f"Student Model Accuracy: {accuracy:.4f}")


Student Model Accuracy: 0.8598


In [19]:
torch.save(student_model.state_dict(), "student_model.h5")


In [20]:
teacher_size = sum(p.numel() for p in teacher_model.parameters())
student_size = sum(p.numel() for p in student_model.parameters())
print(f"Teacher Model Size: {teacher_size} parameters")
print(f"Student Model Size: {student_size} parameters")

AttributeError: 'Sequential' object has no attribute 'parameters'

In [47]:
teacher_size = teacher_model.count_params()

In [48]:
teacher_size

90647

In [49]:
student_size = sum(p.numel() for p in student_model.parameters())
student_size

2967

In [50]:
teacher_size = teacher_model.count_params()
student_size = sum(p.numel() for p in student_model.parameters())

print(f"Teacher Model Size: {teacher_size} parameters")
print(f"Student Model Size: {student_size} parameters")


Teacher Model Size: 90647 parameters
Student Model Size: 2967 parameters
