In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, classification_report

# Load and preprocess the data
def load_and_preprocess_data(file_path):
    # Load the dataset
    data = pd.read_csv(file_path)  # Replace with your actual file path

    # Drop unnecessary columns
    data = data.drop(columns=['Unnamed: 0', 'time_ms'], errors='ignore')

    # Features and target labels
    X = data.iloc[:, :-1].values  # Features (MLII, V5)
    y = data.iloc[:, -1].values  # Target labels

    # Encode the target labels
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    # Find unique classes and their counts
    unique_classes, class_counts = np.unique(y, return_counts=True)
    print("Class Counts BEFORE removal:", dict(zip(unique_classes, class_counts)))

    # Identify classes with only one sample
    single_sample_classes = unique_classes[class_counts == 1]

    # Remove rows corresponding to single-sample classes
    for cls in single_sample_classes:
        data = data[data.iloc[:, -1] != label_encoder.inverse_transform([cls])[0]]

    # Re-extract X and y after removing rows
    X = data.iloc[:, :-1].values
    y = label_encoder.fit_transform(data.iloc[:, -1].values)

    # Find unique classes and their counts after removal
    unique_classes, class_counts = np.unique(y, return_counts=True)
    print("Class Counts AFTER removal:", dict(zip(unique_classes, class_counts)))

    # Feature scaling
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Reshape the data for RNN/LSTM (samples, timesteps, features)
    X = X.reshape((X.shape[0], 1, X.shape[1]))

    return X, y, label_encoder


# Build the teacher model
def build_teacher_model(input_shape, num_classes):
    model = Sequential([
        LSTM(128, return_sequences=True, input_shape=input_shape),
        Dropout(0.4),
        LSTM(64, return_sequences=False),
        Dropout(0.4),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Build the student model
def build_student_model(input_shape, num_classes):
    model = Sequential([
        LSTM(32, return_sequences=False, input_shape=input_shape),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Generate soft targets using the teacher model
def generate_soft_targets(teacher_model, X, temperature=2.0):
    logits = teacher_model.predict(X)
    soft_targets = tf.nn.softmax(logits / temperature, axis=-1)
    return soft_targets


# Define the knowledge distillation loss
def knowledge_distillation_loss(y_true, y_pred, soft_targets, temperature=2.0, alpha=0.5):
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    soft_loss = tf.keras.losses.kl_divergence(tf.nn.softmax(soft_targets * temperature, axis=-1),
                                             tf.nn.softmax(y_pred * temperature, axis=-1))
    return (1 - alpha) * hard_loss + (alpha * temperature * temperature) * soft_loss


# Train the student model with knowledge distillation
def train_student_with_kd(student_model, X_train, y_train, soft_targets, X_val, y_val, epochs=100, batch_size=32):
    def custom_loss(y_true, y_pred):
        return knowledge_distillation_loss(y_true, y_pred, soft_targets, temperature=2.0, alpha=0.5)

    student_model.compile(optimizer=Adam(learning_rate=0.001), loss=custom_loss, metrics=['accuracy'])

    history = student_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
    )
    return history


# Evaluate the model
def evaluate_model(model, X_test, y_test, label_encoder):
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred_classes)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    # Classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_))


# Main execution
if __name__ == "__main__":
    # Step 1: Load and preprocess the data
    file_path = 'C:/Users/abdulssekyanzi/EDA Dataset.csv/100.csv'  # Replace with your actual file path
    X, y, label_encoder = load_and_preprocess_data(file_path)

    # Step 2: Split the data into train, validation, and test sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

    num_classes = len(np.unique(y))
    input_shape = (X_train.shape[1], X_train.shape[2])

    # Step 3: Convert labels to categorical format
    y_train_cat = to_categorical(y_train, num_classes)
    y_val_cat = to_categorical(y_val, num_classes)
    y_test_cat = to_categorical(y_test, num_classes)

    # Step 4: Train the teacher model
    print("Training the teacher model...")
    teacher_model = build_teacher_model(input_shape, num_classes)
    teacher_history = teacher_model.fit(
        X_train,
        y_train_cat,
        epochs=100,
        batch_size=32,
        validation_data=(X_val, y_val_cat),
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
    )

    # Step 5: Generate soft targets using the teacher model
    print("Generating soft targets...")
    soft_targets = generate_soft_targets(teacher_model, X_train)

    # Step 6: Train the student model with knowledge distillation
    print("Training the student model with knowledge distillation...")
    student_model = build_student_model(input_shape, num_classes)
    train_student_with_kd(student_model, X_train, y_train_cat, soft_targets, X_val, y_val_cat)

    # Step 7: Evaluate the student model
    print("Evaluating the student model...")
    evaluate_model(student_model, X_test, y_test_cat, label_encoder)

Class Counts BEFORE removal: {np.int64(0): np.int64(2), np.int64(1): np.int64(1), np.int64(2): np.int64(1), np.int64(3): np.int64(1), np.int64(4): np.int64(1), np.int64(5): np.int64(1), np.int64(6): np.int64(1), np.int64(7): np.int64(1), np.int64(8): np.int64(1), np.int64(9): np.int64(1), np.int64(10): np.int64(1), np.int64(11): np.int64(1), np.int64(12): np.int64(1), np.int64(13): np.int64(1), np.int64(14): np.int64(1), np.int64(15): np.int64(1), np.int64(16): np.int64(1), np.int64(17): np.int64(1), np.int64(18): np.int64(2), np.int64(19): np.int64(1), np.int64(20): np.int64(3), np.int64(21): np.int64(3), np.int64(22): np.int64(1), np.int64(23): np.int64(2), np.int64(24): np.int64(1), np.int64(25): np.int64(3), np.int64(26): np.int64(3), np.int64(27): np.int64(3), np.int64(28): np.int64(1), np.int64(29): np.int64(1), np.int64(30): np.int64(1), np.int64(31): np.int64(1), np.int64(32): np.int64(2), np.int64(33): np.int64(2), np.int64(34): np.int64(2), np.int64(35): np.int64(4), np.int64

ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.