In [58]:
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt


In [53]:
X_train = np.loadtxt("dataset/train/X_train.txt")
y_train = np.loadtxt("dataset/train/y_train.txt", dtype=int) - 1 # Adjust labels to start at 0
X_test = np.loadtxt("dataset/test/X_test.txt")
y_test = np.loadtxt("dataset/test/y_test.txt", dtype=int) - 1 # Adjust labels to start at 0
train_set = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
test_set = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))


In [54]:
def hinge_loss():
    return

def train(train_set, model, criterion, epochs=10, lr=0.01, reg_lambda=0.001, print_epoch=True):
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    optimizer = optim.SGD(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X, y in train_loader:
            optimizer.zero_grad()
            outputs = model(X)
            if criterion == hinge_loss:
                loss = criterion(outputs, y, model, reg_lambda)
            else:
                loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if print_epoch:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")
    return total_loss

def test(test_set, model):
    test_loader = DataLoader(test_set, batch_size=64, shuffle=False)
    model.eval()
    all_preds = []
    y_test = test_set.tensors[1].numpy()
    
    with torch.no_grad():
        for X, y in test_loader:
            outputs = model(X)
            preds = torch.argmax(outputs, axis=1).numpy()
            all_preds.extend(preds)
    print("Accuracy Score:", accuracy_score(y_test, all_preds))
    print(classification_report(y_test, all_preds))


In [None]:
class FCNet(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(FCNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.fc(x)

input_size = X_train.shape[1]
# model = FCNet(input_size)
criterion = nn.CrossEntropyLoss()

# train(train_set, model, criterion)
# test(test_set, model)

lrs = np.linspace(0.01, 0.1, 5)

best_loss = float('inf')
best_lr = None

for lr in lrs:  
    model = FCNet(input_size)
    train_loss = train(train_set, model, criterion, lr=lr, print_epoch=False)
    print(f"Learning rate: {lr:.4f}, Loss: {train_loss:.4f}")
        
    if train_loss < best_loss:
        best_loss = train_loss
        best_lr = lr

print(f"Best Learning Rate: {best_lr:.4f}, Best Loss: {best_loss:.4f}")

fc_model = FCNet(input_size)
train(train_set, model, criterion, lr=best_lr)
test(test_set, model)

Learning rate: 0.01, Loss: 44.576213389635086
Learning rate: 0.0325, Loss: 22.39434902369976
Learning rate: 0.05500000000000001, Loss: 19.76302805542946
Learning rate: 0.0775, Loss: 18.706474594771862
Learning rate: 0.1, Loss: 23.30612576752901
Best Learning Rate: 0.0775, Best Loss: 18.7065
Epoch 1/10, Loss: 15.7290
Epoch 2/10, Loss: 15.4013
Epoch 3/10, Loss: 14.1330
Epoch 4/10, Loss: 14.5722
Epoch 5/10, Loss: 13.6389
Epoch 6/10, Loss: 11.9251
Epoch 7/10, Loss: 13.7429
Epoch 8/10, Loss: 12.1428
Epoch 9/10, Loss: 12.7130
Epoch 10/10, Loss: 12.5556
Accuracy Score: 0.8900576857821514
              precision    recall  f1-score   support

           0       0.96      0.96      0.96       496
           1       0.92      0.96      0.94       471
           2       0.97      0.93      0.95       420
           3       1.00      0.48      0.65       491
           4       0.68      1.00      0.81       532
           5       1.00      1.00      1.00       537

    accuracy                    

In [56]:
def train_svm_model(X_train, y_train, X_test, y_test):
    svm_model = SVC(kernel='rbf', C=1.0)
    svm_model.fit(X_train, y_train)
    svm_preds = svm_model.predict(X_test)
    
    print("SVM Model Accuracy:", accuracy_score(y_test, svm_preds))
    print(classification_report(y_test, svm_preds))
    return svm_model

svm_model = train_svm_model(X_train, y_train, X_test, y_test)

SVM Model Accuracy: 0.9504580929759077
              precision    recall  f1-score   support

           0       0.94      0.98      0.96       496
           1       0.93      0.96      0.94       471
           2       0.99      0.91      0.95       420
           3       0.94      0.89      0.91       491
           4       0.91      0.95      0.93       532
           5       1.00      1.00      1.00       537

    accuracy                           0.95      2947
   macro avg       0.95      0.95      0.95      2947
weighted avg       0.95      0.95      0.95      2947



In [None]:
class SVM(nn.Module):
    def __init__(self, input_size, num_classes=6):
        super(SVM, self).__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        return self.linear(x)

def hinge_loss(output, y, model, reg_lambda):
    y_one_hot = torch.zeros_like(output)
    y_one_hot.scatter_(1, y.view(-1, 1), 1) # Convert labels 0-5 to one-hot encoding
    y_one_hot = 2 * y_one_hot - 1  # Convert to {-1, 1} for each class

    loss = torch.sum(torch.clamp(1 - y_one_hot * output, min=0))
    reg_term = reg_lambda * torch.sum(model.linear.weight ** 2)
    return loss + reg_term

input_size = X_train.shape[1]
# model = SVM(input_size)
criterion = hinge_loss

# train(train_set, svm_model, criterion)
# test(test_set, svm_model)

# Hyperparameter search
lrs = np.linspace(0.0001, 0.001, 5)
lambdas = np.linspace(0.0001, 0.001, 5)

best_loss = float('inf')
best_lr = None
best_lambda = None

for lr in lrs:
    for reg_lambda in lambdas:   
        model = SVM(input_size)
        train_loss = train(train_set, model, criterion, lr=lr, reg_lambda=reg_lambda, print_epoch=False)
        print(f"Learning rate: {lr:.4f}, Lambda: {reg_lambda:.4f}, Loss: {train_loss:.4f}")
        
        if train_loss < best_loss:
            best_loss = train_loss
            best_lr = lr
            best_lambda = reg_lambda

print(f"Best Learning Rate: {best_lr:.4f}, Best Lambda: {best_lambda:.4f}, Best Loss: {best_loss:.4f}")

model = SVM(input_size)
train(train_set, model, criterion, lr=best_lr, reg_lambda=best_lambda)
test(test_set, model)


Learning rate: 0.0001, Lambda: 0.0001, Loss: 4535.143295288086
Learning rate: 0.0001, Lambda: 0.000325, Loss: 4536.546081542969
Learning rate: 0.0001, Lambda: 0.00055, Loss: 4508.832475662231
Learning rate: 0.0001, Lambda: 0.0007750000000000001, Loss: 4468.438283920288
Learning rate: 0.0001, Lambda: 0.001, Loss: 4527.887773513794
Learning rate: 0.000325, Lambda: 0.0001, Loss: 2981.1092824935913
Learning rate: 0.000325, Lambda: 0.000325, Loss: 3047.769739151001
Learning rate: 0.000325, Lambda: 0.00055, Loss: 3004.663818359375
Learning rate: 0.000325, Lambda: 0.0007750000000000001, Loss: 2992.687195777893
Learning rate: 0.000325, Lambda: 0.001, Loss: 3035.6607751846313
Learning rate: 0.00055, Lambda: 0.0001, Loss: 2935.7483010292053
Learning rate: 0.00055, Lambda: 0.000325, Loss: 2847.57759475708
Learning rate: 0.00055, Lambda: 0.00055, Loss: 2948.243597507477
Learning rate: 0.00055, Lambda: 0.0007750000000000001, Loss: 2937.0466623306274
Learning rate: 0.00055, Lambda: 0.001, Loss: 2931

In [None]:
model = RandomForestClassifier(
    n_estimators=100,  # Number of trees
    max_depth=None,    # Maximum depth of trees (default)
    random_state=42    # Set for reproducibility
)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.9257
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.97      0.93       496
           1       0.90      0.89      0.89       471
           2       0.97      0.87      0.91       420
           3       0.91      0.89      0.90       491
           4       0.90      0.92      0.91       532
           5       1.00      1.00      1.00       537

    accuracy                           0.93      2947
   macro avg       0.93      0.92      0.92      2947
weighted avg       0.93      0.93      0.93      2947

