In [1]:
import torch
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
from datasets import DateDataset
from model import FeatureExtractor

In [2]:
def train_classifier(model, optimizer, loss_fn, train_loader, val_loader, epochs, save_path):
    train_losses = [None] * epochs
    val_losses = [None] * epochs

    for epoch in range(epochs):
        model.train()
        
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for x_batch,y_batch in train_loader:
            x_batch = x_batch.float()
            predictions = model(x_batch)
        
            loss = loss_fn(predictions, y_batch.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step() 
            
            _, class_prediction = torch.max(predictions, dim=1)
            train_correct += (class_prediction == y_batch).sum().item()
            train_total += y_batch.shape[0]
            train_loss += loss.item()
        
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        for x_batch,y_batch in val_loader:
            x_batch = x_batch.float()

            with torch.no_grad():
                predictions = model(x_batch)
                loss = loss_fn(predictions, y_batch.long())

                _, class_prediction = torch.max(predictions, dim=1)
                val_correct += (class_prediction == y_batch).sum().item()

                val_total += y_batch.shape[0]
                val_loss += loss.item()

        train_accuracy = train_correct / train_total
        train_loss /= train_total
        train_losses[epoch] = train_loss

        val_accuracy = val_correct / val_total
        val_loss /= val_total
        val_losses[epoch] = val_loss
        
        print(f'Epoch {epoch+1:<2} / {epochs}: Train Loss: {train_loss:.2f}  Train Accuracy: {train_accuracy*100:.2f}%  Validation Loss: {val_loss:.2f} Validaton Accuracy: {val_accuracy*100:.2f}%')
    
    torch.save(model.state_dict(), save_path)
    return model

In [42]:
in_features = 34
out_features = 34
n_classes = 7
f_name = None
model = FeatureExtractor((in_features, 1024, 1024, out_features, n_classes), torch.nn.functional.relu)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

train_data = DateDataset('date-data/train.csv')
val_data = DateDataset('date-data/test.csv')

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=False, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False, pin_memory=True)

epochs = 10

model = train_classifier(model, optimizer, loss_fn, train_loader, val_loader, epochs, f_name)

Epoch 1  / 10: Train Loss: 0.01  Train Accuracy: 59.33%  Validation Loss: 0.01 Validaton Accuracy: 82.78%
Epoch 2  / 10: Train Loss: 0.00  Train Accuracy: 84.12%  Validation Loss: 0.00 Validaton Accuracy: 86.67%
Epoch 3  / 10: Train Loss: 0.00  Train Accuracy: 89.14%  Validation Loss: 0.00 Validaton Accuracy: 91.11%
Epoch 4  / 10: Train Loss: 0.00  Train Accuracy: 92.06%  Validation Loss: 0.00 Validaton Accuracy: 90.56%
Epoch 5  / 10: Train Loss: 0.00  Train Accuracy: 94.15%  Validation Loss: 0.00 Validaton Accuracy: 93.33%
Epoch 6  / 10: Train Loss: 0.00  Train Accuracy: 94.99%  Validation Loss: 0.00 Validaton Accuracy: 93.33%
Epoch 7  / 10: Train Loss: 0.00  Train Accuracy: 95.40%  Validation Loss: 0.00 Validaton Accuracy: 93.33%
Epoch 8  / 10: Train Loss: 0.00  Train Accuracy: 96.80%  Validation Loss: 0.00 Validaton Accuracy: 91.67%
Epoch 9  / 10: Train Loss: 0.00  Train Accuracy: 97.08%  Validation Loss: 0.00 Validaton Accuracy: 93.89%
Epoch 10 / 10: Train Loss: 0.00  Train Accurac

In [None]:
# model = FeatureExtractor((in_features, 1024, 7), torch.nn.functional.relu)
# model.load_state_dict(torch.load('model.pt'))

### 1) Raw Features with end to end NN classifier
Since we trained the NN classifier in the previous step using the train data this step will evaluate the classification accuracy on the test/validation set

In [43]:
m = len(val_data)
y_true = np.zeros(m)
y_pred = np.zeros(m)

for i in range(m):
    x,y = val_data[i]
    prediction = model(torch.from_numpy(x))
    # get index of class w/ max prob and set its value to be y_pred[i]
    y_pred[i] = torch.argmax(prediction).item()
    y_true[i] = y

acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, labels=[0,1,2,3,4,5,6], average="weighted")

print(f'Accuracy {acc * 100:.2f}%')
print(f'F1 Score: {f1:.4f}')

Accuracy 93.89%
F1 Score: 0.9373


### 2) & 3) Raw Features with SVM Classifier (Linear & RBF Kernel)

In this step we will train the linear SVM using the train set and then evaluate its classification accuracy on the test/validation set.

In [44]:
from sklearn.svm import SVC

# Train
train_data = np.genfromtxt('date-data/train.csv', delimiter=',', skip_header=1)
X_train = train_data[:, :-1]
y_train = train_data[:, -1]


# Test
test_data = np.genfromtxt('date-data/test.csv', delimiter=',', skip_header=1)
X_test = test_data[:, :-1]
y_test = test_data[:, -1]


linear_svm = SVC(kernel="linear", C =10).fit(X_train, y_train)
rbf_svm = SVC(kernel="rbf", C=10).fit(X_train, y_train)

linear_pred = linear_svm.predict(X_test)
rbf_pred = rbf_svm.predict(X_test)

linear_acc = accuracy_score(y_test, linear_pred)
rbf_acc = accuracy_score(y_test, rbf_pred)
linear_f1 = f1_score(y_test, linear_pred, labels=[0,1,2,3,4,5,6], average="weighted")
rbf_f1 = f1_score(y_test, rbf_pred, labels=[0,1,2,3,4,5,6], average="weighted")

print(f'Linear Accuracy: {linear_acc * 100:.2f}%')
print(f'Linear F1 Score: {linear_f1:.4f}')
print(f'RBF Accuracy: {rbf_acc * 100:.2f}%')
print(f'RBF F1 Score: {rbf_f1:.4f}')

Linear Accuracy: 93.33%
Linear F1 Score: 0.9314
RBF Accuracy: 92.78%
RBF F1 Score: 0.9243


### 4) & 5) NN Extracted Features with SVM Classifier (Linear & RBF Kernel)

We will construct a training dataset by running the train set through the neural network set to extract features. We will then train the SVMs on this dataset. We will then construct a test dataset by running the test set through the neural network to extract new features. We will then evaluate the classification accuracy of the SVMs on this dataset.

In [45]:
X_train = np.zeros((len(train_data),out_features))
y_train = np.zeros(len(train_data))

X_test = np.zeros((len(val_data),out_features))
y_test = np.zeros(len(val_data))

model.set_extract_feature_mode(True)

with torch.no_grad():
    i = 0
    for x,y in train_loader:
        predictions = model(x.float()).numpy()
        
        for r in range(predictions.shape[0]):
            X_train[i, :] = predictions[r,:]
            y_train[i] = y[r]
            i += 1

linear_svm = SVC(kernel="linear").fit(X_train, y_train)
rbf_svm = SVC(kernel="rbf").fit(X_train, y_train)

with torch.no_grad():
    i = 0
    for x,y in val_loader:
        predictions = model(x.float()).numpy()

        for r in range(predictions.shape[0]):
            X_test[i,:] = predictions[r,:]
            y_test[i] = y[r]
            i += 1

linear_acc = linear_svm.score(X_test, y_test)
rbf_acc = rbf_svm.score(X_test, y_test)

linear_pred = linear_svm.predict(X_test)
rbf_pred = rbf_svm.predict(X_test)

linear_f1 = f1_score(y_test, linear_pred, labels=[0,1,2,3,4,5,6], average="weighted")
rbf_f1 = f1_score(y_test, rbf_pred, labels=[0,1,2,3,4,5,6], average="weighted")

print(f'Linear Accuracy: {linear_acc * 100:.2f}%')
print(f'Linear F1 Score: {linear_f1:.4f}')
print(f'RBF Accuracy: {rbf_acc * 100:.2f}%')
print(f'RBF F1 Score: {rbf_f1:.4f}')

Linear Accuracy: 95.56%
Linear F1 Score: 0.9541
RBF Accuracy: 92.22%
RBF F1 Score: 0.9192


### 6) & 7) PCA Extracted Features with SVM Classifier (Linear & RBF Kernel)



In [35]:
from sklearn.decomposition import PCA

# Train
train_data = np.genfromtxt('date-data/train.csv', delimiter=',', skip_header=1)
X_train = train_data[:, :-1]
y_train = train_data[:, -1]

pca = PCA(n_components=20)
pca = pca.fit(X_train)
X_train = pca.transform(X_train)

# Test
test_data = np.genfromtxt('date-data/test.csv', delimiter=',', skip_header=1)
X_test = test_data[:, :-1]
y_test = test_data[:, -1]

X_test = pca.transform(X_test)

linear_svm = SVC(kernel="linear", C=10).fit(X_train, y_train)
rbf_svm = SVC(kernel="rbf", C=10).fit(X_train, y_train)

linear_pred = linear_svm.predict(X_test)
rbf_pred = rbf_svm.predict(X_test)

linear_acc = accuracy_score(y_test, linear_pred)
rbf_acc = accuracy_score(y_test, rbf_pred)
linear_f1 = f1_score(y_test, linear_pred, labels=[0,1,2,3,4,5,6], average="weighted")
rbf_f1 = f1_score(y_test, rbf_pred, labels=[0,1,2,3,4,5,6], average="weighted")

print(f'PCA Linear Accuracy: {linear_acc * 100:.2f}%')
print(f'PCA Linear F1 Score: {linear_f1:.4f}')
print(f'PCA RBF Accuracy: {rbf_acc * 100:.2f}%')
print(f'PCA RBF F1 Score: {rbf_f1:.4f}')

PCA Linear Accuracy: 93.33%
PCA Linear F1 Score: 0.9321
PCA RBF Accuracy: 92.78%
PCA RBF F1 Score: 0.9243


In [38]:
in_features = 20
n_classes = 7
model = FeatureExtractor((in_features, 1024, 1024, n_classes), torch.nn.functional.relu)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

train_data = DateDataset('date-data/train.csv')
train_data.data = np.hstack((X_train, np.expand_dims(y_train, axis=1)))
val_data = DateDataset('date-data/test.csv')
val_data.data = np.hstack((X_test, np.expand_dims(y_test, axis=1)))

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=False, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False, pin_memory=True)

epochs = 10
model = train_classifier(model, optimizer, loss_fn, train_loader, val_loader, epochs, 'pca-1024-1024.pt')

m = len(val_data)
y_true = np.zeros(m)
y_pred = np.zeros(m)

for i in range(m):
    x,y = val_data[i]
    prediction = model(torch.from_numpy(x.astype(np.float32)))
    # get index of class w/ max prob and set its value to be y_pred[i]
    y_pred[i] = torch.argmax(prediction).item()
    y_true[i] = y

acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, labels=[0,1,2,3,4,5,6], average="weighted")

print(f'PCA Features NN Accuracy {acc * 100:.2f}%')
print(f'PCA FEatures NN F1 Score: {f1:.4f}')

Epoch 1  / 10: Train Loss: 0.01  Train Accuracy: 60.72%  Validation Loss: 0.01 Validaton Accuracy: 82.78%
Epoch 2  / 10: Train Loss: 0.00  Train Accuracy: 87.19%  Validation Loss: 0.01 Validaton Accuracy: 85.56%
Epoch 3  / 10: Train Loss: 0.00  Train Accuracy: 89.69%  Validation Loss: 0.00 Validaton Accuracy: 87.78%
Epoch 4  / 10: Train Loss: 0.00  Train Accuracy: 92.48%  Validation Loss: 0.00 Validaton Accuracy: 89.44%
Epoch 5  / 10: Train Loss: 0.00  Train Accuracy: 94.71%  Validation Loss: 0.00 Validaton Accuracy: 90.00%
Epoch 6  / 10: Train Loss: 0.00  Train Accuracy: 95.68%  Validation Loss: 0.00 Validaton Accuracy: 92.22%
Epoch 7  / 10: Train Loss: 0.00  Train Accuracy: 96.94%  Validation Loss: 0.00 Validaton Accuracy: 92.78%
Epoch 8  / 10: Train Loss: 0.00  Train Accuracy: 97.63%  Validation Loss: 0.00 Validaton Accuracy: 92.78%
Epoch 9  / 10: Train Loss: 0.00  Train Accuracy: 98.05%  Validation Loss: 0.00 Validaton Accuracy: 92.78%
Epoch 10 / 10: Train Loss: 0.00  Train Accurac