In [1]:
import torch
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
from datasets import DateDataset
from model import FeatureExtractor

In [2]:
def train_classifier(model, optimizer, loss_fn, train_loader, val_loader, epochs, save_path):
    train_losses = [None] * epochs
    val_losses = [None] * epochs

    for epoch in range(epochs):
        model.train()
        
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for x_batch,y_batch in train_loader:
            x_batch = x_batch.float()
            predictions = model(x_batch)
        
            loss = loss_fn(predictions, y_batch.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step() 
            
            _, class_prediction = torch.max(predictions, dim=1)
            train_correct += (class_prediction == y_batch).sum().item()
            train_total += y_batch.shape[0]
            train_loss += loss.item()
        
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        for x_batch,y_batch in val_loader:
            x_batch = x_batch.float()

            with torch.no_grad():
                predictions = model(x_batch)
                loss = loss_fn(predictions, y_batch.long())

                _, class_prediction = torch.max(predictions, dim=1)
                val_correct += (class_prediction == y_batch).sum().item()

                val_total += y_batch.shape[0]
                val_loss += loss.item()

        train_accuracy = train_correct / train_total
        train_loss /= train_total
        train_losses[epoch] = train_loss

        val_accuracy = val_correct / val_total
        val_loss /= val_total
        val_losses[epoch] = val_loss
        
        print(f'Epoch {epoch+1:<2} / {epochs}: Train Loss: {train_loss:.2f}  Train Accuracy: {train_accuracy*100:.2f}%  Validation Loss: {val_loss:.2f} Validaton Accuracy: {val_accuracy*100:.2f}%')
    
    torch.save(model.state_dict(), save_path)
    return model

In [3]:
in_features = 34
n_classes = 7
model = FeatureExtractor((in_features, 1024, in_features, n_classes), torch.nn.functional.relu)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

train_data = DateDataset('date-data/train.csv')
val_data = DateDataset('date-data/test.csv')

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=False, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False, pin_memory=True)

epochs = 10

model = train_classifier(model, optimizer, loss_fn, train_loader, val_loader, epochs, 'model.pt')

Epoch 1  / 10: Train Loss: 0.01  Train Accuracy: 56.69%  Validation Loss: 0.01 Validaton Accuracy: 80.00%
Epoch 2  / 10: Train Loss: 0.00  Train Accuracy: 81.34%  Validation Loss: 0.00 Validaton Accuracy: 88.33%
Epoch 3  / 10: Train Loss: 0.00  Train Accuracy: 88.30%  Validation Loss: 0.00 Validaton Accuracy: 88.33%
Epoch 4  / 10: Train Loss: 0.00  Train Accuracy: 91.78%  Validation Loss: 0.00 Validaton Accuracy: 90.00%
Epoch 5  / 10: Train Loss: 0.00  Train Accuracy: 92.48%  Validation Loss: 0.00 Validaton Accuracy: 92.22%
Epoch 6  / 10: Train Loss: 0.00  Train Accuracy: 93.45%  Validation Loss: 0.00 Validaton Accuracy: 91.11%
Epoch 7  / 10: Train Loss: 0.00  Train Accuracy: 93.87%  Validation Loss: 0.00 Validaton Accuracy: 90.56%
Epoch 8  / 10: Train Loss: 0.00  Train Accuracy: 94.85%  Validation Loss: 0.00 Validaton Accuracy: 90.56%
Epoch 9  / 10: Train Loss: 0.00  Train Accuracy: 95.40%  Validation Loss: 0.00 Validaton Accuracy: 91.11%
Epoch 10 / 10: Train Loss: 0.00  Train Accurac

In [7]:
model = FeatureExtractor((in_features, 1024, in_features, n_classes), torch.nn.functional.relu)
model.load_state_dict(torch.load('test.pt'))

<All keys matched successfully>

### 1) Raw Features with end to end NN classifier
Since we trained the NN classifier in the previous step using the train data this step will evaluate the classification accuracy on the test/validation set

In [None]:
m = len(val_data)
y_true = np.zeros(m)
y_pred = np.zeros(m)

for i in range(m):
    x,y = val_data[i]
    prediction = model(torch.from_numpy(x))
    # get index of class w/ max prob and set its value to be y_pred[i]
    y_pred[i] = torch.argmax(prediction).item()
    
acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, labels=[0,1,2,3,4,5,6], average="weighted")

print(f'Accuracy {acc * 100:.2f}%')
print(f'F1 Score: {f1:.4f}')

### 2) & 3) Raw Features with SVM Classifier (Linear & RBF Kernel)

In this step we will train the linear SVM using the train set and then evaluate its classification accuracy on the test/validation set.

In [None]:
from sklearn.svm import SVC

# Train
train_data = np.genfromtxt('date-data/train.csv')
X_train = train_data[:, :-1]
y_train = train_data[:, -1]


# Test
test_data = np.genfromtxt('date-data/test.csv')
X_test = test_data[:, :-1]
y_test = test_data[:, -1]

linear_svm = SVC(kernel="linear").fit(X_train, y_train)
rbf_svm = SVC(kernel="rbf").fit(X_train, y_train)

linear_acc = linear_svm.score(X_test, y_test)
rbf_acc = rbf_svm.score(X_test, y_test)

### 4) & 5) NN Extracted Features with SVM Classifier (Linear & RBF Kernel)

We will construct a training dataset by running the train set through the neural network set to extract features. We will then train the SVMs on this dataset. We will then construct a test dataset by running the test set through the neural network to extract new features. We will then evaluate the classification accuracy of the SVMs on this dataset.

In [None]:
# TODO