In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from collections import Counter

In [2]:
# Read the two CSV splits in one pass; the generator yields the training
# frame first and the test frame second, matching the unpacking order.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("train.csv", "test.csv")
)

Load the data

In [3]:
# Pre-computed embedding arrays for the "cls" variant (train first, then test).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files you produced yourself.
X_train_cls, X_test_cls = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        "bert_train_embeddings_cls.npy",
        "bert_test_embeddings_cls.npy",
    )
)

In [None]:
# Pre-computed embedding arrays for the "mean" variant (train first, then test).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files you produced yourself.
X_train_mean, X_test_mean = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        "bert_train_embeddings_mean.npy",
        "bert_test_embeddings_mean.npy",
    )
)

In [4]:
# Pre-computed embedding arrays for the "max" variant (train first, then test).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files you produced yourself.
X_train_max, X_test_max = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        "bert_train_embeddings_max.npy",
        "bert_test_embeddings_max.npy",
    )
)

In [None]:
# Pre-computed embedding arrays for the "attention" variant (train first, then test).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files you produced yourself.
X_train_attention, X_test_attention = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        "bert_train_embeddings_attention.npy",
        "bert_test_embeddings_attention.npy",
    )
)

In [5]:
# Pre-computed embedding arrays for the "mixed" variant (train first, then test).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files you produced yourself.
X_train_mixed, X_test_mixed = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        "bert_train_embeddings_mixed.npy",
        "bert_test_embeddings_mixed.npy",
    )
)

In [None]:
# Target labels for each split, taken from the "Class Index" column.
# The test labels must come from the test frame: reading them from `train`
# (as this cell previously did) scores the test-set predictions against
# training labels, which invalidates every metric computed below.
# NOTE(review): assumes test.csv has the same "Class Index" column as
# train.csv and that its rows are aligned with the X_test_* arrays -- confirm.
y_train = train["Class Index"]
y_test = test["Class Index"]

In [None]:
# Create the SVM classifier with scikit-learn's default settings.
# This unfitted instance is the `estimator` handed to each per-batch
# GridSearchCV run in the sections that follow.
svc = SVC()

In [None]:
# Define the parameter grid to search over.
#
# SVC only uses `degree` with the polynomial kernel and ignores it for every
# other kernel. Crossing `degree` with 'linear' and 'rbf' therefore re-fits
# the same model three times per C value (27 candidates, only 15 distinct).
# GridSearchCV accepts a list of sub-grids, so `degree` is searched only
# where it has an effect.
param_grid = [
    {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
    },
    {
        'C': [0.1, 1, 10],
        'kernel': ['poly'],
        'degree': [2, 3, 4],
    },
]

CLS token

In [None]:
# Tune the SVC on consecutive 1000-row chunks of the CLS embeddings and
# remember which hyperparameter combination wins on each chunk.
batch_size = 1000

# Floor division: only complete chunks are searched, so any trailing rows
# beyond the last full chunk are not used for tuning.
num_batches = X_train_cls.shape[0] // batch_size

# One entry per chunk: the winning parameters as a sorted tuple of
# (name, value) pairs, which is hashable and can be counted later.
best_params_list = []

for start_idx in range(0, num_batches * batch_size, batch_size):
    rows = slice(start_idx, start_idx + batch_size)

    # 3-fold cross-validated grid search restricted to this chunk
    grid_search = GridSearchCV(svc, param_grid, cv=3)
    grid_search.fit(X_train_cls[rows], y_train[rows])

    winner = sorted(grid_search.best_params_.items())
    best_params_list.append(tuple(winner))

In [None]:
# Majority vote over the per-chunk winners: tally how often each parameter
# tuple was selected and keep the most frequent one.
vote_counts = Counter(best_params_list)
winning_items = vote_counts.most_common(1)[0][0]
common_params = dict(winning_items)

# Build the final classifier from the winning hyperparameters ...
svc_best = SVC(**common_params)

# ... and train it on the complete CLS training set.
svc_best.fit(X_train_cls, y_train)

In [None]:
# Mean accuracy of the tuned classifier on the CLS test embeddings
svc_best.score(X=X_test_cls, y=y_test)

In [None]:
# Class predictions of the tuned classifier for every CLS test embedding
y_pred = svc_best.predict(X=X_test_cls)

In [None]:
# Macro-averaged F1: the per-class F1 scores are computed separately and
# then averaged with equal weight for every class.
f1_score(y_true=y_test, y_pred=y_pred, average='macro')

In [None]:
# Axis labels for the matrix: every class value that occurs in the true or
# the predicted labels, in sorted order. This is the same ordering
# confusion_matrix uses by default, so the counts are unchanged. The tick
# labels now show the real class values instead of a hard-coded range(4),
# which mislabels the axes whenever the classes are not exactly 0..3.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))

# Calculate the confusion matrix for the predicted labels compared to the true labels
matrx = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a ConfusionMatrixDisplay object whose ticks carry the actual class values
disp = ConfusionMatrixDisplay(matrx, display_labels=class_labels)

# Plot the confusion matrix using the ConfusionMatrixDisplay object
disp.plot()

Mean - pooling

In [None]:
# Tune the SVC on consecutive 1000-row chunks of the mean-pooled embeddings
# and remember which hyperparameter combination wins on each chunk.
batch_size = 1000

# Floor division: only complete chunks are searched, so any trailing rows
# beyond the last full chunk are not used for tuning.
num_batches = X_train_mean.shape[0] // batch_size

# One entry per chunk: the winning parameters as a sorted tuple of
# (name, value) pairs, which is hashable and can be counted later.
best_params_list = []

for start_idx in range(0, num_batches * batch_size, batch_size):
    rows = slice(start_idx, start_idx + batch_size)

    # 3-fold cross-validated grid search restricted to this chunk
    grid_search = GridSearchCV(svc, param_grid, cv=3)
    grid_search.fit(X_train_mean[rows], y_train[rows])

    winner = sorted(grid_search.best_params_.items())
    best_params_list.append(tuple(winner))

In [None]:
# Majority vote over the per-chunk winners: tally how often each parameter
# tuple was selected and keep the most frequent one.
vote_counts = Counter(best_params_list)
winning_items = vote_counts.most_common(1)[0][0]
common_params = dict(winning_items)

# Build the final classifier from the winning hyperparameters ...
svc_best = SVC(**common_params)

# ... and train it on the complete mean-pooled training set.
svc_best.fit(X_train_mean, y_train)

In [None]:
# Mean accuracy of the tuned classifier on the mean-pooled test embeddings
svc_best.score(X=X_test_mean, y=y_test)

In [None]:
# Class predictions of the tuned classifier for every mean-pooled test
# embedding; these feed the F1 score and confusion matrix that follow.
y_pred = svc_best.predict(X=X_test_mean)

In [None]:
# Macro-averaged F1: the per-class F1 scores are computed separately and
# then averaged with equal weight for every class.
f1_score(y_true=y_test, y_pred=y_pred, average='macro')

In [None]:
# Axis labels for the matrix: every class value that occurs in the true or
# the predicted labels, in sorted order. This is the same ordering
# confusion_matrix uses by default, so the counts are unchanged. The tick
# labels now show the real class values instead of a hard-coded range(4),
# which mislabels the axes whenever the classes are not exactly 0..3.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))

# Calculate the confusion matrix for the predicted labels compared to the true labels
matrx = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a ConfusionMatrixDisplay object whose ticks carry the actual class values
disp = ConfusionMatrixDisplay(matrx, display_labels=class_labels)

# Plot the confusion matrix using the ConfusionMatrixDisplay object
disp.plot()

Max - pooling

In [None]:
# Tune the SVC on consecutive 1000-row chunks of the max-pooled embeddings
# and remember which hyperparameter combination wins on each chunk.
batch_size = 1000

# Floor division: only complete chunks are searched, so any trailing rows
# beyond the last full chunk are not used for tuning.
num_batches = X_train_max.shape[0] // batch_size

# One entry per chunk: the winning parameters as a sorted tuple of
# (name, value) pairs, which is hashable and can be counted later.
best_params_list = []

for start_idx in range(0, num_batches * batch_size, batch_size):
    rows = slice(start_idx, start_idx + batch_size)

    # 3-fold cross-validated grid search restricted to this chunk
    grid_search = GridSearchCV(svc, param_grid, cv=3)
    grid_search.fit(X_train_max[rows], y_train[rows])

    winner = sorted(grid_search.best_params_.items())
    best_params_list.append(tuple(winner))

In [None]:
# Majority vote over the per-chunk winners: tally how often each parameter
# tuple was selected and keep the most frequent one.
vote_counts = Counter(best_params_list)
winning_items = vote_counts.most_common(1)[0][0]
common_params = dict(winning_items)

# Build the final classifier from the winning hyperparameters ...
svc_best = SVC(**common_params)

# ... and train it on the complete max-pooled training set.
svc_best.fit(X_train_max, y_train)

In [None]:
# Mean accuracy of the tuned classifier on the max-pooled test embeddings
svc_best.score(X=X_test_max, y=y_test)

In [None]:
# Class predictions of the tuned classifier for every max-pooled test
# embedding; these feed the F1 score and confusion matrix that follow.
y_pred = svc_best.predict(X=X_test_max)

In [None]:
# Macro-averaged F1: the per-class F1 scores are computed separately and
# then averaged with equal weight for every class.
f1_score(y_true=y_test, y_pred=y_pred, average='macro')

In [None]:
# Axis labels for the matrix: every class value that occurs in the true or
# the predicted labels, in sorted order. This is the same ordering
# confusion_matrix uses by default, so the counts are unchanged. The tick
# labels now show the real class values instead of a hard-coded range(4),
# which mislabels the axes whenever the classes are not exactly 0..3.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))

# Calculate the confusion matrix for the predicted labels compared to the true labels
matrx = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a ConfusionMatrixDisplay object whose ticks carry the actual class values
disp = ConfusionMatrixDisplay(matrx, display_labels=class_labels)

# Plot the confusion matrix using the ConfusionMatrixDisplay object
disp.plot()

Attention - pooling

In [None]:
# Tune the SVC on consecutive 1000-row chunks of the attention-pooled
# embeddings and remember which hyperparameter combination wins on each chunk.
batch_size = 1000

# Floor division: only complete chunks are searched, so any trailing rows
# beyond the last full chunk are not used for tuning.
num_batches = X_train_attention.shape[0] // batch_size

# One entry per chunk: the winning parameters as a sorted tuple of
# (name, value) pairs, which is hashable and can be counted later.
best_params_list = []

for start_idx in range(0, num_batches * batch_size, batch_size):
    rows = slice(start_idx, start_idx + batch_size)

    # 3-fold cross-validated grid search restricted to this chunk
    grid_search = GridSearchCV(svc, param_grid, cv=3)
    grid_search.fit(X_train_attention[rows], y_train[rows])

    winner = sorted(grid_search.best_params_.items())
    best_params_list.append(tuple(winner))

In [None]:
# Majority vote over the per-chunk winners: tally how often each parameter
# tuple was selected and keep the most frequent one.
vote_counts = Counter(best_params_list)
winning_items = vote_counts.most_common(1)[0][0]
common_params = dict(winning_items)

# Build the final classifier from the winning hyperparameters ...
svc_best = SVC(**common_params)

# ... and train it on the complete attention-pooled training set.
svc_best.fit(X_train_attention, y_train)

In [None]:
# Mean accuracy of the tuned classifier on the attention-pooled test embeddings
svc_best.score(X=X_test_attention, y=y_test)

In [None]:
# Class predictions of the tuned classifier for every attention-pooled test
# embedding; these feed the F1 score and confusion matrix that follow.
y_pred = svc_best.predict(X=X_test_attention)

In [None]:
# Macro-averaged F1: the per-class F1 scores are computed separately and
# then averaged with equal weight for every class.
f1_score(y_true=y_test, y_pred=y_pred, average='macro')

In [None]:
# Axis labels for the matrix: every class value that occurs in the true or
# the predicted labels, in sorted order. This is the same ordering
# confusion_matrix uses by default, so the counts are unchanged. The tick
# labels now show the real class values instead of a hard-coded range(4),
# which mislabels the axes whenever the classes are not exactly 0..3.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))

# Calculate the confusion matrix for the predicted labels compared to the true labels
matrx = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a ConfusionMatrixDisplay object whose ticks carry the actual class values
disp = ConfusionMatrixDisplay(matrx, display_labels=class_labels)

# Plot the confusion matrix using the ConfusionMatrixDisplay object
disp.plot()

Mixed - pooling

In [None]:
# Tune the SVC on consecutive 1000-row chunks of the mixed-pooling embeddings
# and remember which hyperparameter combination wins on each chunk.
batch_size = 1000

# Floor division: only complete chunks are searched, so any trailing rows
# beyond the last full chunk are not used for tuning.
num_batches = X_train_mixed.shape[0] // batch_size

# One entry per chunk: the winning parameters as a sorted tuple of
# (name, value) pairs, which is hashable and can be counted later.
best_params_list = []

for start_idx in range(0, num_batches * batch_size, batch_size):
    rows = slice(start_idx, start_idx + batch_size)

    # 3-fold cross-validated grid search restricted to this chunk
    grid_search = GridSearchCV(svc, param_grid, cv=3)
    grid_search.fit(X_train_mixed[rows], y_train[rows])

    winner = sorted(grid_search.best_params_.items())
    best_params_list.append(tuple(winner))

In [None]:
# Majority vote over the per-chunk winners: tally how often each parameter
# tuple was selected and keep the most frequent one.
vote_counts = Counter(best_params_list)
winning_items = vote_counts.most_common(1)[0][0]
common_params = dict(winning_items)

# Build the final classifier from the winning hyperparameters ...
svc_best = SVC(**common_params)

# ... and train it on the complete mixed-pooling training set.
svc_best.fit(X_train_mixed, y_train)

In [None]:
# Mean accuracy of the tuned classifier on the mixed-pooling test embeddings
svc_best.score(X=X_test_mixed, y=y_test)

In [None]:
# Class predictions of the tuned classifier for every mixed-pooling test
# embedding; these feed the F1 score and confusion matrix that follow.
y_pred = svc_best.predict(X=X_test_mixed)

In [None]:
# Macro-averaged F1: the per-class F1 scores are computed separately and
# then averaged with equal weight for every class.
f1_score(y_true=y_test, y_pred=y_pred, average='macro')

In [None]:
# Axis labels for the matrix: every class value that occurs in the true or
# the predicted labels, in sorted order. This is the same ordering
# confusion_matrix uses by default, so the counts are unchanged. The tick
# labels now show the real class values instead of a hard-coded range(4),
# which mislabels the axes whenever the classes are not exactly 0..3.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))

# Calculate the confusion matrix for the predicted labels compared to the true labels
matrx = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a ConfusionMatrixDisplay object whose ticks carry the actual class values
disp = ConfusionMatrixDisplay(matrx, display_labels=class_labels)

# Plot the confusion matrix using the ConfusionMatrixDisplay object
disp.plot()