### Support Vector Machines
Using a support vector machine to classify utterances by their speech act.

In [46]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import CountVectorizer

Generating the sentences and labels from the Excel sheet

In [47]:
# Project-local helper that reads the annotated spreadsheet.
from preprocessing import get_sentences_labels

# Alternative input file, left commented out for switching data sets:
# file_path = "data/interrater_data.xlsx"
file_path = "data/combined_data_set.xlsx"

# The helper returns two values, unpacked here. The recorded output below this
# cell shows utterance strings and speech-act label strings; presumably the two
# sequences are aligned one-to-one -- TODO confirm against preprocessing.py.
sentences, labels = get_sentences_labels(file_path)

Sentences:  ['alpha, charlie. bravo check.', "alpha you're loud_and_clear.", 'charlie. good to me', 'charlie, charlie one, bravo radio check. ', 'yeah. charlie good to me. over']
I have sentences:  70
Correct Labels:  ['Request for Situation', 'Statement of Situation', 'Statement of Situation', 'Request for Situation', 'Statement of Situation', 'Statement of Situation', 'Not Classified', 'Statement of Situation', 'Statement of Situation', 'Statement of Situation', 'Statement of Action', 'Statement of Intent', 'Statement of Situation', 'Statement of Situation', 'Not Classified', 'Statement of Situation', 'Statement of Situation', 'Not Classified', 'Not Classified', 'Statement of Intent', 'Not Classified', 'Statement of Intent', 'Not Classified', 'Statement of Intent', 'Statement of Intent', 'Statement of Prediction', 'Not Classified', 'Not Classified', 'Statement of Intent', 'Statement of Intent', 'Statement of Intent', 'Statement of Intent', 'Statement of Prediction', 'Statement of Sit

## Preprocessing
Vectorising the data

In [48]:
# Bag-of-words features: learn the vocabulary from all sentences and build the
# per-sentence token-count matrix in one call.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(sentences)  # sparse matrix here; densified later via .toarray()
# NOTE(review): `y` holds the raw label strings only until the tensor-conversion
# cell rebinds the same name to a tensor.
y = labels

Encode the labels

In [49]:
from sklearn.preprocessing import LabelEncoder

# Map each distinct label string to an integer class index. The fitted encoder
# is kept at notebook level because decode_labels() later calls its
# inverse_transform to turn indices back into label strings.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels)

Convert to tensors

In [51]:
# Convert features and targets to float32 tensors.
#
# `X` is rebound in place, so the cell must tolerate being run more than once:
# on the first run `X` is still the sparse count matrix and needs .toarray();
# on a re-run it is already a dense tensor (which has no .toarray()), and
# torch.as_tensor passes it through unchanged instead of raising.
X = torch.as_tensor(X.toarray() if hasattr(X, "toarray") else X, dtype=torch.float32)
# Targets are rebuilt from `y_encoded` every time, so this line was already
# safe to re-run; dtype is kept as float32 to match the existing loss setup.
y = torch.tensor(y_encoded, dtype=torch.float32)

Defining the SVM model using PyTorch

In [52]:
class SVM(nn.Module):
    """Linear SVM scorer: one affine layer that emits raw margin scores.

    Args:
        in_features: Width of each input feature vector. When omitted, the
            width is taken from the notebook-level feature matrix ``X``
            (``X.shape[1]``), which is what the zero-argument form always did.
        num_classes: Number of score columns to produce. The default of 1
            keeps the original single-output behaviour; pass the number of
            distinct labels to obtain one score per class.
    """

    def __init__(self, in_features=None, num_classes=1):
        super().__init__()
        if in_features is None:
            # Backward-compatible fallback for `SVM()`: read the width from
            # the global feature matrix, exactly as the original class did.
            in_features = X.shape[1]
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the raw linear scores for ``x``; no activation is applied."""
        return self.linear(x)

Instantiate the SVM model

In [53]:
# Seed PyTorch's global RNG before building the model: nn.Linear initialises
# its weights randomly, so without a fixed seed every "Restart & Run All"
# starts from different weights and reports different losses and metrics.
torch.manual_seed(42)
model = SVM()

Define loss function and optimiser

In [54]:
# NOTE(review): nn.HingeEmbeddingLoss is documented for targets of 1 or -1,
# whereas `y` is built from LabelEncoder class indices. Confirm this loss
# matches the intended task; for several classes a multi-class hinge loss such
# as nn.MultiMarginLoss (with one model output per class and integer targets)
# looks like the closer fit.
criterion = nn.HingeEmbeddingLoss()
# Plain stochastic gradient descent over every model parameter, fixed lr of 0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)

Train the model

In [55]:
# Full-batch training: every epoch is one gradient step over all of `X`.
epochs = 300
for epoch in range(epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(X)
    # Drop only the trailing score dimension. A bare .squeeze() would also
    # collapse the batch dimension whenever a single sample is passed in.
    loss = criterion(outputs.squeeze(-1), y)
    loss.backward()
    optimizer.step()

    # Report the loss every 10th epoch to keep the cell output short.
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')

TypeError: linear(): argument 'input' (position 1) must be Tensor, not csr_matrix

## Evaluation

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Convert model predictions to labels
def predictions_to_labels(predictions):
    """Turn raw model scores into hard label predictions as a NumPy array.

    Args:
        predictions: Tensor of raw scores from the model.

    Returns:
        numpy.ndarray: For scores with more than one column, the index of the
        highest-scoring column per row (shape ``(N,)``). Otherwise the scores
        are squashed with a sigmoid and rounded to 0.0 / 1.0, keeping the
        input's shape -- the original single-output behaviour.
    """
    if predictions.dim() > 1 and predictions.shape[-1] > 1:
        # One score per class: the predicted class is the arg-max column.
        hard = predictions.argmax(dim=-1)
    else:
        hard = torch.round(torch.sigmoid(predictions))
    # .cpu() is a no-op for CPU tensors and makes .numpy() safe for tensors
    # that live on another device.
    return hard.detach().cpu().numpy()

# Make predictions
with torch.no_grad():  # inference only, so skip autograd bookkeeping
    # Flatten to 1-D: a single-output model yields an (N, 1) array, while the
    # ground-truth labels below are (N,).
    predicted_labels = predictions_to_labels(model(X)).ravel()

# Convert ground truth labels to numpy array
true_labels = y.detach().numpy()

# Calculate evaluation metrics
# Use the same zero_division policy for all three macro-averaged scores.
# zero_division=1 on precision alone credited every never-predicted class with
# a perfect 1.0, inflating macro precision and disagreeing with recall / F1.
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='macro', zero_division=0)
recall = recall_score(true_labels, predicted_labels, average='macro', zero_division=0)
f1 = f1_score(true_labels, predicted_labels, average='macro', zero_division=0)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

In [None]:
# from sklearn.metrics import classification_report

# print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

Decoding and evaluating with classification report

In [None]:
def decode_labels(encoded_labels):
    """Map encoded class indices back to their original label strings.

    Args:
        encoded_labels: Array-like of class indices. Float-valued and
            column-shaped inputs are accepted; the prediction and ground-truth
            arrays in this notebook are float32, and predictions may be (N, 1).

    Returns:
        Array of label strings, one per entry, produced by the notebook-level
        ``label_encoder``.
    """
    import numpy as np  # local import: numpy is not imported at the top of this notebook

    # inverse_transform indexes the class array with these values, so they
    # must be a flat integer array. Round first so a value like 1.9999 is not
    # truncated down to 1.
    indices = np.rint(np.asarray(encoded_labels)).astype(int).ravel()
    return label_encoder.inverse_transform(indices)