### Support Vector Machines
Using a support vector machine to classify data based on the speech act

In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import CountVectorizer


Generating the sentences and labels from the Excel sheet

In [46]:
from preprocessing import get_sentences_labels

sentences, labels = get_sentences_labels()

Sentences:  ['alpha, charlie. bravo check.', "alpha you're loud_and_clear.", 'charlie. good to me', 'charlie, charlie one, bravo radio check. ', 'yeah. charlie good to me. over']
I have sentences:  81
Correct Labels:  ['Request for Situation', 'Statement of Situation', 'Statement of Situation', 'Request for Situation', 'Statement of Situation', 'Statement of Situation', 'Not Classified', 'Statement of Situation', 'Statement of Situation', 'Statement of Situation', 'Statement of Action', 'Statement of Intent', 'Statement of Situation', 'Request for Situation', 'Statement of Situation', 'Statement of Situation', 'Not Classified', 'Statement of Situation', 'Statement of Situation', 'Not Classified', 'Not Classified', 'Statement of Situation', 'Statement of Prediction', 'Statement of Intent', 'Not Classified', 'Statement of Intent', 'Statement of Prediction', 'Not Classified', 'Statement of Intent', 'Statement of Prediction', 'Statement of Prediction', 'Not Classified', 'Not Classified', '

## Preprocessing
Vectorising the data

In [47]:
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(sentences)
y = labels

Encode the labels

In [48]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels)

Convert to tensors

In [49]:
X = torch.tensor(X.toarray(), dtype=torch.float32)
y = torch.tensor(y_encoded, dtype=torch.float32)

Defining the SVM model using TensorFlow

In [50]:
class SVM(nn.Module):
    def __init__(self):
        super(SVM, self).__init__()
        self.linear = nn.Linear(X.shape[1], 1)

    def forward(self, x):
        return self.linear(x)

Define the SVM model

In [51]:
model = SVM()

Define loss function and optimiser

In [52]:
criterion = nn.HingeEmbeddingLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

Train the model

In [53]:
epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs.squeeze(), y)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')

Epoch [10/100], Loss: 0.9115352630615234
Epoch [20/100], Loss: 0.891568660736084
Epoch [30/100], Loss: 0.8716022968292236
Epoch [40/100], Loss: 0.851635754108429
Epoch [50/100], Loss: 0.8316693902015686
Epoch [60/100], Loss: 0.8117028474807739
Epoch [70/100], Loss: 0.791736364364624
Epoch [80/100], Loss: 0.7717699408531189
Epoch [90/100], Loss: 0.751803457736969
Epoch [100/100], Loss: 0.7318369746208191


## Evaluation

In [54]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Convert model predictions to labels
def predictions_to_labels(predictions):
    return torch.round(torch.sigmoid(predictions)).detach().numpy()

# Make predictions
with torch.no_grad():
    predicted_labels = predictions_to_labels(model(X))

# Convert ground truth labels to numpy array
true_labels = y.detach().numpy()

# Calculate evaluation metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='macro', zero_division=1)
recall = recall_score(true_labels, predicted_labels, average='macro')
f1 = f1_score(true_labels, predicted_labels, average='macro')

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

Accuracy: 0.12345679012345678
Precision: 0.8747795414462082
Recall: 0.14285714285714285
F1-score: 0.03139717425431711


Decoding and evaluating with classification report

In [55]:
def decode_labels(encoded_labels):
    return label_encoder.inverse_transform(encoded_labels)