### Support Vector Machines
Using a support vector machine to classify data based on the speech act

In [414]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

Generating the sentences and labels from the Excel sheet

In [415]:
raw_data = pd.read_csv("data/labeled_sentences.csv")

sentences = raw_data["Sentence"] 
labels = raw_data["Labels"]

Separating the data into training and test data

In [416]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(sentences, labels, test_size=0.3)

## Preprocessing
Vectorising based on the Tf-idf values in the data set

In [417]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Create feature vectors
vectorizer = TfidfVectorizer(min_df = 5,
                             max_df = 0.8,
                             sublinear_tf = True,
                             use_idf = True)
train_vectors = vectorizer.fit_transform(X_train)
test_vectors = vectorizer.transform(X_test)

Selecting the Linear Support Vector Classification model

In [418]:
from sklearn import svm
from sklearn.metrics import classification_report
# Perform classification with SVM, kernel=linear
classifier_linear = svm.SVC(kernel='linear')
classifier_linear.fit(train_vectors, y_train)
prediction_linear = classifier_linear.predict(test_vectors)

Evaluating Results

In [419]:
# results
report = classification_report(y_test, prediction_linear)
print(f"Accuracy: {accuracy_score(y_test, prediction_linear):.2f}")
print(report)

Accuracy: 0.37
                         precision    recall  f1-score   support

           Action words       0.12      0.01      0.02        89
         Communications       0.47      0.67      0.55       667
             Fire words       0.09      0.03      0.04        70
Intel (from newspapers)       0.00      0.00      0.00        33
        Reasoning words       0.12      0.03      0.04        74
           Rescue words       0.09      0.05      0.06       107
    Situation Awareness       0.16      0.15      0.16       346

               accuracy                           0.37      1386
              macro avg       0.15      0.13      0.13      1386
           weighted avg       0.29      0.37      0.32      1386
