# Speech Act Classification using a Neural Network (MLP on TF-IDF features)

In [39]:
import pandas as pd

In [40]:
# Load the labelled sentence corpus from disk.
raw_data = pd.read_csv("../data/adjusted-labels-comms-exclusive.csv")

# Keep only the rows whose label is something other than "Other".
filtered_data = raw_data.loc[raw_data["Label"].ne("Other")]

In [41]:
# Separate the model inputs (sentence text) from the targets (labels).
sentences, labels = filtered_data["Sentence"], filtered_data["Label"]

In [42]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the examples for evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    sentences,
    labels,
    random_state=47,
    test_size=0.2,
)

In [43]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features: drop terms that occur in fewer than 5 documents or in
# more than 80% of documents, and use sublinear term-frequency scaling.
vectorizer = TfidfVectorizer(
    use_idf=True,
    sublinear_tf=True,
    min_df=5,
    max_df=0.8,
)

# The vocabulary and IDF weights are learned from the training split only;
# the test split is projected with that already-fitted vectorizer.
train_vectors = vectorizer.fit_transform(X_train)
test_vectors = vectorizer.transform(X_test)

In [44]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with a single hidden layer of 200 units, optimised
# with Adam. verbose is enabled, so the loss is printed for each iteration.
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(200,),
    solver='adam',
    alpha=0.001,
    tol=0.0001,
    max_iter=500,
    random_state=42,
    verbose=10,
)

# Train on the TF-IDF vectors of the training split.
mlp_classifier.fit(train_vectors, y_train)

# Evaluate on the held-out split: overall accuracy, then the per-class report.
predictions = mlp_classifier.predict(test_vectors)
print(f"Accuracy: {accuracy_score(y_test, predictions):.2f}")
print(classification_report(y_test, predictions))

Iteration 1, loss = 2.36271615
Iteration 2, loss = 2.24690361
Iteration 3, loss = 2.13885572
Iteration 4, loss = 2.03283852
Iteration 5, loss = 1.91124160
Iteration 6, loss = 1.77890463
Iteration 7, loss = 1.65111775
Iteration 8, loss = 1.53382804
Iteration 9, loss = 1.43137939
Iteration 10, loss = 1.34097493
Iteration 11, loss = 1.26341874
Iteration 12, loss = 1.19766247
Iteration 13, loss = 1.13927788
Iteration 14, loss = 1.09371546
Iteration 15, loss = 1.05237555
Iteration 16, loss = 1.01688329
Iteration 17, loss = 0.98737489
Iteration 18, loss = 0.96225588
Iteration 19, loss = 0.93953606
Iteration 20, loss = 0.92056936
Iteration 21, loss = 0.90240084
Iteration 22, loss = 0.88686142
Iteration 23, loss = 0.87451614
Iteration 24, loss = 0.86176296
Iteration 25, loss = 0.85264223
Iteration 26, loss = 0.84031434
Iteration 27, loss = 0.83123435
Iteration 28, loss = 0.82493112
Iteration 29, loss = 0.81813265
Iteration 30, loss = 0.81059050
Iteration 31, loss = 0.80496266
Iteration 32, los