In [70]:
# Importing stuff

import pandas as pd  # For handling data in DataFrames
import numpy as np  # For numerical operations

# Importing TensorFlow and Keras for building and training neural networks
import tensorflow as tf
from tensorflow.keras.models import Sequential  # Sequential model for stacking layers
from keras.layers import (  # Importing various layers for the model
    Dense, 
    Dropout, 
    Flatten, 
    Conv1D, 
    MaxPooling1D, 
)

import netron

# Importing libraries for Natural Language Processing (NLP)
import nltk  # Natural Language Toolkit for text processing
from nltk.corpus import stopwords  # To filter out common words
from string import punctuation  # To handle punctuation marks
from nltk.stem import WordNetLemmatizer  # For reducing words to their base forms
from nltk.tokenize import word_tokenize  # For splitting text into words

# Importing TfidfVectorizer for converting text data into numerical format
from sklearn.feature_extraction.text import TfidfVectorizer

# Importing processing utilities for model training
from sklearn.model_selection import train_test_split  # For splitting the dataset into training and testing sets
from keras.utils import to_categorical  # For converting labels to a categorical format
from sklearn.preprocessing import LabelEncoder  # For encoding categorical labels into integers


In [71]:
df = pd.read_csv("processedData.csv")

In [72]:
# Preprocessing the comments. Lemmatizing, removing stopwords, etc.s

stopwords_list = stopwords.words('english')

def tokenization(message):
    return word_tokenize(message)

lemmatizer = WordNetLemmatizer()
def lemmatize(message):
    return [lemmatizer.lemmatize(word) for word in message]

def remove_stopwords(message):
  res = []
  for word in message:
    if word not in stopwords_list:
      res.append(word)

  return res

def remove_stopwords(message):
    return [word for word in message if word not in stopwords_list]

def processing(message):
  message = message.lower()
  message = tokenization(message)
  message = remove_stopwords(message)
  message = lemmatize(message)
  cleaned_msg = ' '.join(message)
  return cleaned_msg

df['comments'] = df['comments'].apply(processing)

In [73]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('Betrayal',axis=1), 
                                                     df['Betrayal'],
                                                     random_state=33,
                                                     test_size=0.2)

vectorizer = TfidfVectorizer(ngram_range=(1,2),max_features=32)
vectorizer.fit(df['comments'])

tfidf_X_train = vectorizer.transform(X_train['comments'])
tfidf_X_test = vectorizer.transform(X_test['comments'])

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(68, 17) (18, 17) (68,) (18,)


In [74]:
# Building the model. It is a 1-D CNN, fine-tuned for textual classification.

model_CNN = Sequential()
model_CNN.add(Conv1D(filters=32, kernel_size=3, activation='leaky_relu', input_shape=(tfidf_X_train.shape[1],1)))
model_CNN.add(MaxPooling1D(pool_size=3))
model_CNN.add(Flatten())
model_CNN.add(Dense(units=32, activation = 'leaky_relu'))
model_CNN.add(Dense(units = 16, activation = 'leaky_relu'))
model_CNN.add(Dropout(0.2))
model_CNN.add(Dense(units=1, activation='softmax'))

model_CNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_CNN.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [75]:
model_CNN.save("cnn_model.h5")
netron.start("cnn_model.h5")



Serving 'cnn_model.h5' at http://localhost:8080


('localhost', 8080)

In [76]:
# Training
model_CNN.fit(tfidf_X_train, y_train, epochs=100)

Epoch 1/100




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.2764 - loss: 0.6887
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2608 - loss: 0.6745  
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2725 - loss: 0.6578  
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.2647 - loss: 0.6465 
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2920 - loss: 0.6389  
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2686 - loss: 0.6112  
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2764 - loss: 0.6028  
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2530 - loss: 0.5961  
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x1f17b6cdfd0>

In [77]:
y_pred_msg_train = np.array(model_CNN.predict(tfidf_X_train))
y_pred_msg_test = np.array(model_CNN.predict(tfidf_X_test))

#Predictions
X_train['CNN_preds'] = y_pred_msg_train
X_test['CNN_preds'] = y_pred_msg_test

X_train.drop('comments',axis=1,inplace=True) # Dropping since no longer needed
X_test.drop('comments', axis=1 ,inplace=True)
X_train.drop(X_train.columns[0], axis=1, inplace = True) # Dropping unnamed index column
X_test.drop(X_test.columns[0], axis = 1, inplace = True)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


# Neural Network Classification

In [81]:
model_classifier = Sequential()
model_classifier.add(Dense(8, activation='relu', input_shape=(16,)))
model_classifier.add(Dense(8, activation='relu'))
model_classifier.add(Dense(1, activation='sigmoid'))

model_classifier.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [82]:
model_classifier.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])

In [84]:
model_classifier.fit(X_train, y_train,epochs=300)

Epoch 1/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7197 - loss: 0.6877 
Epoch 2/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7080 - loss: 0.6876 
Epoch 3/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7314 - loss: 0.6865 
Epoch 4/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7001 - loss: 0.6869  
Epoch 5/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7080 - loss: 0.6862 
Epoch 6/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7197 - loss: 0.6853 
Epoch 7/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7392 - loss: 0.6840 
Epoch 8/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7236 - loss: 0.6840 
Epoch 9/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x1f17c9485c0>

In [85]:
y_pred = model_classifier.predict([X_test, y_test])

# Since the output layer uses sigmoid, convert predictions to binary labels
y_pred_binary = (y_pred > 0.5).astype(int)

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_binary))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        16
           1       0.00      0.00      0.00         2

    accuracy                           0.89        18
   macro avg       0.44      0.50      0.47        18
weighted avg       0.79      0.89      0.84        18



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [86]:
model_classifier.save("classifer.h5")
netron.start("classifer.h5") # Visualzing the classification network.



Serving 'classifer.h5' at http://localhost:8080


('localhost', 8080)