In [1]:
import pandas as pd

### Import data

In [2]:
# Load the preprocessed dataset, keep only the text feature and its label,
# and discard any row where either of the two is missing.
data = (
    pd.read_csv('preprocessed_data.csv')[['text_lemmatized', 'sentiment']]
    .dropna()
)
data

Unnamed: 0,text_lemmatized,sentiment
0,switchfoot httptwitpiccom2y1zl awww bummer sho...,0
1,upset cannot update facebook texting might cry...,0
2,kenichan dived many time ball managed save 50 ...,0
3,whole body feel itchy like fire,0
4,nationwideclass behaving mad cannot see,0
...,...,...
1599995,woke school best feeling ever,1
1599996,thewdbcom cool hear old walt interview ♫ httpb...,1
1599997,ready mojo makeover ask detail,1
1599998,happy 38th birthday boo alll time tupac amaru ...,1


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer

import numpy as np
import nltk

### TF-IDF

In [4]:
# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so that the split -- and therefore every metric
# computed from it below -- is identical on each Restart & Run All. The value
# mirrors the seed already passed to MLPClassifier in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    data["text_lemmatized"],
    data["sentiment"],
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

In [5]:
# Fit the vocabulary and IDF weights on the training text only, then apply the
# already-fitted vectorizer to the test text so both matrices share one
# feature space and nothing is learned from the held-out rows.
tfidf_vectorizer = TfidfVectorizer(use_idf=True)

X_train_vectors_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_vectors_tfidf = tfidf_vectorizer.transform(X_test)

### Multi-Layer Perceptron (MLP) classifier

In [6]:
# One hidden layer of 5 ReLU units trained with adam on the TF-IDF matrix.
# For the stochastic solvers max_iter counts epochs, so training is capped at
# 10 passes over the data and may stop before the optimiser has converged.
mlp = MLPClassifier(
    hidden_layer_sizes=(5,),
    activation='relu',
    solver='adam',
    max_iter=10,
    random_state=1,
).fit(X_train_vectors_tfidf, y_train)



In [8]:
# Hard class predictions for the held-out set.
y_predict = mlp.predict(X_test_vectors_tfidf)
# Column 1 of predict_proba is the probability of the second entry in
# mlp.classes_ (label 1 here); this score is what the ROC curve is built from.
y_prob = mlp.predict_proba(X_test_vectors_tfidf)[:, 1]

# Per-class precision / recall / F1, followed by the raw confusion counts.
report = classification_report(y_test, y_predict)
print(report)
conf_matrix = confusion_matrix(y_test, y_predict)
print('Confusion Matrix:', conf_matrix)

# Area under the ROC curve, integrated from the curve's (fpr, tpr) points.
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)
print('AUC:', roc_auc)

              precision    recall  f1-score   support

           0       0.77      0.76      0.76    159852
           1       0.76      0.77      0.77    160074

    accuracy                           0.76    319926
   macro avg       0.76      0.76      0.76    319926
weighted avg       0.76      0.76      0.76    319926

Confusion Matrix: [[121567  38285]
 [ 37108 122966]]
AUC: 0.842294185408639


In [9]:
import joblib

In [10]:
# Persist the fitted TF-IDF vectorizer alongside the classifier: the MLP was
# trained on vectors produced by this exact vocabulary and IDF weighting, so
# the model file on its own cannot score raw text in a fresh session.
# NOTE(review): assumes no later cell already saves the vectorizer -- confirm.
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer.pkl')
# Kept as the last expression so the cell's displayed output is unchanged.
joblib.dump(mlp, 'mlp_model.pkl')

['mlp_model.pkl']