In [2]:
import os

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier      # ensemble learning technique that combines classifiers to improve performance
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, accuracy_score

In [3]:
# Location of the Twitter dataset CSV. Set the TWITTER_DATASET_PATH environment
# variable to read a copy stored elsewhere; when it is unset, fall back to the
# original absolute path so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    'TWITTER_DATASET_PATH',
    'C:/Users/NIKHIL/Desktop/twitterf-main/data/twitter_dataset.csv',
)
data = pd.read_csv(DATA_PATH)
# Preview the first rows to confirm the file loaded as expected
data.head()

Unnamed: 0,name_wt,statuses_count,followers_count,friends_count,favourites_count,listed_count,label
0,0.6,195,19,53,58,0,0
1,0.705882,9,67,555,2,1,0
2,0.916667,20,21,267,0,0,1
3,0.5,28,16,325,0,0,1
4,0.733333,45,20,515,0,0,1


In [4]:
# Every column except the target column 'label' is used as a model input
features = [column for column in data.columns if column != 'label']
features

['name_wt',
 'statuses_count',
 'followers_count',
 'friends_count',
 'favourites_count',
 'listed_count']

In [5]:
# Separate the feature matrix (X) from the target column (y)
X = data.loc[:, features]
y = data['label']

In [6]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
(X_train, X_val,
 y_train, y_val) = train_test_split(X, y, random_state=42, test_size=0.2)


In [7]:
# Base estimators for the stacking ensemble: a depth-limited decision tree,
# Gaussian naive Bayes, and a neural network with two hidden layers (64 and 32 units).
# NOTE(review): MLPClassifier is sensitive to feature scale and no scaling step
# is visible in this notebook — consider standardising the inputs; confirm.
dt = DecisionTreeClassifier(random_state=42, max_depth=10)
nb = GaussianNB()
nn = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    random_state=42,
)

In [8]:
# Meta model: a small neural network (one hidden layer of 16 units) that the
# stacking ensemble uses to combine the base models' predictions.
meta = MLPClassifier(
    hidden_layer_sizes=(16,),
    activation='relu',
    solver='adam',
    random_state=42,
)

In [9]:
# Assemble the stacking ensemble: `estimators` is a list of (name, model) pairs
# for the three base models, and `meta` is the final estimator on top of them.
estimators = list(zip(('dt', 'nb', 'nn'), (dt, nb, nn)))
stacking = StackingClassifier(final_estimator=meta, estimators=estimators)

In [10]:
# Fit the stacking classifier on the training split
stacking.fit(X=X_train, y=y_train)

In [11]:
# Predict labels for the validation split; these are compared with the true
# labels (y_val) when the metrics are computed.
y_pred = stacking.predict(X=X_val)

In [12]:
# Display the predicted labels (a bare last expression renders the whole array)
y_pred

array([0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,

In [13]:
# Compute the confusion matrix of the true validation labels against the predicted labels
conf_matrix = confusion_matrix(y_true=y_val, y_pred=y_pred)

In [14]:
# Unpack the four cells of the confusion matrix.
# Row index = true class, column index = predicted class (scikit-learn convention).
# First row: true negatives, false positives
TN, FP = conf_matrix[0, 0], conf_matrix[0, 1]
# Second row: false negatives, true positives
FN, TP = conf_matrix[1, 0], conf_matrix[1, 1]

In [15]:
# Recall: share of actual positives (TP + FN) that were predicted positive
recall = TP / (FN + TP)

In [16]:
# Precision: share of predicted positives (TP + FP) that are actually positive
precision = TP / (FP + TP)

In [17]:
# Remaining metrics derived from the confusion-matrix counts:
#   fmeasure  - harmonic mean of precision and recall
#   accuracy  - proportion of correctly classified samples
#   ErrorRate - proportion of misclassified samples
fmeasure = (2 * precision * recall) / (precision + recall)
accuracy = (TN + TP) / (TP + FP + FN + TN)
ErrorRate = (FN + FP) / (TP + FP + FN + TN)

# scikit-learn's accuracy on the same labels, shown as the cell output
accuracy_score(y_val, y_pred)



0.9893617021276596

In [18]:
# Print the validation metrics as percentages.
# The heading names the stacking classifier: the metrics are computed from the
# stacking ensemble's predictions, not from a single decision tree (the old
# heading said "DECISION TREE MODEL"). The numeric formatting is unchanged.
report = {
    "Recall": recall,
    "Precision": precision,
    "Accuracy": accuracy,
    "F-measure": fmeasure,
    "ErrorRate": ErrorRate,
}
print("------ CLASSIFICATION PERFORMANCE OF STACKING CLASSIFIER MODEL ------ \n")
for metric_name, metric_value in report.items():
    print(f" {metric_name} :  {metric_value * 100} %")

------ CLASSIFICATION PERFORMANCE OF DECISION TREE MODEL ------ 

 Recall :  99.28571428571429 %
 Precision :  98.58156028368793 %
 Accuracy :  98.93617021276596 %
 F-measure :  98.93238434163702 %
 ErrorRate :  1.0638297872340425 %


In [19]:
## Save the trained model as a pickle file so the Flask server can load it for the front end / client side

In [20]:
import pickle

In [21]:
# Serialize the trained stacking classifier to 'stacking_classifier.pkl'.
# `stacking_classifier` holds the output file name (relative to the working directory).
stacking_classifier = 'stacking_classifier.pkl'
with open(stacking_classifier, mode='wb') as model_file:
    pickle.dump(stacking, model_file)

In [22]:
# Reload the pickled stacking classifier and predict on the validation split
# again to check that the saved model is usable.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a
# trusted source such as the one written by this notebook.
with open(stacking_classifier, mode='rb') as model_file:
    loaded_model = pickle.load(model_file)

y_pred = loaded_model.predict(X=X_val)
y_pred

array([0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,