In [17]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [18]:
training_data = pd.read_csv("training_data.csv")

In [19]:
training_data.drop(['id','id_str', 'screen_name', 
                    'location', 'description', 
                    'url', 'created_at', 
                    'lang', 'status',
                    'default_profile',
                    'default_profile_image',
                    'has_extended_profile','name'],axis=1,inplace=True)

In [20]:
training_data.head()

Unnamed: 0,followers_count,friends_count,listedcount,favourites_count,verified,statuses_count,bot
0,2925,3,139,0,False,708,1
1,9,0,5,0,False,6,1
2,132,46,24,740,False,7346,1
3,54,1351,0,2,False,6,1
4,1300380,24248,7089,4184,True,8536,0


In [21]:
#To check Performances
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [22]:
#calculate accuracy score, precision, recall, f1 score

def Performance(actual_value , predicted_value):
    accuracy = accuracy_score(actual_value , predicted_value) * 100
    precision = precision_score(actual_value , predicted_value) * 100
    recall = recall_score(actual_value , predicted_value) * 100
    f1 = f1_score(actual_value , predicted_value, average='weighted')
    print('Accuracy is {:.4f}%\n Precision is {:.4f}%\n Recall is {:.4f}%\nF1 Score is {:.4f}\n'.format(accuracy, precision, recall, f1))

In [13]:
#Extracted features

# features = ['followers_count', 'friends_count', 'listedcount', 'favourites_count', 'verified', 'statuses_count','bot']

In [23]:
X = training_data.iloc[:, :-1].values
y = training_data.iloc[:, 6].values

In [25]:
#KNN

from sklearn.neighbors import KNeighborsClassifier as knn

classifier=knn(n_neighbors=5)
classifier.fit(X,y)

bots = training_data[training_data.bot==1]
Nbots = training_data[training_data.bot==0]
 
B = bots.iloc[:,:-1]
B_y = bots.iloc[:,6]
B_pred = classifier.predict(B)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(B_y,B_pred)
Performance(B_y,B_pred)

NB = Nbots.iloc[:,:-1]
NB_y = Nbots.iloc[:,6]
NB_pred = classifier.predict(NB)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(NB_y,NB_pred)
Performance(NB_y,NB_pred)

Accuracy is 89.2473%
 Precision is 100.0000%
 Recall is 89.2473%
F1 Score is 0.9432

Accuracy is 85.9413%
 Precision is 0.0000%
 Recall is 0.0000%
F1 Score is 0.9244



In [10]:
#SVM

from sklearn.svm import SVC
classifier=SVC(kernel='rbf', random_state=0)
classifier.fit(X,y)


bots = training_data[training_data.bot==1]
Nbots = training_data[training_data.bot==0]
 
B = bots.iloc[:,:-1]
B_y = bots.iloc[:,7]
B_pred = classifier.predict(B)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(B_y,B_pred)
Performance(B_y,B_pred)

NB = Nbots.iloc[:,:-1]
NB_y = Nbots.iloc[:,7]
NB_pred = classifier.predict(NB)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(NB_y,NB_pred)
Performance(NB_y,NB_pred)

Accuracy is 30.3763%
 Precision is 100.0000%
 Recall is 30.3763%
F1 Score is 0.4660

Accuracy is 92.4205%
 Precision is 0.0000%
 Recall is 0.0000%
F1 Score is 0.9606



In [11]:
#Random Forest

from sklearn.ensemble import RandomForestClassifier as rf
classifier= rf(n_estimators=10,criterion='entropy',random_state=0)
classifier.fit(X,y)


bots = training_data[training_data.bot==1]
Nbots = training_data[training_data.bot==0]

B = bots.iloc[:,:-1]
B_y = bots.iloc[:,7]
B_pred = classifier.predict(B)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(B_y,B_pred)
Performance(B_y,B_pred)

NB = Nbots.iloc[:,:-1]
NB_y = Nbots.iloc[:,7]
NB_pred = classifier.predict(NB)
 
#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(NB_y,NB_pred)
Performance(NB_y,NB_pred)

Accuracy is 98.2527%
 Precision is 100.0000%
 Recall is 98.2527%
F1 Score is 0.9912

Accuracy is 99.7555%
 Precision is 0.0000%
 Recall is 0.0000%
F1 Score is 0.9988



In [12]:
#Naive Bayes

from sklearn.naive_bayes import GaussianNB as GNB
classifier=GNB()
classifier.fit(X,y)


bots = training_data[training_data.bot==1]
Nbots = training_data[training_data.bot==0]
 
B = bots.iloc[:,:-1]
B_y = bots.iloc[:,7]
B_pred = classifier.predict(B)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(B_y,B_pred)
Performance(B_y,B_pred)

NB = Nbots.iloc[:,:-1]
NB_y = Nbots.iloc[:,7]
NB_pred = classifier.predict(NB)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(NB_y,NB_pred)
Performance(NB_y,NB_pred)

Accuracy is 30.3763%
 Precision is 100.0000%
 Recall is 30.3763%
F1 Score is 0.4660

Accuracy is 92.4205%
 Precision is 0.0000%
 Recall is 0.0000%
F1 Score is 0.9606



In [26]:
#Decision Tree

from sklearn.tree import DecisionTreeClassifier as DTC
import pickle
classifier= DTC(criterion="entropy")
classifier.fit(X,y)
filename = 'dtc_model.sav'
pickle.dump(classifier, open(filename, 'wb'))
bots = training_data[training_data.bot==1]
Nbots = training_data[training_data.bot==0]
training_data.to_html('bots.html')
 
B = bots.iloc[:,:-1]
B_y = bots.iloc[:,6]
B_pred = classifier.predict(B)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(B_y,B_pred)
Performance(B_y,B_pred)

NB = Nbots.iloc[:,:-1]
NB_y = Nbots.iloc[:,6]
NB_pred = classifier.predict(NB)

#Confusionmatrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(NB_y,NB_pred)
Performance(NB_y,NB_pred)

Accuracy is 100.0000%
 Precision is 100.0000%
 Recall is 100.0000%
F1 Score is 1.0000

Accuracy is 100.0000%
 Precision is 0.0000%
 Recall is 0.0000%
F1 Score is 1.0000

