## Notebook: logistic_regression.ipynb

This notebook is used for building logistic regression classifiers.

In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

### Data Preparation

In [2]:
X_labels = ['geo_enabled','default_profile','default_profile_image','followers_count','friends_count','favourites_count','listed_count','retweet_post_percent','reply_post_percent','avg_hashtags','avg_urls','avg_mentions','avg_retweets_cnt','avg_reply_cnt']
Y_label = 'identification'

data = pd.read_csv('../datasets/MIB/mib_processed.csv')
X = data[X_labels]
y = data[Y_label]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

### Logistic Regression Classifier

**Penalty = L1**

In [3]:
logistic_regression_l1 = LogisticRegression(solver='liblinear', penalty='l1', random_state=0)
l1_y_pred = logistic_regression_l1.fit(X_train, y_train).predict(X_test)
accuracy_score(l1_y_pred, y_test)

0.9613180515759312

In [4]:
print(classification_report(l1_y_pred, y_test))

              precision    recall  f1-score   support

         bot       0.98      0.97      0.97      2527
       human       0.91      0.95      0.93       963

    accuracy                           0.96      3490
   macro avg       0.95      0.96      0.95      3490
weighted avg       0.96      0.96      0.96      3490



### Logistic Regression Classifier

**Penalty = L2**

In [5]:
logistic_regression_l2 = LogisticRegression(solver='liblinear', penalty='l2', random_state=0)
l2_y_pred = logistic_regression_l2.fit(X_train, y_train).predict(X_test)
accuracy_score(l2_y_pred, y_test)

0.9232091690544413

In [6]:
print(classification_report(l2_y_pred, y_test))

              precision    recall  f1-score   support

         bot       0.99      0.91      0.95      2728
       human       0.75      0.98      0.85       762

    accuracy                           0.92      3490
   macro avg       0.87      0.94      0.90      3490
weighted avg       0.94      0.92      0.93      3490

