## Notebook: decision_tree.ipynb
This notebook is used for building decision tree and random forest classifiers.

In [59]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

### Data Preparation

In [61]:
X_labels = ['followers_count','friends_count','favourites_count','verified', 'default_profile_image', 'statuses_count','listed_count']
Y_label = 'identification'

data = pd.read_csv('../datasets/all-datasets.csv')
X = data[X_labels]
y = data[Y_label]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42)

### Decision Tree Classifier

**Max depth = 5**

In [62]:
decision_tree = DecisionTreeClassifier(max_depth=5, random_state=0)
dt_y_pred = decision_tree.fit(X_train, y_train).predict(X_test)
accuracy_score(dt_y_pred, y_test)

0.9550744174530248

In [63]:
print(classification_report(dt_y_pred, y_test))

              precision    recall  f1-score   support

         bot       0.98      0.97      0.97     31614
       human       0.81      0.85      0.83      4735

    accuracy                           0.96     36349
   macro avg       0.89      0.91      0.90     36349
weighted avg       0.96      0.96      0.96     36349



### Random Forest Classifier

**Num estimators = 20**\
**Max depth = 4**

In [64]:
random_forest = RandomForestClassifier(n_estimators=20, max_depth=4, random_state=0)
rf_y_pred = random_forest.fit(X_train, y_train).predict(X_test)
accuracy_score(rf_y_pred, y_test)

0.9543041074032298

In [65]:
print(classification_report(rf_y_pred, y_test))

              precision    recall  f1-score   support

         bot       0.97      0.97      0.97     31296
       human       0.84      0.83      0.83      5053

    accuracy                           0.95     36349
   macro avg       0.91      0.90      0.90     36349
weighted avg       0.95      0.95      0.95     36349

