In [26]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [27]:
# Load the ads dataset: every column except the last becomes the feature
# matrix X, and the last column becomes the target vector y.
dataset = pd.read_csv('Social_Network_Ads.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [28]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [29]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test split does not influence the scaling.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [30]:
# Baseline without ensembling: a single decision tree using the entropy criterion.
from sklearn.tree import DecisionTreeClassifier

dec_en = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)
y_pred_dec_en = dec_en.predict(X_test)

In [31]:
# Random forest of 10 trees using the entropy criterion.
from sklearn.ensemble import RandomForestClassifier

rf_classifier = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)
y_pred_rf_en = rf_classifier.predict(X_test)

In [32]:
# Bagging ensemble of 10 entropy-criterion decision trees.
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

base_classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
bagging_classifier = BaggingClassifier(
    base_classifier,
    n_estimators=10,
    random_state=0,
).fit(X_train, y_train)
y_pred_bag_en = bagging_classifier.predict(X_test)

In [33]:
# AdaBoost (up to 10 rounds) over an entropy-criterion decision tree.
# NOTE(review): the base tree has no depth limit, and the recorded entropy
# results table in this notebook shows the Boosting row identical to the
# no_ensembling row. That suggests boosting may be stopping after the first
# estimator fits the training data; a shallower base tree (e.g. max_depth=1)
# may be what was intended -- confirm before relying on this comparison.
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

base = DecisionTreeClassifier(criterion='entropy', random_state=0)
adaboost_classifier = AdaBoostClassifier(
    base,
    n_estimators=10,
    random_state=0,
).fit(X_train, y_train)
y_pred_boost_en = adaboost_classifier.predict(X_test)


In [34]:
# Baseline without ensembling: a single decision tree using the gini criterion.
dec_gn = DecisionTreeClassifier(
    criterion='gini',
    random_state=0,
).fit(X_train, y_train)
y_pred_gn = dec_gn.predict(X_test)

In [35]:
# Random forest of 10 trees using the gini criterion.
from sklearn.ensemble import RandomForestClassifier

rf_class = RandomForestClassifier(
    n_estimators=10,
    criterion='gini',
    random_state=0,
).fit(X_train, y_train)
y_pred_rf_gn = rf_class.predict(X_test)


In [36]:
# Bagging ensemble of 10 gini-criterion decision trees.
# Note: this rebinds `base_classifier`, which an earlier cell used for the
# entropy-criterion tree.
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

base_classifier = DecisionTreeClassifier(criterion='gini', random_state=0)
bagging_class = BaggingClassifier(
    base_classifier,
    n_estimators=10,
    random_state=0,
).fit(X_train, y_train)
y_pred_bag_gn = bagging_class.predict(X_test)

In [37]:
# AdaBoost (up to 10 rounds) over a gini-criterion decision tree.
# NOTE(review): the base tree has no depth limit, and the recorded gini
# results table in this notebook shows the Boosting row identical to the
# no_ensembling row. That suggests boosting may be stopping after the first
# estimator fits the training data; a shallower base tree (e.g. max_depth=1)
# may be what was intended -- confirm before relying on this comparison.
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

base_gn = DecisionTreeClassifier(criterion='gini', random_state=0)
adaboost_class = AdaBoostClassifier(
    base_gn,
    n_estimators=10,
    random_state=0,
).fit(X_train, y_train)
y_pred_boost_gn = adaboost_class.predict(X_test)

In [38]:
from sklearn.ensemble import VotingClassifier

# Hard-voting ensemble built from the entropy and gini decision trees.
# NOTE(review): with only two voters, every disagreement between the trees is
# a tie. scikit-learn documents that hard-voting ties are resolved by ascending
# class-label order, so the lower label would win each tie -- confirm this is
# intended, or use an odd number of estimators.
voting_classifier = VotingClassifier(
    estimators=[('decision_tree_entropy', dec_en), ('decision_tree_gini', dec_gn)],
    voting='hard',
).fit(X_train, y_train)
y_pred_votin = voting_classifier.predict(X_test)

In [39]:
#calculating_performances
def calculate_metrics(y_true,y_pred):
    """Score a set of predictions against the true labels.

    Each scikit-learn scorer is called with only ``y_true`` and ``y_pred``,
    so every metric uses that scorer's default settings.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` in that order.
    """
    from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

    # Order matters: callers index the result positionally (0=accuracy ... 3=f1).
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)


In [40]:
# Entropy-criterion comparison table.
# Each model's test predictions are scored exactly once (previously every
# model was re-scored for each metric column). The per-model
# (accuracy, precision, recall, f1) tuples are then transposed into one list
# per metric, in the same row order as "technique".
entropy_predictions = {
    "no_ensembling": y_pred_dec_en,
    "Random Forest": y_pred_rf_en,
    "Bagging": y_pred_bag_en,
    "Boosting": y_pred_boost_en,
}
entropy_scores = [
    calculate_metrics(y_test, predictions)
    for predictions in entropy_predictions.values()
]
metrics = {
    "technique": list(entropy_predictions),
    **{
        column: list(values)
        for column, values in zip(
            ("accuracy", "precision", "recall", "f1_score"),
            zip(*entropy_scores),  # transpose: rows of scores -> columns per metric
        )
    },
}

In [41]:
# Gini-criterion comparison table.
# Each model's test predictions are scored exactly once (previously every
# model was re-scored for each metric column). The per-model
# (accuracy, precision, recall, f1) tuples are then transposed into one list
# per metric, in the same row order as "technique".
gini_predictions = {
    "no_ensembling": y_pred_gn,
    "Random Forest": y_pred_rf_gn,
    "Bagging": y_pred_bag_gn,
    "Boosting": y_pred_boost_gn,
}
gini_scores = [
    calculate_metrics(y_test, predictions)
    for predictions in gini_predictions.values()
]
metrics_2 = {
    "technique": list(gini_predictions),
    **{
        column: list(values)
        for column, values in zip(
            ("accuracy", "precision", "recall", "f1_score"),
            zip(*gini_scores),  # transpose: rows of scores -> columns per metric
        )
    },
}

In [42]:
# Tabulate both metric dictionaries: one row per technique, one column per metric.
metrics_df, metrics_2_df = pd.DataFrame(data=metrics), pd.DataFrame(data=metrics_2)

In [43]:
# Print the heading (which ends in a blank line) followed by the entropy table.
print("decision tree using entropy criterion performance\n", metrics_df, sep="\n")

decision tree using entropy criterion performance

       technique  accuracy  precision   recall  f1_score
0  no_ensembling      0.91   0.828571  0.90625  0.865672
1  Random Forest      0.91   0.848485  0.87500  0.861538
2        Bagging      0.91   0.870968  0.84375  0.857143
3       Boosting      0.91   0.828571  0.90625  0.865672


In [44]:
# Print the heading (which ends in a blank line) followed by the gini table.
print("decision tree using gini criterion performance\n", metrics_2_df, sep="\n")

decision tree using gini criterion performance

       technique  accuracy  precision  recall  f1_score
0  no_ensembling      0.90   0.823529   0.875  0.848485
1  Random Forest      0.92   0.875000   0.875  0.875000
2        Bagging      0.91   0.848485   0.875  0.861538
3       Boosting      0.90   0.823529   0.875  0.848485


In [46]:


# Score the voting ensemble's test predictions once (previously re-scored for
# every metric column), then wrap each value in a one-element list so the
# DataFrame has a single row.
voting_scores = calculate_metrics(y_test, y_pred_votin)
metrics_3 = {
    column: [value]
    for column, value in zip(
        ("accuracy", "precision", "recall", "f1_score"),
        voting_scores,
    )
}
metrics_3_df = pd.DataFrame(metrics_3)
print("performance using voting of gini and entropy criterion based decision tree\n")
print(metrics_3_df)

performance using voting of gini and entropy criterion based decision tree

   accuracy  precision   recall  f1_score
0      0.89   0.818182  0.84375  0.830769
