# Task 9: AdaBoost or Gradient Boosting 
- Train an AdaBoostClassifier or GradientBoostingClassifier. 
- Use a suitable dataset. 
- Compare it with Random Forest and Decision Tree in terms of: 
    - Accuracy 
    - F1-score 
    - Training time (optional) 

In [19]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier , RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import seaborn as sns
import time

### LOADING, CLEANING AND SPLITTING THE DATASET

In [24]:
# Load the Titanic passenger table that ships with seaborn.
data = sns.load_dataset("titanic")

# Impute the gaps in a single pass: median for age, most frequent value
# for the embarkation port.
age_median = data['age'].median()
top_port = data['embarked'].mode()[0]
data.fillna({'age': age_median, 'embarked': top_port}, inplace=True)

# Encode the two string columns as small integers so the estimators
# can consume them.
sex_codes = {'male':0,'female':1}
port_codes = {"S": 0, "C": 1, "Q": 2}
data['sex'] = data['sex'].map(sex_codes)
data["embarked"] = data["embarked"].map(port_codes)

# Feature matrix and binary target.
feature_cols = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
X = data[feature_cols]
y = data['survived']

# 80/20 hold-out split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### ADABOOST CLASSIFIER

In [25]:
# Time AdaBoost end to end (fit on the training split + predict on the
# held-out split). perf_counter() is a monotonic, high-resolution clock,
# so short durations are not distorted by system clock adjustments or a
# coarse wall-clock tick.
start = time.perf_counter()
ada_model = AdaBoostClassifier(random_state=42)
ada_model.fit(X_train, y_train)
y_pred_ada = ada_model.predict(X_test)
end = time.perf_counter()

# Elapsed seconds for fit + predict.
time_ada = end - start
# Keep the original misspelled name as an alias: the timing report cell
# further down reads `time_eda`.
time_eda = time_ada

# Hold-out metrics for the comparison.
accuracy_ada = accuracy_score(y_test, y_pred_ada)
f1_ada = f1_score(y_test, y_pred_ada)

### GRADIENT BOOSTING CLASSIFIER

In [26]:
# Time gradient boosting end to end (fit + predict on the held-out split).
# perf_counter() is monotonic and high-resolution, which makes it the
# appropriate clock for sub-second interval measurements.
start = time.perf_counter()
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)
end = time.perf_counter()

# Hold-out metrics and elapsed seconds for the comparison.
accuracy_gb = accuracy_score(y_test, y_pred_gb)
f1_gb = f1_score(y_test, y_pred_gb)
time_gb = end - start

### DECISION TREE CLASSIFIER

In [27]:
# Time a single decision tree end to end (fit + predict on the held-out
# split). This run takes only a few milliseconds, so a monotonic,
# high-resolution clock (perf_counter) is needed for a meaningful reading.
start = time.perf_counter()
model_dt = DecisionTreeClassifier(random_state=42)
model_dt.fit(X_train, y_train)
y_pred_dt = model_dt.predict(X_test)
end = time.perf_counter()

# Hold-out metrics and elapsed seconds for the comparison.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
f1_dt = f1_score(y_test, y_pred_dt)
time_dt = end - start

### RANDOM FOREST CLASSIFIER

In [28]:
# Time the random forest end to end (fit + predict on the held-out split).
# perf_counter() is monotonic and high-resolution, which makes it the
# appropriate clock for sub-second interval measurements.
start = time.perf_counter()
model_rf = RandomForestClassifier(random_state=42)
model_rf.fit(X_train, y_train)
y_pred_rf = model_rf.predict(X_test)
end = time.perf_counter()

# Hold-out metrics and elapsed seconds for the comparison.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)
time_rf = end - start

In [29]:
# Accuracy on the held-out split, one line per model.
accuracy_rows = (
    ("accuracy of AdaBoostClassifier : ", accuracy_ada),
    ("accuracy of gradient Boost Classifier : ", accuracy_gb),
    ("accuracy of DecisionTreeClassifier : ", accuracy_dt),
    ("accuracy of RandomForestClassifier : ", accuracy_rf),
)
for label, value in accuracy_rows:
    print(label, value)

accuracy of AdaBoostClassifier :  0.7988826815642458
accuracy of gradient Boost Classifier :  0.8044692737430168
accuracy of DecisionTreeClassifier :  0.7988826815642458
accuracy of RandomForestClassifier :  0.8268156424581006


In [30]:
# F1 score on the held-out split, one line per model.
f1_rows = (
    ("f1 score of AdaBoostClassifier : ", f1_ada),
    ("f1 score of gradient Boost Classifier : ", f1_gb),
    ("f1 score of decisionTreeClassifier : ", f1_dt),
    ("f1 score of RandomForestClassifier : ", f1_rf),
)
for label, value in f1_rows:
    print(label, value)

f1 score of AdaBoostClassifier :  0.7428571428571429
f1 score of gradient Boost Classifier :  0.7407407407407407
f1 score of decisionTreeClassifier :  0.7567567567567568
f1 score of RandomForestClassifier :  0.7832167832167832


In [31]:
# Elapsed seconds recorded for each model (fit + predict), one line per model.
timing_rows = (
    ("time taken by AdaBoostClassifier : ", time_eda),
    ("time taken by gradient Boost Classifier : ", time_gb),
    ("time taken by DecisionTreeClassifier : ", time_dt),
    ("time taken by RandomForestClassifier : ", time_rf),
)
for label, seconds in timing_rows:
    print(label, seconds)

time taken by AdaBoostClassifier :  0.054831504821777344
time taken by gradient Boost Classifier :  0.09036874771118164
time taken by DecisionTreeClassifier :  0.006314754486083984
time taken by RandomForestClassifier :  0.09903264045715332
