Use the Titanic dataset you used in Q1. Now use the AdaBoost ensemble method to create a classification model. Compare the accuracy of this model with that of the ensemble model obtained in Q1.


In [1]:
#Loading the Titanic passenger records from the hosted CSV file
import pandas as pd
TITANIC_CSV_URL="https://raw.githubusercontent.com/kajalpanda1/DatasetRepository/master/Titanic.csv"
titanic_df=pd.read_csv(TITANIC_CSV_URL)

In [2]:
#Checking for NULL values: info() reports the non-null count and dtype of every column
titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  156 non-null    int64  
 1   Survived     156 non-null    int64  
 2   Pclass       156 non-null    int64  
 3   Name         156 non-null    object 
 4   Sex          156 non-null    object 
 5   Age          126 non-null    float64
 6   SibSp        156 non-null    int64  
 7   Parch        156 non-null    int64  
 8   Ticket       156 non-null    object 
 9   Fare         156 non-null    float64
 10  Cabin        31 non-null     object 
 11  Embarked     155 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 14.8+ KB


In [3]:
#Listing the column names of the titanic dataset
titanic_df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [4]:
#Removing the columns that are not used as model inputs
unused_columns=['PassengerId','Name','Ticket','Fare','Cabin']
titanic_df=titanic_df.drop(unused_columns,axis=1)

In [5]:
#Previewing the first rows of the dataset after the columns were dropped
titanic_df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,male,22.0,1,0,S
1,1,1,female,38.0,1,0,C
2,1,3,female,26.0,0,0,S
3,1,1,female,35.0,1,0,S
4,0,3,male,35.0,0,0,S


In [6]:
#Replacing missing ages with the median age of the passengers
median_age=titanic_df['Age'].median()
titanic_df['Age']=titanic_df['Age'].fillna(median_age)

In [7]:
#Handling NULL values in Embarked column by forward-filling from the previous row.
#fillna('ffill') does NOT forward-fill: it writes the literal text 'ffill' into the
#missing cells, which would later be label-encoded as an extra, meaningless port.
#The trailing bfill() only matters if the very first row is missing (nothing to carry forward).
titanic_df['Embarked']=titanic_df['Embarked'].ffill().bfill()

In [8]:
#Converting the categorical Sex and Embarked columns into their corresponding numerical codes
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
#The same encoder object is refitted per column, so afterwards it holds the Embarked classes
for categorical_column in ('Sex','Embarked'):
    titanic_df[categorical_column]=le.fit_transform(titanic_df[categorical_column])

In [9]:
#Separating the dataset into the target array (first column) and the feature matrix (all remaining columns)
target_df=titanic_df.iloc[:,0].to_numpy()
feature_df=titanic_df.iloc[:,1:].to_numpy()

In [10]:
#Standardizing the features of the titanic dataset
from sklearn.preprocessing import StandardScaler
ss=StandardScaler()
feature_df=ss.fit_transform(feature_df)

In [11]:
#Splitting titanic dataset into training and testing set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(feature_df,target_df,test_size=0.2,random_state=3)

In [12]:
#Training a depth-limited decision tree on the training split
from sklearn.tree import DecisionTreeClassifier
titanic_tree=DecisionTreeClassifier(random_state=10,max_depth=5)
titanic_tree.fit(x_train,y_train)
#fit() returns the estimator itself, so both names refer to the same fitted tree
model_dt=titanic_tree

In [13]:
#Training a logistic regression classifier on the training split
from sklearn.linear_model import LogisticRegression
lgr=LogisticRegression(random_state=10,solver='liblinear')
lgr.fit(x_train,y_train)
#fit() returns the estimator itself, so both names refer to the same fitted model
model_lgr=lgr

In [36]:
#Building Support Vector Classifier model
from sklearn import svm
#probability=True enables predict_proba; those probabilities are calibrated with an
#internal, randomised cross-validation, so random_state is fixed (same seed as the
#other base models) to make the fitted model reproducible on re-run.
svc=svm.SVC(kernel='linear',C=0.1,probability=True,random_state=10)
model_svc=svc.fit(x_train,y_train)

In [37]:
#ENSEMBLE VOTING CLASSIFIER
from sklearn.ensemble import VotingClassifier
#Each entry pairs a label with one of the three base models
estimators=[
    ('decision tree',model_dt),
    ('logistic regression',model_lgr),
    ('svm',model_svc),
]
#voting='hard': the predicted class is the majority vote of the base models
ensemble=VotingClassifier(estimators=estimators,voting='hard')

#Fitting the ensemble on the training instances
ensemble.fit(x_train,y_train)

VotingClassifier(estimators=[('decision tree',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=5,
                                                     max_features=None,
                                                     max_leaf_nodes=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
                                                     presort='deprecated',
                                                     random_state=1

In [38]:
#Reporting the voting classifier's accuracy on the training and testing splits
for split_name,split_x,split_y in (('Training',x_train,y_train),('Testing',x_test,y_test)):
    print(split_name+' Accuracy:',ensemble.score(split_x,split_y))

Training Accuracy: 0.8064516129032258
Testing Accuracy: 0.84375


In [62]:
#Importing adaboostclassifier
from sklearn.ensemble import AdaBoostClassifier
#Implementing adaboost classifier with base model as decision tree classifier.
#random_state is fixed so the boosted model (and the accuracies printed from it) is
#reproducible on re-run; without it every cloned base estimator is reseeded randomly.
#NOTE(review): learning_rate=3.0 is well above the default of 1.0 -- confirm this is intentional.
#NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn 1.2; the old
#keyword is kept because the recorded outputs come from an older release.
abc=AdaBoostClassifier(n_estimators=300, base_estimator=model_dt, learning_rate=3.0, random_state=10)
abc_dt=abc.fit(x_train,y_train)

In [63]:
#Reporting the tree-based adaboost model's accuracy on the training and testing splits
for split_name,split_x,split_y in (('Training',x_train,y_train),('Testing',x_test,y_test)):
    print(split_name+' Accuracy:',abc_dt.score(split_x,split_y))

Training Accuracy: 0.75
Testing Accuracy: 0.78125


In [68]:
#Implementing adaboost classifier with base model as svm classifier.
#random_state is fixed so the boosted model (and the accuracies printed from it) is
#reproducible on re-run; without it every cloned base estimator is reseeded randomly.
#NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn 1.2; the old
#keyword is kept because the recorded outputs come from an older release.
abc1=AdaBoostClassifier(n_estimators=500, base_estimator=model_svc, random_state=10)
abc_svc=abc1.fit(x_train,y_train)

In [69]:
#Reporting the svm-based adaboost model's accuracy on the training and testing splits
for split_name,split_x,split_y in (('Training',x_train,y_train),('Testing',x_test,y_test)):
    print(split_name+' Accuracy:',abc_svc.score(split_x,split_y))

Training Accuracy: 0.6370967741935484
Testing Accuracy: 0.71875


In [70]:
#Implementing adaboost classifier with base model as logistic regressor.
#random_state is fixed so the boosted model (and the accuracies printed from it) is
#reproducible on re-run; without it every cloned base estimator is reseeded randomly.
#NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn 1.2; the old
#keyword is kept because the recorded outputs come from an older release.
abc2=AdaBoostClassifier(n_estimators=500, base_estimator=model_lgr, random_state=10)
abc_lgr=abc2.fit(x_train,y_train)

In [71]:
#Reporting the logistic-regression-based adaboost model's accuracy on the training and testing splits
for split_name,split_x,split_y in (('Training',x_train,y_train),('Testing',x_test,y_test)):
    print(split_name+' Accuracy:',abc_lgr.score(split_x,split_y))

Training Accuracy: 0.8064516129032258
Testing Accuracy: 0.84375
