In [1]:

# Bagged Decision Trees for Classification
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

filename = '/content/pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
seed = 7
kfold = KFold(n_splits=10,random_state=seed,shuffle=True) # evaluation method
cart = DecisionTreeClassifier()
num_trees = 100

model = BaggingClassifier(estimator=cart, n_estimators=num_trees, random_state=seed) #Classifier
results = cross_val_score(model, X, Y, cv=kfold)

In [2]:
results

array([0.76623377, 0.75324675, 0.74025974, 0.77922078, 0.80519481,
       0.79220779, 0.66233766, 0.75324675, 0.78947368, 0.73684211])

In [3]:
print(results.mean())

0.7578263841421736


In [4]:
# Random Forest Classification
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

array=dataframe.values
X = array[:,0:8]
Y = array[:,8]

num_trees = 100
max_features = 3 #Total features m=8, K=3 i.e. randomly select 3 features
kfold = KFold(n_splits=10) #Bootstrap

model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)
results = cross_val_score(model, X, Y, cv=kfold)

In [5]:
results

array([0.67532468, 0.79220779, 0.75324675, 0.63636364, 0.79220779,
       0.83116883, 0.83116883, 0.87012987, 0.72368421, 0.76315789])

In [6]:
print(results.mean())

0.766866028708134


In [7]:
# AdaBoost Classification
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
filename = '/content/pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]

num_trees = 10 # try 20,30
seed=7
kfold = KFold(n_splits=10, random_state=seed,shuffle=True)
model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)
results = cross_val_score(model, X, Y, cv=kfold)

In [8]:
results

array([0.80519481, 0.74025974, 0.67532468, 0.81818182, 0.80519481,
       0.79220779, 0.71428571, 0.75324675, 0.78947368, 0.77631579])

In [9]:
print(results.mean())

0.7669685577580314


In [10]:
# Stacking Ensemble for Classification
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC # Support Vector Classifier
from sklearn.ensemble import VotingClassifier

filename = '/content/pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
kfold = KFold(n_splits=10)

# create the sub models
estimators = [] # create empty list for different algorithms

model1 = LogisticRegression(max_iter=500)
estimators.append(('logistic', model1))

model2 = DecisionTreeClassifier()
estimators.append(('cart', model2))

model3 = SVC()
estimators.append(('svc', model3))

# create the ensemble model
ensemble = VotingClassifier(estimators)
results = cross_val_score(ensemble, X, Y, cv=kfold)

In [11]:
estimators

[('logistic', LogisticRegression(max_iter=500)),
 ('cart', DecisionTreeClassifier()),
 ('svc', SVC())]

In [12]:
results

array([0.64935065, 0.80519481, 0.72727273, 0.64935065, 0.79220779,
       0.81818182, 0.84415584, 0.84415584, 0.75      , 0.76315789])

In [13]:
print(results.mean())

0.7643028024606973
