In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [3]:
# Load the data file.
data = pd.read_csv('ctg.csv', sep=';');
df = data.fillna(method='pad') 
df.head()

Unnamed: 0,LB,AC,FM,UC,ASTV,ALTV,DL,DS,DP,DR,...,Max,Nmax,Nzeros,Mode,Mean,Median,Variance,Tendency,CLASS,NSP
0,120.0,0.0,0.0,0.0,73.0,43.0,0.0,0.0,0.0,0.0,...,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,9.0,2.0
1,132.0,4.0,0.0,4.0,17.0,0.0,2.0,0.0,0.0,0.0,...,198.0,6.0,1.0,141.0,136.0,140.0,12.0,0.0,6.0,1.0
2,133.0,2.0,0.0,5.0,16.0,0.0,2.0,0.0,0.0,0.0,...,198.0,5.0,1.0,141.0,135.0,138.0,13.0,0.0,6.0,1.0
3,134.0,2.0,0.0,6.0,16.0,0.0,2.0,0.0,0.0,0.0,...,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,6.0,1.0
4,132.0,4.0,0.0,5.0,16.0,0.0,0.0,0.0,0.0,0.0,...,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,2.0,1.0


In [4]:
# Seperate features and labels.

df_x = df.iloc[:, 0:20]
df_y = df.iloc[:, 20:21]
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.3, random_state=4)

In [5]:
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(x_train, y_train)

DecisionTreeClassifier()

In [6]:
dt_classifier.score(x_train, y_train)

0.9993288590604027

In [7]:
dt_classifier.score(x_test, y_test)

0.8262910798122066

In [8]:
#Bagging with Decision Tree algorithm.

bagging_classifire = BaggingClassifier(DecisionTreeClassifier(),
                                       max_samples= 0.6, 
                                       n_estimators = 100, 
                                       random_state=4)

bagging_classifire.fit(x_train, y_train.values.ravel())

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.6,
                  n_estimators=100, random_state=4)

In [9]:
bagging_classifire.score(x_train, y_train)

0.9778523489932885

In [10]:
bagging_classifire.score(x_test, y_test)

0.8513302034428795

In [11]:
#Random Forest
random_forest_classifier = RandomForestClassifier(n_estimators=50)
random_forest_classifier.fit(x_train, y_train.values.ravel())


RandomForestClassifier(n_estimators=50)

In [12]:
random_forest_classifier.score(x_train, y_train)

0.9993288590604027

In [13]:
random_forest_classifier.score(x_test, y_test)

0.8622848200312989

In [14]:
#Boosting - Ada Boost Alogorithm is used.

ada_boost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=6),
                               n_estimators = 20, 
                               learning_rate = 0.7)

ada_boost.fit(x_train,y_train.values.ravel())

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=6),
                   learning_rate=0.7, n_estimators=20)

In [15]:
ada_boost.score(x_train, y_train)

0.9919463087248322

In [16]:
ada_boost.score(x_test, y_test)

0.8482003129890454

In [143]:
# Voting classification with different base alogorithms

dt = DecisionTreeClassifier()
svm = SVC(kernel = 'poly', degree = 2 )
nb = GaussianNB()

voting_classifire = VotingClassifier( estimators= [('dt',dt),('svm',svm), ('nb', nb)], voting = 'hard')
voting_classifire.fit(x_train,y_train.values.ravel())

VotingClassifier(estimators=[('dt', DecisionTreeClassifier()),
                             ('svm', SVC(degree=2, kernel='poly')),
                             ('nb', GaussianNB())])

In [144]:
voting_classifire.score(x_train, y_train)

0.8664429530201342

In [145]:
voting_classifire.score(x_test, y_test)

0.7762128325508607