In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the QCM7 table; the file is semicolon-delimited.
df = pd.read_csv('../datasets/QCM7.csv', delimiter=';')

# Preview the first five rows.
df.head(5)

Unnamed: 0,0.799_0.201,0.799_0.201.1,0.700_0.300,0.700_0.300.1,0.600_0.400,0.600_0.400.1,0.501_0.499,0.501_0.499.1,0.400_0.600,0.400_0.600.1,1-Octanol,1-Propanol,2-Butanol,2-propanol,1-isobutanol
0,-11.23,-14.21,-18.71,-22.65,-27.32,-32.39,-35.28,-40.72,-40.63,-49.87,1,0,0,0,0
1,-12.44,-15.44,-21.13,-25.53,-30.45,-36.22,-36.85,-45.11,-45.4,-55.16,1,0,0,0,0
2,-13.75,-16.72,-24.0,-28.29,-33.91,-39.95,-40.96,-49.43,-49.91,-60.11,1,0,0,0,0
3,-15.05,-17.98,-26.42,-30.65,-36.56,-43.0,-43.81,-52.73,-53.89,-64.35,1,0,0,0,0
4,-16.67,-19.11,-28.69,-32.58,-39.62,-45.68,-47.16,-55.65,-57.2,-67.68,1,0,0,0,0


In [12]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(25, 15)

In [13]:
# Column-wise totals of the five trailing (alcohol label) columns,
# i.e. how many rows are flagged for each label.
df[df.columns[-5:]].sum(axis=0)

1-Octanol       5
1-Propanol      5
2-Butanol       5
2-propanol      5
1-isobutanol    5
dtype: int64

In [14]:
# Feature matrix: the first ten columns, i.e. everything that precedes the
# five trailing label columns of the 15-column frame.
X = df.loc[:, df.columns[:10]]

In [15]:
# Names of the five trailing columns; these are used as the class labels.
class_names = df.iloc[:, -5:].columns
class_names

Index(['1-Octanol', '1-Propanol', '2-Butanol', '2-propanol', '1-isobutanol'], dtype='object')

In [16]:
# Collapse the label columns into a single target Series: for every row,
# idxmax returns the name of the label column holding the largest value
# (the column that contains the 1 in the rows displayed above).
y = df.loc[:, class_names].idxmax(axis="columns")
y

0        1-Octanol
1        1-Octanol
2        1-Octanol
3        1-Octanol
4        1-Octanol
5       1-Propanol
6       1-Propanol
7       1-Propanol
8       1-Propanol
9       1-Propanol
10       2-Butanol
11       2-Butanol
12       2-Butanol
13       2-Butanol
14       2-Butanol
15      2-propanol
16      2-propanol
17      2-propanol
18      2-propanol
19      2-propanol
20    1-isobutanol
21    1-isobutanol
22    1-isobutanol
23    1-isobutanol
24    1-isobutanol
dtype: object

In [17]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in the train and test parts; the fixed random_state
# makes the split repeatable.
# NOTE(review): with the 25 rows reported by df.shape this leaves only a
# handful of test rows, so the resulting accuracy is a very coarse estimate.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=40,
)

In [20]:
# Bagging ensemble of 15 decision trees. Both the ensemble and its base tree
# are seeded so repeated runs build the same model.
bagging_clf = BaggingClassifier(
    random_state=42,
    n_estimators=15,
    estimator=DecisionTreeClassifier(random_state=42),
)

In [21]:
# Train the bagging ensemble on the training split, then predict labels for
# the held-out rows. fit() returns the fitted estimator, so the two steps
# can be chained.
y_pred_bag = bagging_clf.fit(X_train, y_train).predict(X_test)

In [23]:
# Report held-out accuracy and the per-class precision/recall/F1 table for
# the bagging model. The labels are plain strings: the original f-prefix had
# no placeholders to interpolate, so it is dropped (printed text unchanged).
print('Bagging Accuracy:', accuracy_score(y_test, y_pred_bag))
print('Classification Report:', classification_report(y_test, y_pred_bag))

Bagging Accuracy: 1.0
Classification Report:               precision    recall  f1-score   support

   1-Octanol       1.00      1.00      1.00         1
  1-Propanol       1.00      1.00      1.00         1
1-isobutanol       1.00      1.00      1.00         1
   2-Butanol       1.00      1.00      1.00         1
  2-propanol       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5



In [24]:
# Boost shallow trees rather than fully grown ones. An unrestricted tree
# typically reaches zero training error, and AdaBoost stops adding estimators
# as soon as a round has zero weighted error, so n_estimators=15 would
# collapse to a single tree. With max_depth=2 a tree has at most four leaves
# and cannot label all five classes, so every round leaves errors for the
# next round to reweight.
# NOTE(review): max_depth is a tunable; re-run the cells below after
# changing it, since their results depend on this model.
ada_clf = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=2, random_state=42),
    n_estimators=15,
    random_state=42
)

In [29]:
# Train the AdaBoost ensemble on the training split, then predict labels for
# the held-out rows. fit() returns the fitted estimator, so the two steps
# can be chained.
y_pred_ada = ada_clf.fit(X_train, y_train).predict(X_test)

In [30]:
# Held-out accuracy of the AdaBoost model. The label is a plain string: the
# original f-prefix had no placeholders, so it is dropped (output unchanged).
# NOTE(review): the same accuracy is printed again with the classification
# report in the following cell; one of the two could be removed.
print('Accuracy for Ada boost : ', accuracy_score(y_test, y_pred_ada))

Accuracy for Ada boost :  1.0


In [31]:
# Report held-out accuracy and the per-class precision/recall/F1 table for
# the AdaBoost model. The labels are plain strings: the original f-prefix had
# no placeholders to interpolate, so it is dropped (printed text unchanged).
print('Boosting Accuracy:', accuracy_score(y_test, y_pred_ada))
print('Classification Report:', classification_report(y_test, y_pred_ada))

Boosting Accuracy: 1.0
Classification Report:               precision    recall  f1-score   support

   1-Octanol       1.00      1.00      1.00         1
  1-Propanol       1.00      1.00      1.00         1
1-isobutanol       1.00      1.00      1.00         1
   2-Butanol       1.00      1.00      1.00         1
  2-propanol       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5

