## 1. Stacking

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Feature matrix as a labelled DataFrame (used later for inspection).
data_df = pd.DataFrame(data.data, columns=data.feature_names)

# First split: 60% of the rows for training, the rest set aside.
X_train, X_rem, y_train, y_rem = train_test_split(
    data.data, data.target, train_size=0.6, random_state=97
)

# Second split: the set-aside rows become validation (70%) and test (30%).
X_valid, X_test, y_valid, y_test = train_test_split(
    X_rem, y_rem, test_size=0.3, random_state=97
)

# Total number of elements in the feature array.
print(data.data.size)


17070


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

# Base (level-0) learners for the hand-rolled stacking ensemble, keyed by a
# short name that later cells use to look each model up.
models = {
  'lr': LogisticRegression(max_iter=100000),
  # Seeded with the same value as the data splits: tree construction permutes
  # the features at every split, so an unseeded tree can differ between runs.
  'cart': DecisionTreeClassifier(random_state=97),
  'bayes': GaussianNB(),
}

# Fit every base learner on the training split only; the validation and test
# splits are kept unseen for the later stacking steps.
for base_model in models.values():
  base_model.fit(X_train, y_train)

In [None]:
# Predictions of each base learner on the validation split.
pred1 = models['lr'].predict(X_valid)
pred2 = models['cart'].predict(X_valid)
pred3 = models['bayes'].predict(X_valid)

# Predictions of each base learner on the test split.
test_preds1 = models['lr'].predict(X_test)
test_preds2 = models['cart'].predict(X_test)
test_preds3 = models['bayes'].predict(X_test)

# Validation features with one extra column per base learner's prediction.
data_df_new = pd.DataFrame(data = X_valid,
                       columns = data.feature_names)
data_df_new['lr'] = pred1
data_df_new['cart'] = pred2
data_df_new['bayes'] = pred3

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
data_df_new.info()
print(data_df_new.head())

In [None]:
# NOTE: this cell repeats the preceding cell's computation; it is kept
# self-contained so it can be run on its own and produces the same variables.
_BASE_NAMES = ('lr', 'cart', 'bayes')

# Base-learner predictions on the validation split, in the order lr, cart, bayes.
pred1, pred2, pred3 = (models[name].predict(X_valid) for name in _BASE_NAMES)

# Base-learner predictions on the test split, same order.
test_preds1, test_preds2, test_preds3 = (
    models[name].predict(X_test) for name in _BASE_NAMES
)

# Validation features plus one prediction column per base learner.
data_df_new = pd.DataFrame(X_valid, columns=data.feature_names).assign(
    lr=pred1, cart=pred2, bayes=pred3
)

# info() prints its own summary and returns None; calling it inside print()
# added a spurious "None" to the output.
data_df_new.info()
print(data_df_new.head())

In [None]:
# Plain-text preview of the first five rows of the feature frame.
print(data_df.head(n=5))

   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst radius  worst texture  worst perimeter  \
0           

In [None]:
# Meta-features: one column per base learner, one row per sample.
# Built from the validation-split predictions (used to fit the meta-learner)...
train_stack = np.stack((pred1, pred2, pred3), axis=1)
# ...and from the test-split predictions (used to evaluate it).
test_stack = np.stack((test_preds1, test_preds2, test_preds3), axis=1)

In [None]:
# Meta-learner: logistic regression over the base learners' predictions.
final_model = LogisticRegression(max_iter=100_000)

# Fit on the stacked validation predictions against the validation labels.
final_model.fit(X=train_stack, y=y_valid)

LogisticRegression(max_iter=100000)

In [None]:
# Meta-learner predictions for the test split, from the stacked test predictions.
final_predictions = final_model.predict(X=test_stack)

In [None]:
from sklearn import metrics

# Report each metric of the stacked model on the test split, one per line.
for label, scorer in (
  ("Accuracy: ", metrics.accuracy_score),
  ("Precision: ", metrics.precision_score),
  ("Recall: ", metrics.recall_score),
):
  print(label, scorer(y_test, final_predictions))

Accuracy:  0.9855072463768116
Precision:  0.9791666666666666
Recall:  1.0


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

def model_Evaluate(model, y_test, final_predictions):
  """Print a classification report comparing true and predicted labels.

  Args:
    model: Not used by the function body; accepted so existing calls keep working.
    y_test: True labels.
    final_predictions: Predicted labels, aligned with ``y_test``.

  Returns:
    None. The report is written to stdout.
  """
  report = classification_report(y_test, final_predictions)
  print(report)


# Report for the hand-built stacking model on the test split.
model_Evaluate(model=final_model, y_test=y_test, final_predictions=final_predictions)

## StackingClassifier from sklearn

In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

# Load the dataset once and derive both the (X, y) arrays and the labelled
# DataFrame from the same object instead of calling the loader twice.
data = load_breast_cancer()
X, y = data.data, data.target
data_df = pd.DataFrame(data = data.data,
                       columns = data.feature_names)

# 80% train / 20% test; no separate validation split in this section.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=97, train_size=0.8)


# Base learners for StackingClassifier.
model1 = LogisticRegression(max_iter=100000)
# Seeded so the tree (and therefore the ensemble's scores) is repeatable:
# tree construction permutes features at each split.
model2 = DecisionTreeClassifier(random_state=97)
model3 = GaussianNB()

print(y_test)

[1 0 1 1 0 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 0 0 1 1 0 1 1 1 1 0 0 0 1 0 0 0 0
 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0
 1 1 1 0 1 0 0 1 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 0 0 1 0 1 0 0 1 1 0 1 0 1 0
 0 0 0]


In [None]:
# (name, estimator) pairs for the base learners, in the order lr, cart, bayes.
estimators = list(zip(('lr', 'cart', 'bayes'), (model1, model2, model3)))

# Logistic regression as the final estimator on top of the base learners;
# cv=10 is handed to StackingClassifier unchanged.
final_model = LogisticRegression(max_iter=100_000)
sclf = StackingClassifier(
    estimators=estimators,
    final_estimator=final_model,
    cv=10,
)

In [None]:
# Fit the stacking ensemble on the training split (the cell displays the fitted estimator).
sclf.fit(X=X_train, y=y_train)

StackingClassifier(cv=10,
                   estimators=[('lr', LogisticRegression(max_iter=100000)),
                               ('cart', DecisionTreeClassifier()),
                               ('bayes', GaussianNB())],
                   final_estimator=LogisticRegression(max_iter=100000))

In [None]:
# Stacking-ensemble predictions for the test split.
prediction = sclf.predict(X=X_test)

In [None]:
# Test-split metrics for the StackingClassifier.
print("Accuracy: ",sclf.score(X_test, y_test))
print("Precision: ",metrics.precision_score( y_test, prediction))
# This line reports recall_score; it was previously printed under the label
# "Accuracy: ", which made the output show two different "accuracy" values.
print("Recall: ",metrics.recall_score( y_test, prediction))

Accuracy:  0.956140350877193
Precision:  0.9558823529411765
Accuracy:  0.9701492537313433


## AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

# Dataset object and its labelled feature frame.
data = load_breast_cancer()
data_df = pd.DataFrame(data.data, columns=data.feature_names)

# Plain (features, target) arrays for modelling.
X, y = load_breast_cancer(return_X_y=True)

# 80% train / 20% test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=97
)

# Same three named learners as in the stacking section.
model1, model2, model3 = (
    LogisticRegression(max_iter=100000),
    DecisionTreeClassifier(),
    GaussianNB(),
)

estimators = list(zip(('lr', 'cart', 'bayes'), (model1, model2, model3)))

In [None]:
# AdaBoost classifier with its default base learner.
# random_state is fixed (same seed as the train/test split) so that repeated
# runs of the notebook give the same fitted ensemble and the same scores.
abc = AdaBoostClassifier(learning_rate=1, random_state=97)
abc.fit(X_train, y_train)

# Predicted labels for the test split.
prediction = abc.predict(X_test)

In [None]:
from sklearn import metrics

# Test-split metrics for the AdaBoost classifier, printed one per line.
for label, value in (
  ("Accuracy: ", abc.score(X_test, y_test)),
  ("Precision: ", metrics.precision_score(y_test, prediction)),
  ("Recall: ", metrics.recall_score(y_test, prediction)),
):
  print(label, value)

Accuracy:  0.9473684210526315
Precision:  0.9552238805970149
Recall:  0.9552238805970149


## AdaBoost Regression on the concrete dataset (loaded below from `Contrete.csv`)

In [None]:
# Colab-only helper: this import is available inside Google Colab.
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by
# path; the CSV loaded in the next cell lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the CSV from the mounted Drive folder into a DataFrame.
datasets = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/datasets/Contrete.csv')

In [None]:
# Features: every column except the last one, as a NumPy array.
X = datasets.iloc[:, :-1].to_numpy()

# Target: the last column only (index 0 is the first column, -1 the last
# column, -2 the second-to-last column).
y = datasets.iloc[:, -1].to_numpy()

print("\n\nInput : \n", X)
print("\n\nOutput: \n", y)



Input : 
 [[ 540.     0.     0.  ... 1040.   676.    28. ]
 [ 540.     0.     0.  ... 1055.   676.    28. ]
 [ 332.5  142.5    0.  ...  932.   594.   270. ]
 ...
 [ 148.5  139.4  108.6 ...  892.4  780.    28. ]
 [ 159.1  186.7    0.  ...  989.6  788.9   28. ]
 [ 260.9  100.5   78.3 ...  864.5  761.5   28. ]]


Output: 
 [79.99 61.89 40.27 ... 23.7  32.77 32.4 ]


In [None]:
from sklearn.model_selection import train_test_split

# Split features and target into train and test sets (25% held out for testing).
# The feature matrix X is split here, not the full `datasets` frame: the frame's
# last column is the target y (see the iloc slicing that builds X and y), so
# splitting the whole frame would feed the target to the model as an input.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 97)

print(y_test)

In [None]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

 
# Decision tree limited to depth 1 as the weak learner.
DTR=DecisionTreeRegressor(max_depth=1)
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while it has
# stayed the first positional parameter, so this form works with either name.
# random_state is fixed so the fitted ensemble and its score are repeatable.
RegModel = AdaBoostRegressor(DTR, n_estimators=50, learning_rate=1, random_state=97)

In [None]:
# Fit the boosted regressor on the training split; AB holds what fit() returns.
AB = RegModel.fit(X=X_train, y=y_train)
# Predicted target values for the test split.
y_pred = AB.predict(X=X_test)

In [None]:
# scikit-learn metrics module (imported in this cell as before).
from sklearn import metrics
# model_Evaluate(AB, y_test, predictions) --> doesn't work for continuous values
# For a regressor, score() is the coefficient of determination (R^2), not a
# classification accuracy, so the printed label says so.
print("R^2 score: ",RegModel.score(X_test, y_test))

Accuracy:  0.739255564940275


In [None]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Diabetes regression dataset as plain (features, target) arrays.
X,y = load_diabetes(return_X_y=True)

# Split into train and test sets (20% held out for testing).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 97)


from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor


# Decision tree of depth up to 10 as the base learner (not a one-level stump).
DTR=DecisionTreeRegressor(max_depth=10)
# Base learner passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, but it is
# still the first positional parameter. random_state makes the run repeatable.
RegModel = AdaBoostRegressor(DTR, n_estimators=100, learning_rate=1, random_state=97)

AB=RegModel.fit(X_train,y_train)
y_pred=AB.predict(X_test)

from sklearn import metrics
from sklearn.metrics import mean_squared_error
# model_Evaluate(AB, y_test, predictions) --> doesn't work for continuous values
# score() on a regressor is R^2 (coefficient of determination), not accuracy.
print("R^2 score: ",RegModel.score(X_test, y_test))

print("Mean Square Error: ",mean_squared_error(y_test,y_pred))

Accuracy:  0.3957073474596924
Mean Square Error:  3885.2117616365017
