In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import sklearn
from sklearn.datasets import load_breast_cancer
import io
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [3]:
data = load_breast_cancer()
data.target_names, data.feature_names
X = preprocessing.MinMaxScaler().fit_transform(data.data)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size = 0.85)

X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(X_train, y_train, train_size=0.75)
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_train, y_train, train_size=0.75)
X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(X_train, y_train, train_size=0.75)

In [4]:
from sklearn import tree
from sklearn import metrics
model_1 = tree.DecisionTreeClassifier(max_depth=1)
model_1 = model_1.fit(X_train_1, y_train_1)
y_pred_1 = model_1.predict(X_test_1)
print("A ",metrics.accuracy_score(y_test_1, y_pred_1))
print("P ",metrics.precision_score(y_test_1,y_pred_1))
print("R ",metrics.recall_score(y_test_1,y_pred_1))
print("F1 ",metrics.f1_score(y_test_1,y_pred_1))

A  0.9421487603305785
P  0.9431818181818182
R  0.9764705882352941
F1  0.9595375722543352


In [5]:
from sklearn.naive_bayes import GaussianNB
model_2 = GaussianNB().fit(X_train_2, y_train_2)
y_pred_2 = model_2.predict(X_test_2)
print("A ",metrics.accuracy_score(y_test_2, y_pred_2))
print("P ",metrics.precision_score(y_test_2,y_pred_2))
print("R ",metrics.recall_score(y_test_2,y_pred_2))
print("F1 ",metrics.f1_score(y_test_2,y_pred_2))

A  0.9338842975206612
P  0.9342105263157895
R  0.9594594594594594
F1  0.9466666666666667


In [6]:
from sklearn.naive_bayes import MultinomialNB
model_3 = MultinomialNB().fit(X_train_3, y_train_3)
y_pred_3 = model_3.predict(X_test_3)
print("A ",metrics.accuracy_score(y_test_3, y_pred_3))
print("P ",metrics.precision_score(y_test_3,y_pred_3))
print("R ",metrics.recall_score(y_test_3,y_pred_3))
print("F1 ",metrics.f1_score(y_test_3,y_pred_3))

A  0.859504132231405
P  0.8247422680412371
R  1.0
F1  0.903954802259887


In [7]:
from sklearn.linear_model import LogisticRegression
y_train_final = []
X_train_final = []
y_train_final_1 = model_1.predict(X_train)
y_train_final_2 = model_2.predict(X_train)
y_train_final_3 = model_3.predict(X_train)

for i in range(len(y_train_final_1)):
  X_train_final.append([y_train_final_1[i], y_train_final_2[i], y_train_final_3[i]])
  y_train_final.append(y_train[i])
model_final = LogisticRegression().fit(X_train_final, y_train_final)
X_test_final = []
y_test_final = []
y_test_final_1 = model_1.predict(X_test)
y_test_final_2 = model_2.predict(X_test)
y_test_final_3 = model_3.predict(X_test)

for i in range(len(y_test_final_1)):
  X_test_final.append([y_test_final_1[i], y_test_final_2[i], y_test_final_3[i]])
  y_test_final.append(y_test[i])

y_pred_final = model_final.predict(X_test_final)
print("A ",metrics.accuracy_score(y_test_final, y_pred_final))
print("P",metrics.precision_score(y_test_final,y_pred_final))
print("R ",metrics.recall_score(y_test_final,y_pred_final))
print("F1 ",metrics.f1_score(y_test_final,y_pred_final))

A  0.9186046511627907
P 0.9534883720930233
R  0.8913043478260869
F1  0.9213483146067417


In [8]:
# stacking
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import BaggingClassifier
estimators = [('gnb', GaussianNB()),('dt', tree.DecisionTreeClassifier(max_depth=1)),('mnb', MultinomialNB())]
model_stacked = StackingClassifier(estimators=estimators).fit(X_train, y_train)
y_pred_stacked = model_stacked.predict(X_test)
print("A ",metrics.accuracy_score(y_test, y_pred_stacked))
print("P ",metrics.precision_score(y_test,y_pred_stacked))
print("R ",metrics.recall_score(y_test,y_pred_stacked))
print("F1 ",metrics.f1_score(y_test,y_pred_stacked))


model_bagged = BaggingClassifier(max_features = 0.67).fit(X_train, y_train)
y_pred_bagged = model_bagged.predict(X_test)
print("A ",metrics.accuracy_score(y_test, y_pred_bagged))
print("P ",metrics.precision_score(y_test,y_pred_bagged))
print("R ",metrics.recall_score(y_test,y_pred_bagged))
print("F1 ",metrics.f1_score(y_test,y_pred_bagged))

A  0.9302325581395349
P  0.9347826086956522
R  0.9347826086956522
F1  0.9347826086956522
A  0.9651162790697675
P  0.9777777777777777
R  0.9565217391304348
F1  0.967032967032967


In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
model_random_forest = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
y_pred_random_forest = model_random_forest.predict(X_test)
print("A ",metrics.accuracy_score(y_test, y_pred_random_forest))
print("P ",metrics.precision_score(y_test,y_pred_random_forest))
print("R ",metrics.recall_score(y_test,y_pred_random_forest))
print("F1 ",metrics.f1_score(y_test,y_pred_random_forest))


model_adaboost = AdaBoostClassifier(n_estimators=100, learning_rate=1).fit(X_train, y_train)
y_pred_adaboost = model_adaboost.predict(X_test)
print("A ",metrics.accuracy_score(y_test, y_pred_adaboost))
print("P ",metrics.precision_score(y_test,y_pred_adaboost))
print("R ",metrics.recall_score(y_test,y_pred_adaboost))
print("F1 ",metrics.f1_score(y_test,y_pred_adaboost))

A  0.9767441860465116
P  0.9782608695652174
R  0.9782608695652174
F1  0.9782608695652174
A  0.9883720930232558
P  1.0
R  0.9782608695652174
F1  0.989010989010989


In [10]:
from google.colab import drive
drive.mount('/content/drive')
datasets = pd.read_csv('/content/drive/MyDrive/sem6 myfolder/mldatasets/Contrete.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
X = datasets.iloc[:, :-1].values
y = datasets.iloc[:, -1].values
print("\n\nInput : \n", X)
print("\n\nOutput: \n", y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(datasets, y, test_size = 0.25, random_state = 97)



Input : 
 [[ 540.     0.     0.  ... 1040.   676.    28. ]
 [ 540.     0.     0.  ... 1055.   676.    28. ]
 [ 332.5  142.5    0.  ...  932.   594.   270. ]
 ...
 [ 148.5  139.4  108.6 ...  892.4  780.    28. ]
 [ 159.1  186.7    0.  ...  989.6  788.9   28. ]
 [ 260.9  100.5   78.3 ...  864.5  761.5   28. ]]


Output: 
 [79.99 61.89 40.27 ... 23.7  32.77 32.4 ]


In [13]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
DTR=DecisionTreeRegressor(max_depth=1)
RegModel = AdaBoostRegressor(n_estimators=50, base_estimator=DTR ,learning_rate=1)
AB=RegModel.fit(X_train,y_train)
y_pred=AB.predict(X_test)
from sklearn import metrics
print("Accuracy: ",RegModel.score(X_test, y_test))

Accuracy:  0.7289003240757699


In [14]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics
from sklearn.metrics import mean_squared_error
X,y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 97)
DTR=DecisionTreeRegressor(max_depth=10)
RegModel = AdaBoostRegressor(n_estimators=100, base_estimator=DTR ,learning_rate=1)
AB=RegModel.fit(X_train,y_train)
y_pred=AB.predict(X_test)
print("Accuracy: ",RegModel.score(X_test, y_test))
print("MSE: ",mean_squared_error(y_test,y_pred))

Accuracy:  0.4265410495938441
MSE:  3686.970956154572
