In [1]:
from collections import Counter
import numpy as np

class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        distances = [np.sqrt(np.sum((x - x_train) ** 2)) for x_train in self.X_train]
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

    def get_params(self, deep=True):
        return {'k': self.k}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self




In [5]:
import os
import pickle
import dill
from joblib import load


with open('Models/AdaBoost_model.pkl', 'rb') as file:
    AdaBoost_model = pickle.load(file)
with open('Models/HGB_model.pkl', 'rb') as file:
    HGB_model = pickle.load(file)
with open('Models/ET_model.pkl', 'rb') as file:
    ExtraTrees_model = pickle.load(file)
with open('Models/SVM_model.pkl', 'rb') as file:
    SVM_model = pickle.load(file)
with open('Models/LR_model.pkl', 'rb') as file:
    LR_model = pickle.load(file)
with open('Models/DT_colon.pkl', 'rb') as file:
    DT_model = pickle.load(file)
model = load('Models/KNN_model.pkl')



In [31]:
import pandas as pd
# Encode labels
from sklearn.calibration import LabelEncoder

data=pd.read_csv('Data/colon-dataset-processed.csv')

le = LabelEncoder()
data['Class'] = le.fit_transform(data['Class'])
#split the data into X and y
X=data.drop('Class',axis=1)
y=data['Class']

#split the data into train and test
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state = 42)

In [33]:
#evaluate the models
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

#AdaBoost
AdaBoost_pred=AdaBoost_model.predict(X_test)
print('AdaBoost')
print(accuracy_score(y_test,AdaBoost_pred))


#HGB
HGB_pred=HGB_model.predict(X_test)
print('HGB')
print(accuracy_score(y_test,HGB_pred))

#ExtraTrees
ExtraTrees_pred=ExtraTrees_model.predict(X_test)
print('ExtraTrees')
print(accuracy_score(y_test,ExtraTrees_pred))


#SVM
SVM_pred=SVM_model.predict(X_test)
print('SVM')
print(accuracy_score(y_test,SVM_pred))


#LogisticRegression
LogisticRegression_pred=LR_model.predict(X_test)
print('LogisticRegression')
print(accuracy_score(y_test,LogisticRegression_pred))

#DecisionTree
DT_pred=DT_model.predict(X_test)
print('DecisionTree')
print(accuracy_score(y_test,DT_pred))

#KNN
KNN_pred=model.predict(X_test.values)
print('KNN')
print(accuracy_score(y_test,KNN_pred))




AdaBoost
0.8695652173913043
HGB
0.8695652173913043
ExtraTrees
0.8695652173913043
RandomForest
0.9565217391304348
SVM
0.9130434782608695
LogisticRegression
0.8695652173913043
DecisionTree
0.8695652173913043
KNN
0.8695652173913043
XGboost
0.782608695652174


<h1>Stacking</h1>

In [40]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression

meta_model = RandomForestClassifier(n_estimators=100, random_state=42)

stack_models=[
    ('SVM', SVM_model),
    ('RF', RandomForest_model),
    ('ET', ExtraTrees_model),
    ('HGB', HGB_model),
    ('AdaBoost', AdaBoost_model),
    ('LR', LogisticRegression_model),
    ('DT', DT_model),
    ('XGboost', XGboost_model),
    ('KNN', model)
]

stacking_model = StackingClassifier(estimators=stack_models, final_estimator=meta_model)

stacking_model.fit(X_train, y_train)

predictions = stacking_model.predict(X_test)

accuracy = accuracy_score(y_test, predictions)
accuracy

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


TypeError: unsupported operand type(s) for -: 'str' and 'str'