In [None]:
# import libraries

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


In [None]:
# import all models that are required

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB


In [None]:
# Load the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()
x, y = data.data, data.target  # x: feature matrix, y: class labels

In [None]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used in fitting.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# Candidate models, keyed by the display name used in the results table.
# Estimators with internal randomness get a fixed seed so that re-running the
# notebook reproduces the same scores.
SEED = 42  # same value as the random_state passed to train_test_split

models = {
    "KNN": KNeighborsClassifier(),
    # NOTE: with scikit-learn's default settings LogisticRegression already
    # applies an L2 penalty, so this entry and "Logistic L2" are the same
    # configuration and are expected to score identically.
    "Logistic Regression": LogisticRegression(max_iter=5000),
    # liblinear shuffles the data internally, hence the seed.
    "Logistic L1": LogisticRegression(penalty='l1', solver='liblinear', max_iter=5000,
                                      random_state=SEED),
    "Logistic L2": LogisticRegression(penalty='l2', max_iter=5000),
    # A regressor, not a classifier: its continuous output is thresholded
    # at 0.5 in the evaluation loop to obtain class labels.
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Bagging": BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=100,
                                 random_state=SEED),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=SEED),
    "SVM Linear": SVC(kernel='linear'),
    "SVM RBF": SVC(kernel='rbf'),
    "LDA": LinearDiscriminantAnalysis(),
    "Naive Bayes": GaussianNB()
}


In [None]:



# Training + Evaluation
results = []  # rows of [Model Name, Accuracy, Precision, Recall, F1]

# Models that are trained on the standardised features. Every other entry
# (Linear Regression and the tree-based models) is trained on the raw
# features; tree splits are threshold-based, so scaling is not needed for them.
SCALED_MODELS = {
    "KNN", "Logistic Regression", "Logistic L1", "Logistic L2",
    "SVM Linear", "SVM RBF", "LDA", "Naive Bayes",
}

# Train and evaluate each model in the dictionary
for name, model in models.items():
    # Pick the matching train/test feature matrices. The variable names are
    # lower-case (x_train, x_train_scaled, ...) as defined by the split and
    # scaling cells; the previous upper-case X_* names raised NameError.
    if name in SCALED_MODELS:
        train_features, test_features = x_train_scaled, x_test_scaled
    else:
        train_features, test_features = x_train, x_test

    # Training + prediction
    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    # Special case: Linear Regression predicts a continuous value, so
    # threshold it at 0.5 to turn it into a 0/1 class label.
    if name == "Linear Regression":
        y_pred = (y_pred > 0.5).astype(int)

    # Evaluation metrics on the held-out test split
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Store result
    results.append([name, acc, prec, rec, f1])




In [None]:
# Create the comparison table: one row per model, one column per metric.
# (The assignment must start at column 0; a leading space here raises
# "IndentationError: unexpected indent".)
results_df = pd.DataFrame(
    results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score"],
)

# Show the models ranked by F1 score, best first
print(results_df.sort_values(by="F1 Score", ascending=False))

                  Model  Accuracy  Precision    Recall  F1 Score
2           Logistic L1  0.991228   0.986301  1.000000  0.993103
1   Logistic Regression  0.982456   0.986111  0.986111  0.986111
3           Logistic L2  0.982456   0.986111  0.986111  0.986111
9               SVM RBF  0.982456   0.986111  0.986111  0.986111
8            SVM Linear  0.973684   0.985915  0.972222  0.979021
4     Linear Regression  0.956140   0.946667  0.986111  0.965986
10                  LDA  0.956140   0.946667  0.986111  0.965986
0                   KNN  0.956140   0.958904  0.972222  0.965517
7         Random Forest  0.947368   0.958333  0.958333  0.958333
6               Bagging  0.938596   0.957746  0.944444  0.951049
11          Naive Bayes  0.929825   0.944444  0.944444  0.944444
5         Decision Tree  0.921053   0.956522  0.916667  0.936170
