# Importing relevant libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,f1_score,recall_score,roc_curve,classification_report

# Defining a class for model evaluation

In [2]:
class evaluation:
    """Collect and report classification metrics for one set of predictions.

    The instance keeps the true and predicted labels it was given. Calling
    ``evaluate()`` computes accuracy, precision, F1 and recall with the
    scikit-learn metric functions (called with their default settings) and
    stores them on the instance as ``accuracy``, ``precision``, ``f1_score``
    and ``recall``. Until ``evaluate()`` has run, those attributes are ``None``.
    """

    def __init__(self,true,predicted):
        """Store the label sequences to compare.

        Parameters
        ----------
        true : ground-truth labels, passed unchanged to the metric functions.
        predicted : predicted labels, passed unchanged to the metric functions.
        """
        self.true_values = true
        self.predicted_values = predicted
        # Define the metric attributes up front so that reading them before
        # evaluate() yields None instead of raising AttributeError.
        self.accuracy = None
        self.precision = None
        self.f1_score = None
        self.recall = None

    def evaluate(self):
        """Compute the four metrics, store them on the instance and print them.

        Returns
        -------
        None
        """
        self.accuracy = accuracy_score(self.true_values,self.predicted_values)
        self.precision = precision_score(self.true_values,self.predicted_values)
        # The bare name f1_score below is the imported metric function; the
        # instance attribute of the same name only holds its result.
        self.f1_score = f1_score(self.true_values,self.predicted_values)
        self.recall = recall_score(self.true_values,self.predicted_values)
        print("Accuracy : {}\nPrecision: {}\nF1: {}\nRecall : {}".format(self.accuracy,self.precision,self.f1_score,self.recall))

    def confusion(self):
        """Return ``confusion_matrix(true, predicted)`` for the stored labels."""
        return confusion_matrix(self.true_values,self.predicted_values)

    def class_report(self):
        """Print ``classification_report(true, predicted)`` for the stored labels.

        Returns
        -------
        None
        """
        print(classification_report(self.true_values,self.predicted_values))
    

# Loading the dataset

In [3]:
# Load scikit-learn's bundled breast-cancer dataset; its .data, .feature_names
# and .target fields are read when the DataFrame is built.
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

# Splitting the dataset into independent features and target variable

In [4]:
# One row per sample: the feature columns first, then the label appended as
# the final "target" column.
data_frame = pd.DataFrame(cancer.data, columns=cancer.feature_names).assign(
    target=cancer.target
)

# Work on a copy so data_frame itself is left as loaded.
df = data_frame.copy()

# Independent features are every column except the label; y is the label.
X = df.drop(columns="target")
y = df["target"]

In [5]:
# Sanity check: X and y should report the same number of rows.
X.shape,y.shape

((569, 30), (569,))

# Scaling the data

In [6]:
# Scale every feature column with a default-configured RobustScaler; X is
# rebound to the scaled array.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# made later in the notebook, so the test rows influence the scaling
# statistics. Consider fitting on the training split only.
scaler = RobustScaler()
X = scaler.fit(X).transform(X)
X.shape

(569, 30)

# Using principal component analysis to reduce the number of features

In [7]:
# Project the scaled feature matrix onto 15 principal components.
# NOTE(review): PCA is fitted on all rows, before the train/test split made
# later in the notebook, so the test rows contribute to the components.
# TODO confirm how n_components=15 was chosen.
pca = PCA(n_components=15,random_state=42)
reduced_X = pca.fit_transform(X)
reduced_X.shape

(569, 15)

# Splitting the data into training and test sets and using LazyPredict to compare different algorithms

In [8]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    reduced_X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from lazypredict.Supervised import LazyClassifier

# Fit LazyPredict's classifier sweep on the training split and score each
# model on the test split.
# NOTE(review): the same test split is reused for the final model's
# evaluation further down, so choosing the model from this table makes that
# final score optimistic. Consider a separate validation split.
classifiers = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=None,
)
models, predictions = classifiers.fit(X_train, X_test, y_train, y_test)

100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:01<00:00, 26.61it/s]


In [24]:
# Display the per-model comparison table returned by classifiers.fit.
models

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Perceptron,0.98,0.98,0.98,0.98,0.01
LinearSVC,0.98,0.98,0.98,0.98,0.01
CalibratedClassifierCV,0.97,0.97,0.97,0.97,0.04
LogisticRegression,0.97,0.97,0.97,0.97,0.01
XGBClassifier,0.96,0.96,0.96,0.96,0.09
ExtraTreesClassifier,0.96,0.96,0.96,0.96,0.13
NearestCentroid,0.96,0.96,0.96,0.96,0.06
AdaBoostClassifier,0.96,0.96,0.96,0.96,0.13
RandomForestClassifier,0.96,0.96,0.96,0.96,0.24
SGDClassifier,0.96,0.96,0.96,0.96,0.01


# Initializing and fitting the model

In [11]:
# Seed the estimator so repeated runs fit the same model; 42 matches the seed
# already used for PCA and the train/test split.
final_model = LinearSVC(random_state=42)

In [12]:
# Train the classifier on the training split; the object returned by fit() is
# kept under the same name and used for prediction.
final_model = final_model.fit(X_train, y_train)

# Making predictions

In [13]:
# Predict labels for the held-out test rows.
y_pred = final_model.predict(X_test)

# Initializing the evaluation object

In [16]:
# Pair the held-out labels with the model's predictions for metric reporting.
val = evaluation(true=y_test,predicted=y_pred)

In [18]:
# Compute accuracy, precision, F1 and recall, store them on val and print them.
val.evaluate()

Accuracy : 0.9912280701754386
Precision: 0.9861111111111112
F1: 0.993006993006993
Recall : 1.0


In [19]:
# Confusion matrix of the true labels against the predicted labels.
val.confusion()

array([[42,  1],
       [ 0, 71]], dtype=int64)

In [20]:
# Print the classification report for the test-set predictions.
val.class_report()

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        43
           1       0.99      1.00      0.99        71

    accuracy                           0.99       114
   macro avg       0.99      0.99      0.99       114
weighted avg       0.99      0.99      0.99       114



In [22]:
# Read back the metric values that the earlier val.evaluate() call stored on val.
val.accuracy,val.precision,val.recall,val.f1_score

(0.9912280701754386, 0.9861111111111112, 1.0, 0.993006993006993)