In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Dataset object used by later cells: they read its 'data' and 'target'
# entries and its `feature_names` attribute.
breast_cancer = load_breast_cancer()

In [None]:
# Display the feature names shipped with the dataset object.
breast_cancer.feature_names

In [None]:
from comet_ml import Experiment

# The same label is used as both the run name and its tag.
EDA_RUN_LABEL = "Breast Cancer Classification"

# Experiment() receives no arguments here, so any credentials or project
# settings have to come from outside this cell.
experiment = Experiment()
experiment.set_name(EDA_RUN_LABEL)
experiment.add_tag(EDA_RUN_LABEL)

In [None]:
df.info()

In [None]:
df = pd.DataFrame(data=np.c_[breast_cancer['data'], breast_cancer['target']])
df.columns = ['mean radius', 'mean texture', 'mean perimeter', 'mean area',
        'mean smoothness', 'mean compactness', 'mean concavity',
        'mean concave points', 'mean symmetry', 'mean fractal dimension',
        'radius error', 'texture error', 'perimeter error', 'area error',
        'smoothness error', 'compactness error', 'concavity error',
        'concave points error', 'symmetry error',
        'fractal dimension error', 'worst radius', 'worst texture',
        'worst perimeter', 'worst area', 'worst smoothness',
        'worst compactness', 'worst concavity', 'worst concave points',
        'worst symmetry', 'worst fractal dimension', 'target']

df["target"] = df["target"].astype(int)
df.head()

In [None]:
df["target"] = df["target"].astype(int)
df.head()

In [None]:
# Display the frame's (rows, columns) dimensions.
df.shape

In [None]:
# Send a profile of df to the Comet experiment. The second argument is
# presumably the name the profile is stored under — confirm against the Comet API.
experiment.log_dataframe_profile(df, "breast_cancer")

In [None]:
import missingno
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
import os

# savefig does not create missing directories, so make sure the output folder
# exists before the first image is written on a fresh checkout.
os.makedirs("images", exist_ok=True)

# Bar chart of non-missing counts per column. The result of savefig is not
# kept: it returns None, so binding it to a name was misleading.
missingno.bar(df).get_figure().savefig("images/missingno.png")
experiment.log_image("images/missingno.png", name="missingno.png", image_format="png")

In [None]:
# One histogram (with KDE overlay) per column: saved under images/, logged to
# the Comet experiment under the column name, then displayed.
for col in df.columns:
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data=df, x=col, kde=True, ax=ax)
    # Apply the layout to this figure *before* saving it. The previous
    # post-loop tight_layout()/show() acted on a fresh, empty figure.
    fig.tight_layout()
    image_path = f"images/{col}.png"
    fig.savefig(image_path)
    experiment.log_image(image_path, name=col, image_format="png")
    plt.show()
    # Release the figure explicitly so open figures do not accumulate on
    # backends where show() leaves them alive.
    plt.close(fig)

In [None]:
# Distribution of the class label, saved locally and logged to Comet.
counts_path = "images/counts.png"
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(data=df, x="target", kde=True, ax=ax)
fig.savefig(counts_path)
experiment.log_image(counts_path, name="counts.png", image_format="png")
plt.show()

In [None]:
# Pairwise scatter/histogram grid over every column of df.
# NOTE(review): this draws one panel per column pair, which is likely slow
# for a frame this wide — consider plotting a subset of columns.
pairplot_path = "images/pairplot.png"
sns.pairplot(data=df)
# plt.savefig writes whatever figure is current, i.e. the grid created above.
plt.savefig(pairplot_path)
experiment.log_image(pairplot_path, name="pairplot.png", image_format="png")
plt.show()

In [None]:
# Annotated correlation heatmap across all columns (label included).
corr_matrix = df.corr()
heatmap_path = "images/corr_heatmap.png"
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
fig.savefig(heatmap_path)
experiment.log_image(heatmap_path, name="corr_heatmap.png", image_format="png")
plt.show()

In [None]:
# Separate the class label from the feature columns; df itself is untouched.
y = df["target"]
X = df.drop(columns=["target"])

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the hold-out set, and therefore every logged metric, is
# reproducible across kernel restarts.
RANDOM_STATE = 42

# Hold out 20% of the rows. stratify=y keeps the class proportions the same in
# both partitions so train and test metrics are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def compute_metrics(y_pred, y_test):
    """Score predictions against reference labels with four classification metrics.

    Args:
        y_pred: Predicted labels; passed as the second argument to each scorer.
        y_test: Reference labels; passed as the first argument to each scorer.
            Despite the name, callers may pass any label set.

    Returns:
        dict: Scores keyed by 'accuracy', 'f1-score', 'precision' and 'recall',
        in that order.
    """
    scorers = (
        ('accuracy', accuracy_score),
        ('f1-score', f1_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    return {label: scorer(y_test, y_pred) for label, scorer in scorers}

In [None]:
import os
import pickle

def model_experiment(ml_model, model_name, **model_params):
    """Train, persist and evaluate one model inside its own Comet experiment.

    Reads the module-level X_train, y_train, X_test and y_test and scores with
    compute_metrics. The fitted model is pickled to models/<model_name>.pkl
    and logged to the experiment.

    Args:
        ml_model: Zero-or-keyword-argument callable returning an unfitted
            estimator exposing fit() and predict() (e.g. an estimator class).
        model_name: Used as the experiment name, its tag, the logged model
            name and the pickle file stem.
        **model_params: Optional keyword arguments forwarded to ml_model.

    Returns:
        None. Results are logged to the Comet experiment.
    """
    experiment = Experiment()
    experiment.set_name(model_name)
    experiment.add_tag(model_name)
    model = ml_model(**model_params)
    model_path = f"models/{model_name}.pkl"

    try:
        # NOTE(review): Comet documents train()/validate() as context managers
        # that tag metrics logged inside them with the phase — confirm.
        with experiment.train():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_train)
            train_metrics = compute_metrics(y_pred, y_train)
            experiment.log_metrics(train_metrics)

            # open() does not create missing directories.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            with open(model_path, 'wb') as file:
                pickle.dump(model, file)
            # Log only after the file is closed, so the pickle is fully
            # flushed to disk before it is read for upload.
            experiment.log_model(model_name, model_path)

        with experiment.validate():
            y_pred = model.predict(X_test)
            test_metrics = compute_metrics(y_pred, y_test)
            experiment.log_metrics(test_metrics)
            experiment.log_confusion_matrix(y_test, y_pred)
    finally:
        # Always close the experiment, even when training or logging fails,
        # so a failed run is not left open.
        experiment.end()

In [None]:
from functools import partial

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# One Comet experiment per classifier.
model_experiment(GaussianNB, 'GaussianNB')
model_experiment(KNeighborsClassifier, 'KNeighborsClassifier')
# The random forest is stochastic: bind a seed so its logged metrics are
# reproducible. partial() keeps the factory callable with no arguments, which
# is how model_experiment invokes it.
model_experiment(partial(RandomForestClassifier, random_state=42), 'RandomForest')