In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes

# Load the diabetes dataset bundled with scikit-learn. Later cells index the
# returned object by "data", "target" and "feature_names".
diabetes = load_diabetes()

In [None]:
# Display the names of the predictor columns.
diabetes["feature_names"]

In [None]:
from comet_ml import Experiment

# Start a Comet experiment run. No credentials are passed here, so the API
# key and project presumably come from the environment or a Comet config
# file — verify before running.
# NOTE(review): scikit-learn is imported earlier in this notebook than
# comet_ml; Comet's documentation asks for comet_ml to be imported first for
# its automatic logging — confirm whether the import order matters here.
experiment = Experiment()
# The same label is used both as the run's display name and as a tag.
experiment.set_name("Diabetes")
experiment.add_tag("Diabetes")

In [None]:
# Assemble one frame holding the features plus the regression target as the
# last column, labelled "target".
df = pd.DataFrame(
    np.column_stack((diabetes["data"], diabetes["target"])),
    columns=[*diabetes["feature_names"], "target"],
)

# Preview the first rows.
df.head()

In [None]:
# Show the (rows, columns) dimensions of the assembled frame.
df.shape

In [None]:
# Attach a profile of the full frame to the Comet run under the name "diabetes".
experiment.log_dataframe_profile(df, name="diabetes")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import missingno
%matplotlib inline

In [None]:
import os

# Plots are written to disk before being uploaded; create the target folder
# so a fresh checkout does not fail on the first savefig call.
os.makedirs("images", exist_ok=True)

# Completeness bar chart of the frame. savefig() returns None, so keep a
# handle on the figure itself instead of binding that return value.
missing_fig = missingno.bar(df).get_figure()
missing_fig.savefig("images/missingno.png")
experiment.log_image("images/missingno.png", name="missingno.png", image_format="png")

In [None]:
# One histogram (with a KDE overlay) per column: saved locally, uploaded to
# the Comet run under the column's name, then displayed inline.
for col in df.columns:
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data=df, x=col, kde=True, ax=ax)
    # Tighten the layout *before* saving so the stored PNG matches what is
    # shown; calling it after the loop only acted on a new, empty figure.
    fig.tight_layout()
    figure = f"images/{col}.png"
    fig.savefig(figure)
    experiment.log_image(figure, name=col, image_format="png")
    plt.show()
    # A figure is created per column; release it explicitly once shown.
    plt.close(fig)

In [None]:
# Distribution of the regression target on its own.
plt.figure(figsize=(6, 4))
# Reference the column by name (same values as diabetes.target) so the
# x-axis is labelled "target"; a raw array carries no name for the label.
sns.histplot(data=df, x="target", kde=True)
plt.savefig("images/counts.png")
experiment.log_image("images/counts.png", name="counts.png", image_format="png")
plt.show()

In [None]:
# Grid of pairwise plots across every column of the frame; the resulting
# current figure is saved and uploaded to the Comet run.
sns.pairplot(df)
plt.savefig("images/pairplot.png")
experiment.log_image(
    "images/pairplot.png",
    name="pairplot.png",
    image_format="png",
)
plt.show()

In [None]:
# Annotated heatmap of the pairwise column correlations, saved and uploaded
# to the Comet run.
plt.figure(figsize=(8, 6))
sns.heatmap(
    data=df.corr(),
    cmap='coolwarm',
    annot=True,
)
plt.savefig("images/corr_heatmap.png")
experiment.log_image(
    "images/corr_heatmap.png",
    name="corr_heatmap.png",
    image_format="png",
)
plt.show()

In [None]:
# Separate the predictors (every column except "target") from the target.
X, y = df.drop(columns="target"), df["target"]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The seed is fixed so the split,
# and therefore every metric computed from it, is reproducible across
# kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def compute_metrics(y_pred, y_test):
    """Score regression predictions against the true target values.

    Note the argument order: predictions come first and true values second,
    the reverse of scikit-learn's ``(y_true, y_pred)`` convention. R² is not
    symmetric, so swapping the arguments changes the result.

    Args:
        y_pred: Predicted target values.
        y_test: True target values.

    Returns:
        dict: Keys ``'mae'``, ``'mse'`` and ``'r2-score'`` mapped to the
        corresponding scikit-learn metric.
    """
    scorers = (
        ('mae', mean_absolute_error),
        ('mse', mean_squared_error),
        ('r2-score', r2_score),
    )
    # Each scorer receives (true, predicted), as scikit-learn expects.
    return {label: score(y_test, y_pred) for label, score in scorers}

In [None]:
import os
import pickle
from sklearn.linear_model import LinearRegression

model = LinearRegression()

# Fit inside the train context so anything logged during fitting is
# namespaced as training.
with experiment.train():
    model.fit(X_train, y_train)

# Score the held-out split under the test context, so these metrics are not
# recorded as training metrics.
with experiment.test():
    y_pred = model.predict(X_test)
    # Pass by keyword: compute_metrics takes predictions first and truth
    # second, and R² is wrong if the two are swapped.
    metrics = compute_metrics(y_pred=y_pred, y_test=y_test)
    experiment.log_metrics(metrics)

# Make sure the output folder exists on a fresh checkout.
os.makedirs("models", exist_ok=True)
with open("models/linreg.pkl", "wb") as file:
    pickle.dump(model, file)
# Upload only after the with-block has closed the file, so every buffered
# byte of the pickle is on disk when Comet reads it.
experiment.log_model("LinearRegression", "models/linreg.pkl")

In [None]:
# Mark the Comet run as finished; nothing is logged to it after this cell.
experiment.end()