In [306]:
# Environment check: echo the http_proxy value through the shell; an empty line means it is unset or empty.
!echo $http_proxy




In [307]:
import comet_ml
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDClassifier
from comet_ml import Experiment
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
# Load the diamonds data set; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('diamonds.csv')
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [308]:


# Log in to Comet before any Experiment is created further down.
# NOTE(review): presumably prompts for an API key when none is configured — confirm against the Comet docs.
comet_ml.login()

# Display Comet UI
# NOTE(review): no code follows the comment above; the statement it described appears to be missing.



In [309]:
# Sanity check: (rows, columns) of the frame as loaded.
df.shape


(53940, 11)

In [310]:
# Remove the "Unnamed: 0" column before building features.
df = df.drop(columns=["Unnamed: 0"])


In [311]:
from sklearn.preprocessing import LabelEncoder

def encode_labels(data):
    """Label-encode every object-dtype column of ``data`` in place.

    Each categorical column gets its own ``LabelEncoder``, so the integer
    codes of one feature are independent of every other feature.

    Args:
        data: pandas DataFrame to encode. Columns whose dtype is ``object``
            are overwritten with integer codes; other columns are untouched.

    Returns:
        dict mapping each encoded column name to its fitted ``LabelEncoder``,
        so the codes can be mapped back to the original labels later.
    """
    is_categorical = data.dtypes == "object"
    cat_cols = list(is_categorical[is_categorical].index)

    feature_label_encoder_dict = {}
    for col in cat_cols:
        encoder = LabelEncoder()
        # Read from and write to the argument rather than a notebook-level
        # global, so the function works for whichever frame is passed in.
        data[col] = encoder.fit_transform(data[col])
        feature_label_encoder_dict[col] = encoder
    return feature_label_encoder_dict

In [312]:
from sklearn.preprocessing import StandardScaler

def scale_numerical(data):
    """Standardize every column of ``data`` in place with a ``StandardScaler``.

    The scaler is fitted on the frame it transforms; nothing is returned.
    """
    columns = data.columns
    standardized = StandardScaler().fit_transform(data[columns])
    data[columns] = standardized


In [313]:
def set_target(x):
    """Map a cut grade to the binary target label.

    Returns 'Gold' for 'Ideal', 'Premium' and 'Very Good', and 'Silver' for
    every other value.
    """
    top_cuts = ('Ideal', 'Premium', 'Very Good')
    return 'Gold' if x in top_cuts else 'Silver'
# Derive the binary label from the cut grade, then remove the raw grade from the frame.
df['target'] = df['cut'].apply(set_target)
df.drop(columns="cut", inplace=True)

In [314]:
# The label is kept as its own Series; the features are every remaining column.
y = df["target"]
X = df.drop(columns="target")

In [315]:
# Preprocess the feature frame in place: label-encode the categorical columns, then scale every column.
# NOTE(review): the scaler is fitted on the full data set before the train/test split,
# so test-set statistics influence the training features — consider fitting on the training rows only.
encode_labels(X)
scale_numerical(X)

In [316]:
# Convert the string labels to integer codes.
# NOTE(review): LabelEncoder assigns codes in sorted label order, so 'Gold' -> 0 and 'Silver' -> 1;
# metrics that use the default pos_label=1 therefore describe the 'Silver' class — confirm this is intended.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [317]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)


In [318]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def compute_metrics(y_pred, y_true):
    """Score one set of predictions against the ground truth.

    Mind the argument order: predictions first, ground truth second. The
    scikit-learn scorers are called as ``scorer(y_true, y_pred)``.

    Args:
        y_pred: predicted labels.
        y_true: ground-truth labels.

    Returns:
        dict with the keys 'precision', 'recall', 'f1-score' and 'accuracy',
        in that order.
    """
    scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1-score', f1_score),
        ('accuracy', accuracy_score),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

In [320]:
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import pickle

def _positive_class_scores(model, features):
    """Return a continuous positive-class score, falling back to hard labels."""
    if hasattr(model, "predict_proba"):
        # Column 1 holds the probability of the class encoded as 1.
        return model.predict_proba(features)[:, 1]
    if hasattr(model, "decision_function"):
        return model.decision_function(features)
    return model.predict(features)


def run_experiment(ModelClass, name, **model_params):
    """Train one classifier and log its train and validation results to Comet.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. Metrics and confusion matrices are logged inside the
    experiment's ``train()`` and ``validate()`` contexts; the ROC curve is
    logged for the held-out data only.

    Args:
        ModelClass: estimator class, instantiated as ``ModelClass(**model_params)``.
        name: used as the experiment name, its tag and the ROC curve name.
        **model_params: optional keyword arguments forwarded to the estimator
            constructor (for example ``random_state=42``). Leaving them out
            keeps the estimator defaults.
    """
    experiment = Experiment(project_name="Datascience")
    # try/finally closes the experiment even when fitting or logging raises,
    # so a failed run is not left open.
    try:
        experiment.set_name(name)
        experiment.add_tag(name)

        model = ModelClass(**model_params)
        with experiment.train():
            model.fit(X_train, y_train)
            train_pred = model.predict(X_train)
            experiment.log_metrics(compute_metrics(train_pred, y_train))
            experiment.log_confusion_matrix(
                y_train,
                train_pred,
                title="Train Confusion Matrix",
                file_name="train_confusion_matrix.json",
            )

        with experiment.validate():
            test_pred = model.predict(X_test)
            experiment.log_metrics(compute_metrics(test_pred, y_test))
            experiment.log_confusion_matrix(
                y_test,
                test_pred,
                title="validate Confusion Matrix",
                file_name="validation_confusion_matrix.json",
            )
            # A ROC curve needs continuous scores; hard 0/1 predictions
            # collapse it to a single operating point.
            fpr, tpr, _ = roc_curve(y_test, _positive_class_scores(model, X_test))
            experiment.log_curve(name, fpr, tpr)
    finally:
        experiment.end()





In [321]:

# Run the same logging pipeline once per classifier, in this order.
for model_class, experiment_name in (
    (RandomForestClassifier, 'RandomForest'),
    (DecisionTreeClassifier, 'DecisionTreeClassifier'),
    (GaussianNB, 'GaussianNB'),
    (KNeighborsClassifier, 'KNeighborsClassifier'),
    (SGDClassifier, 'SGD'),
):
    run_experiment(model_class, experiment_name)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/nast69/datascience/f4e06e3d431a4638a34541d815c2e593

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : RandomForest
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/nast69/datascience/f4e06e3d431a4638a34541d815c2e593
[1;38;5;39mCOMET INFO:[0m   Metrics:
[1;38;5;39mCOMET INFO:[0m     train_accuracy     : 0.9999768261030775
[1;38;5;39mCOMET INFO:[0m     train_f1-score     : 0.9999034096397179
[1;38;5;39mCOMET INFO:[0m     train_precision    : 1.0
[1;38;5;39mCOMET INFO:[0m     train_recall