<a href="https://colab.research.google.com/github/pseudo-xy/pseudo-xy/blob/main/pengiuns.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import tempfile
import tensorflow as tf
import urllib.request

from comet_ml import Experiment
from keras_tuner import RandomSearch
from tensorflow import keras
from tensorflow.keras import layers, losses, metrics, optimizers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler

# Initialise Comet for this session before opening a run.
import comet_ml
comet_ml.init()

# Automatic logging features requested for the run; each one is switched on.
auto_logging_options = dict(
    auto_metric_logging=True,
    auto_param_logging=True,
    auto_histogram_weight_logging=True,
    auto_histogram_gradient_logging=True,
    auto_histogram_activation_logging=True,
)

# Single Comet run used by the notebook's later log_table/display/end calls;
# log_code=True is passed so the source code is attached to the run.
experiment = Experiment(
    project_name="Penguins",
    log_code=True,
    **auto_logging_options,
)

COMET INFO: Comet API key is valid
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/eaamo/general/b2a9e1bc25894b5eac47e44642750961
COMET INFO:   Parameters:
COMET INFO:     add_indicator       : False
COMET INFO:     categories          : auto
COMET INFO:     copy                : True
COMET INFO:     drop                : 1
COMET INFO:     dtype               : <class 'numpy.float64'>
COMET INFO:     fill_value          : 1
COMET INFO:     handle_unknown      : ignore
COMET INFO:     keep_empty_features : False
COMET INFO:     max_categories      : 1
COMET INFO:     min_frequency       : 1
COMET INFO:     missing_values      : nan
COMET INFO:     sparse              : depreca

In [None]:
# Show the key of the run opened in the setup cell. The previous content of
# this cell constructed a second, unassigned Experiment with no project name,
# which nothing else in the notebook references.
# NOTE(review): a second Experiment presumably also replaces `experiment` as
# Comet's active run for automatic logging -- confirm against the Comet docs.
experiment.get_key()

In [None]:
!pip install comet_ml

In [None]:
!pip install comet_ml keras_tuner

In [None]:
# Download the penguins CSV into a freshly created temporary directory.
DATA_DIRECTORY = tempfile.mkdtemp(prefix="keras-tuner-data")
DATA_FILEPATH = os.path.join(DATA_DIRECTORY, "penguins.csv")

urllib.request.urlretrieve(
    url="https://storage.googleapis.com/download.tensorflow.org/data/palmer_penguins/penguins_size.csv",
    filename=DATA_FILEPATH,
)

# Read the downloaded file into a DataFrame.
df = pd.read_csv(filepath_or_buffer=DATA_FILEPATH)

# Attach the raw table to the Comet run, then print the column summary
# (non-null counts and dtypes) for the loaded frame.
experiment.log_table(tabular_data=df, filename="penguins.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   culmen_length_mm   342 non-null    float64
 3   culmen_depth_mm    342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                334 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


In [None]:
# Names of the columns whose dtype is int64 or float64, in frame order.
numerical_columns = df.select_dtypes(include=["int64", "float64"]).columns.tolist()

# Numeric features: fill missing values with the column mean, then standardise.
numerical_steps = [
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
]
numerical_preprocessor = Pipeline(steps=numerical_steps)

# Categorical features: fill missing values with the most frequent category,
# then one-hot encode; handle_unknown="ignore" is set for categories that were
# not present when the encoder was fitted.
categorical_steps = [
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
]
categorical_preprocessor = Pipeline(steps=categorical_steps)

# Route the numeric columns and the "island" column to their pipelines.
# Only the columns listed here are handed to a transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("numerical", numerical_preprocessor, numerical_columns),
        ("categorical", categorical_preprocessor, ["island"]),
    ]
)

In [None]:
# The species column is the target; "species" and "sex" are both removed
# from the feature frame.
X = df.drop(columns=["species", "sex"])
y = df["species"]

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Fit the label encoder on the training labels only and reuse that mapping
# for the held-out labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)

# To avoid data leakage, the preprocessor is likewise fitted on the training
# features only; the test features are transformed with the fitted state.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [None]:
def _model(hp):
    """Build and compile the classifier for one hyperparameter trial.

    Args:
        hp: Hyperparameter container supplied by the tuner. It is queried for
            "dense_1_units", "dense_2_units" and "learning_rate".

    Returns:
        A compiled ``keras.Sequential`` model with two tunable Dense layers
        followed by a 3-unit softmax output layer.
    """
    # Hidden-layer widths: 4 to 12 in steps of 4, defaulting to 8.
    first_units = hp.Int("dense_1_units", min_value=4, max_value=12, step=4, default=8)
    second_units = hp.Int("dense_2_units", min_value=4, max_value=12, step=4, default=8)

    model = keras.Sequential()
    # Input width is read from the module-level X_train array.
    # NOTE(review): no activation argument is passed to this first layer,
    # unlike the second one -- confirm that this is intentional.
    model.add(layers.Dense(first_units, input_shape=(X_train.shape[1],)))
    model.add(layers.Dense(second_units, activation="relu"))
    model.add(layers.Dense(3, activation="softmax"))

    # The learning rate is chosen from two candidate values.
    learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3])
    model.compile(
        optimizer=optimizers.Adam(learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model

In [None]:
# Random search over the hyperparameters declared in _model, ranked by
# validation accuracy. A fixed seed makes the sampled trials repeatable, and
# overwrite=True discards results of earlier runs stored in the same directory.
tuner = RandomSearch(
    _model,
    objective="val_accuracy",
    max_trials=10,
    seed=42,
    overwrite=True,
    directory="keras-tuner",
    project_name="keras-tuner-example",
)

tuner.search_space_summary()

# Validate on a slice of the training data (validation_split) rather than on
# X_test/y_test: the test split is scored again after tuning, so selecting
# hyperparameters on it would leak test information into the reported accuracy.
# Labels are one-hot encoded to match the categorical_crossentropy loss.
tuner.search(
    X_train,
    to_categorical(y_train),
    epochs=5,
    validation_split=0.2,
)

tuner.results_summary()

Trial 10 Complete [00h 00m 02s]
val_accuracy: 0.739130437374115

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 00m 21s
Results summary
Results in keras-tuner/keras-tuner-example
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 08 summary
Hyperparameters:
dense_1_units: 4
dense_2_units: 12
learning_rate: 0.01
Score: 1.0

Trial 04 summary
Hyperparameters:
dense_1_units: 8
dense_2_units: 12
learning_rate: 0.01
Score: 0.9855072498321533

Trial 06 summary
Hyperparameters:
dense_1_units: 12
dense_2_units: 8
learning_rate: 0.01
Score: 0.9855072498321533

Trial 03 summary
Hyperparameters:
dense_1_units: 8
dense_2_units: 4
learning_rate: 0.01
Score: 0.9710144996643066

Trial 07 summary
Hyperparameters:
dense_1_units: 12
dense_2_units: 12
learning_rate: 0.001
Score: 0.9130434989929199

Trial 05 summary
Hyperparameters:
dense_1_units: 12
dense_2_units: 4
learning_rate: 0.001
Score: 0.8260869383811951

Trial 01 summary
Hyperparameters:
dense_1_units: 8
dense_2_

In [None]:
# Take the top-ranked model from the search and predict a class index for
# every test row (argmax over the softmax outputs).
(best_model,) = tuner.get_best_models(num_models=1)
class_scores = best_model.predict(X_test)
y_pred = np.argmax(class_scores, axis=-1)

# Percentage of test rows whose predicted index equals the encoded label.
accuracy = (y_pred == y_test).mean() * 100
print(f"Accuracy: {accuracy:.2f}")

# Column labels for the results table.
# NOTE(review): assumes the preprocessed matrix holds the four numeric
# features followed by three island indicator columns in this order --
# TODO confirm against preprocessor.get_feature_names_out().
result_columns = [
    "Culmen Length", "Culmen Depth", "Flipper Length", "Body Mass",
    "Island - Biscoe", "Island - Dream", "Island - Torgersen",
    "Species", "Prediction",
]

# Preprocessed features side by side with the encoded true label and the
# predicted label, one row per test sample.
results = pd.DataFrame(
    np.column_stack((X_test, y_test, y_pred)),
    columns=result_columns,
)

# Upload the table, show the run's panels inline, and close the Comet run.
experiment.log_table(filename="results.csv", tabular_data=results)
experiment.display("panels")
experiment.end()

COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------------------------------------------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/eaamo/penguins/e7b8a33ebfb441d8bd84cf15d3770211
COMET INFO:   Parameters:
COMET INFO:     add_indicator       : False
COMET INFO:     categories          : auto
COMET INFO:     copy                : True
COMET INFO:     drop                : 1
COMET INFO:     dtype               : <class 'numpy.float64'>
COMET INFO:     fill_value          : 1
COMET INFO:     handle_unknown      : ignore
COMET INFO:     keep_empty_features : False
COMET INFO:     max_categories      : 1
COMET INFO:     min_frequency       : 1
COMET INFO:     missing_values      : nan
COMET INFO:     sparse              : deprecated
COMET INFO:     sparse_output 