<a href="https://colab.research.google.com/github/ksk0629/comparison_of_dnn/blob/develop/comparison_of_dnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Comparison of DNN

## Preparation

In [None]:
import os
from google.colab import drive
import pickle

In [None]:
# Mount my google drive
drive_path = "/content/gdrive"
drive.mount(drive_path)

# Prepare environment
!pip install mlflow
!pip install pyngrok
!pip install PyYAML==5.4  # reference: https://github.com/ultralytics/yolov5/issues/414

from pyngrok import ngrok
import yaml

# Load general config
config_path = os.path.join(drive_path, "MyDrive", "config", "general_config.yaml")
with open(config_path, 'r') as yml:
  config = yaml.safe_load(yml)

config_github = config["github"]
config_ngrok = config["ngrok"]

# Set git config
!git config --global user.email {config_github["email"]}
!git config --global user.name {config_github["username"]}

# Clone the repository
repository_name = "comparison_of_dnn"
git_repository = f"https://github.com/{config_github['username']}/" + repository_name + ".git"
repository_path = "/content/" + repository_name
if not os.path.exists(repository_path):
  !git clone {git_repository}

# Change directory to the cloned directory
%cd {repository_name}

In [None]:
# Check out the working branch of the cloned repository.
# IPython substitutes {branch_name} with the Python variable before running the shell command.
branch_name = "develop"
!git checkout {branch_name}

In [None]:
# Pull the latest commits for the currently checked-out branch
!git pull

## California dataset
- `sklearn.datasets.fetch_california_housing()`: regression problem

In [None]:
# Experiment ID of the California runs; used as the directory name under mlruns/
# when the saved model path is built in the visualization section.
california_experiment_id = "1"

### Checking dataset

In [None]:
%cd comparison_of_dnn  # for resetting runtime
import sys
sys.path.append("./src")

import src.california_dataset

import importlib
importlib.reload(src.california_dataset)

In [None]:
# Load dataset
california_dataset = src.california_dataset.CaliforniaDataset()
callifornia_df = california_dataset.load_dataset()
callifornia_df

In [None]:
# Show statistics without count
callifornia_df.describe().drop(["count"])

In [None]:
# Fetch the three splits from the dataset wrapper and preview the evaluation split
(
    california_train_df,
    california_eval_df,
    california_test_df,
) = california_dataset.load_splitted_dataset_with_eval()
california_eval_df

### Building the model

In [None]:
# Run the California training script as a separate Python process.
# NOTE(review): presumably this logs the run and model under ./mlruns — confirm in src/california_dnn.py.
!python ./src/california_dnn.py

### Visualization of predicted values and true target values

In [None]:
import keras
import matplotlib.pyplot as plt

In [None]:
run_id = "a4cf416cf3184acb9bd2fe280682d710"
model_path = f"/content/comparison_of_dnn/mlruns/{california_experiment_id}/{run_id}/artifacts/model/data/model"

model = keras.models.load_model(model_path)
model.summary()

In [None]:
predicted_values = model.predict(x=california_test_df.drop(["MedHouseVal"], axis=1))
predicted_values

In [None]:
fig = plt.figure(dpi=150)

length = len(california_test_df)

plt.plot(range(length), california_test_df["MedHouseVal"][:length], color="red")
plt.plot(range(length), predicted_values[:length], color="green")

plt.show()

## Iris dataset
- `sklearn.datasets.load_iris()`: three-class classification problem

In [None]:
# Experiment ID of the Iris runs; used as the directory name under mlruns/
# when the saved model path is built in the visualization section.
iris_experiment_id = "2"

### Checking dataset

In [None]:
%cd comparison_of_dnn  # for resetting runtime
import sys
sys.path.append("./src")

import src.iris_dataset

import importlib
importlib.reload(src.iris_dataset)

In [None]:
# Load the full Iris dataset through the project's dataset wrapper and display it.
# NOTE(review): iris_df is presumably a pandas DataFrame (describe() is called on it in the next cell) — confirm in src/iris_dataset.py.
iris_dataset = src.iris_dataset.IrisDataset()
iris_df = iris_dataset.load_dataset()
iris_df

In [None]:
# Summary statistics, with the "count" row removed from the describe() table
iris_df.describe().drop(index=["count"])

In [None]:
# Fetch the three splits from the dataset wrapper and preview the evaluation split
(
    iris_train_df,
    iris_eval_df,
    iris_test_df,
) = iris_dataset.load_splitted_dataset_with_eval()
iris_eval_df

In [None]:
# Preview the evaluation split without the "target" column
iris_eval_df.drop(columns=["target"])

### Building the model

In [None]:
# Run the Iris training script as a separate Python process.
# NOTE(review): presumably this logs the run and model under ./mlruns — confirm in src/iris_dnn.py.
!python ./src/iris_dnn.py

### Visualization of predicted values and true target values

In [None]:
import keras
import matplotlib.pyplot as plt
import numpy as np

In [None]:
run_id = "6d7b74f128774697a6e40f65477ac04d"
model_path = f"/content/comparison_of_dnn/mlruns/{iris_experiment_id}/{run_id}/artifacts/model/data/model"

model = keras.models.load_model(model_path)
model.summary()

In [None]:
predicted_values = np.argmax(model.predict(x=iris_test_df.drop(["target"], axis=1)), axis=1)
predicted_values

In [None]:
fig = plt.figure(dpi=150)

length = len(iris_test_df)

plt.scatter(range(length), iris_test_df["target"][:length], color="red")
plt.scatter(range(length), predicted_values[:length], color="green", s=10)


accuracy = (iris_test_df["target"] == predicted_values).sum() / length
print(f"accuracy: {accuracy}")

plt.show()

## MLflow

In [None]:
# Run MLflow
get_ipython().system_raw("mlflow ui --port 5000 &") # run tracking UI in the background

# Terminate open tunnels if exist
ngrok.kill()

# Setting the authtoken of ngrok
ngrok.set_auth_token(config_ngrok["token"])

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

## Adding files to the git repository

In [None]:
# add_objects = os.path.join("mlruns", "1", "*")
# !git add {add_objects}

In [None]:
# commit_msg = "Added new mlruns data"
# !git commit -m "{commit_msg}"

In [None]:
# html = f"https://{config_github['token']}@github.com/{config_github['username']}/{repository_name}.git"
# !git remote set-url origin {html}
# !git push origin {branch_name}