<a href="https://colab.research.google.com/github/ksk0629/comparison_of_dnn/blob/develop/comparison_of_dnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Comparison of DNN

## Preparation

In [None]:
import os
from google.colab import drive
import pickle

In [None]:
# Mount my google drive
drive_path = "/content/gdrive"
drive.mount(drive_path)

# Prepare environment
!pip install mlflow
!pip install pyngrok
!pip install PyYAML==5.4  # reference: https://github.com/ultralytics/yolov5/issues/414

from pyngrok import ngrok
import yaml

# Load general config
config_path = os.path.join(drive_path, "MyDrive", "config", "general_config.yaml")
with open(config_path, 'r') as yml:
  config = yaml.safe_load(yml)

config_github = config["github"]
config_ngrok = config["ngrok"]

# Set git config
!git config --global user.email {config_github["email"]}
!git config --global user.name {config_github["username"]}

# Clone the repository
repository_name = "comparison_of_dnn"
git_repository = f"https://github.com/{config_github['username']}/" + repository_name + ".git"
repository_path = "/content/" + repository_name
if not os.path.exists(repository_path):
  !git clone {git_repository}

# Change directory to the cloned directory
%cd {repository_name}

In [None]:
# Checkout: switch the cloned repository to the branch this notebook works on.
# `branch_name` is read again by the final `git push` cell.
branch_name = "develop"
!git checkout {branch_name}

In [None]:
# Pull: bring the currently checked-out branch up to date with its remote.
!git pull

## California dataset
- `sklearn.datasets.fetch_california_housing()`: regression problem

### Checking dataset

In [None]:
import sys
# Add ./src to the module search path. The relative path is resolved against
# the current working directory, so this cell must run from the repository root.
# NOTE(review): presumably needed so modules inside src can import each other
# by bare name — confirm. Re-running the cell appends the entry again.
sys.path.append("./src")

# Project-local helpers used by the dataset cells below.
import src.utils

In [None]:
# Load dataset through the project helper and print its summary via `.info()`.
# NOTE(review): `callifornia_df` is a misspelling of "california"; the name is
# kept because the statistics cell that follows reads it under this spelling.
callifornia_df = src.utils.load_california_housing()
callifornia_df.info()

In [None]:
# Summary statistics of every column; the "count" row is removed before display.
callifornia_stats = callifornia_df.describe()
callifornia_stats.drop(index=["count"])

In [None]:
# Load the dataset already split into three parts and unpack them in order:
# train, eval, test.
california_splits = src.utils.load_splitting_california_dataset_with_eval()
california_train_df, california_eval_df, california_test_df = california_splits

# Display the eval split as the cell output.
california_eval_df

### Building the model

In [None]:
import matplotlib.pyplot as plt
import yaml

import src.dnn

In [None]:
config_yaml_path = "./config_california.yaml"

# Load the model config and show the model summary.
# The config is stored under its own name so that it does not overwrite the
# general `config` (GitHub / ngrok settings) loaded by the setup cell.
with open(config_yaml_path, "r") as yaml_f:
  california_config = yaml.safe_load(yaml_f)
src.dnn.get_california_dnn(**california_config["dnn"]).summary()

# Train using the same YAML file; the helper receives the path, not the dict.
# `model` and `history` are read by the evaluation cells that follow.
model, history = src.dnn.train_california_dnn_with_mlflow(config_yaml_path)

In [None]:
# Predict on the test split; the target column "MedHouseVal" is dropped so that
# only the feature columns are passed to the model.
california_test_features = california_test_df.drop(columns=["MedHouseVal"])
model.predict(x=california_test_features)

In [None]:
# Evaluate the trained model on the test split. The feature frame and target
# column are built once and reused for evaluation and for the plot below.
test_features = california_test_df.drop(["MedHouseVal"], axis=1)
test_targets = california_test_df["MedHouseVal"]
evaluation_loss = model.evaluate(x=test_features, y=test_targets)

# Content written to the metric file: "0 <loss> 0".
# NOTE(review): presumably MLflow's "<timestamp> <value> <step>" line format,
# and assumes evaluate() returns a single scalar — confirm.
evaluation_loss_str = f"0 {evaluation_loss} 0"

# The run id (and the commit hash written below) are literals tied to one
# specific run; they have to be edited by hand for any other run.
run_id = "6d683f2ff7c04fbe8ba8f103c3b14149"
run_dir_name = f"/content/comparison_of_dnn/mlruns/1/{run_id}"
metric_dir_name = f"{run_dir_name}/metrics"

# The presence of the test_loss metric file marks a run as already patched.
if not os.path.exists(f"{metric_dir_name}/test_loss"):
  with open(f"{run_dir_name}/meta.yaml", "r")  as f:
    lines = f.readlines()

  # Set entry_point_name. The line is located by its key rather than by a
  # fixed position, so a different field order cannot clobber another field
  # and a short file cannot raise IndexError; it is appended if absent.
  entry_point_line = "entry_point_name: 'src.dnn.train_california_dnn_with_mlflow'\n"
  for line_index, line in enumerate(lines):
    if line.startswith("entry_point_name:"):
      lines[line_index] = entry_point_line
      break
  else:
    if lines and not lines[-1].endswith("\n"):
      lines[-1] += "\n"
    lines.append(entry_point_line)
  with open(f"{run_dir_name}/meta.yaml", "w")  as f:
    f.writelines(lines)

  # Add test_loss
  with open(f"{metric_dir_name}/test_loss", "w") as f:
    f.write(evaluation_loss_str)

  # Add git commit
  with open(f"{run_dir_name}/tags/mlflow.source.git.commit", "w") as f:
    f.writelines(["b10b482d2b32805276f16679293c4426a23302fb"])

  # NOTE(review): the comparison plot is drawn only on the pass that patches
  # the run, not on later re-runs of this cell — confirm that is intended.
  fig = plt.figure(dpi=150)

  # Plot at most the first 500 test samples; min() keeps the x and y series the
  # same length when the test split holds fewer rows than that.
  length = min(500, len(california_test_df))

  # Red: true target values. Green: model predictions for the same rows.
  plt.plot(range(length), test_targets[:length], color="red", label="actual")
  plt.plot(range(length), model.predict(x=test_features)[:length], color="green", label="predicted")
  plt.xlabel("test sample")
  plt.ylabel("MedHouseVal")
  plt.legend()

  plt.show()
else:
  print("You've already changed their.")

## MLflow

In [None]:
# Port shared by the MLflow UI and the ngrok tunnel that exposes it.
mlflow_port = 5000

# Launch the MLflow tracking UI; the trailing "&" puts it in the background.
get_ipython().system_raw(f"mlflow ui --port {mlflow_port} &")

# Stop any ngrok process that is still running before opening a new tunnel.
ngrok.kill()

# Authenticate ngrok with the token taken from the general config.
ngrok.set_auth_token(config_ngrok["token"])

# Open an HTTPS tunnel to the local MLflow UI and print its public address.
ngrok_tunnel = ngrok.connect(addr=str(mlflow_port), proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

## Adding files to the git repository

In [None]:
# Stage everything under mlruns/1. The path is interpolated unquoted, so the
# trailing "*" is left for the shell to expand.
add_objects = os.path.join("mlruns", "1", "*")
!git add {add_objects}

In [None]:
# Commit the staged files. The message is wrapped in double quotes so that it
# reaches git as one argument despite containing spaces.
commit_msg = "Added new mlruns data"
!git commit -m "{commit_msg}"

In [None]:
html = f"https://{config_github['token']}@github.com/{config_github['username']}/{repository_name}.git"
!git remote set-url origin {html}
!git push origin {branch_name}