### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [166]:
%%capture
!pip install boto3
!pip install ads
!pip install awscli
!pip install pytorch-lightning==1.5.10
!pip install ipython[notebook]
!pip install matplotlib
!pip install seaborn
!pip install tensorboard pandas
!pip install sklearn

In [167]:
!pip install torch torchvision



In [168]:
import boto3

s3 = boto3.resource(
 's3',
 region_name="us-phoenix-1",
 aws_secret_access_key="PcYW3zZugZdfGMk6CQr6OiFy5Cyt8pvctUIQd6NJyns=",
 aws_access_key_id="e1f1af4d0d5f6fc28bd45aeaec79c8f7554f6410",
 endpoint_url="https://axutkjfnpof3.compat.objectstorage.us-phoenix-1.oraclecloud.com"
)

In [169]:
import os

import torch
from pytorch_lightning import LightningModule, Trainer
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy, F1Score, Precision, Recall, PrecisionRecallCurve
import torchvision
from torchvision import transforms
from torchvision.datasets import MNIST
from pytorch_lightning.callbacks import ModelCheckpoint
import numpy as np
import matplotlib.pyplot as plt
from pytorch_lightning.loggers import TensorBoardLogger

PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
AVAIL_GPUS = min(1, torch.cuda.device_count())
BATCH_SIZE = 256 if AVAIL_GPUS else 64


from packaging import version
from statistics import mean
from packaging import version


import pandas as pd
import seaborn as sns
from scipy import stats
import tensorboard as tb
from sklearn.metrics import confusion_matrix, classification_report
import json
from tensorboard.plugins.hparams.plugin_data_pb2 import HParamsPluginData

from datetime import datetime
import pickle
import re

# Write the version number for the project
VERSION_NO = "v1"

# Folder for storing tensorbolard logs and checkpoints
logger = TensorBoardLogger("tb_logs", name="my_model_tensorboard")



In [170]:
class LitMNIST(LightningModule):
    def __init__(self, data_dir=PATH_DATASETS, hidden_size=64, learning_rate=2e-4):

        super().__init__()
        # Set our init args as class attributes
        self.data_dir = data_dir
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate
        
        # Save the hyperparameters 
        self.save_hyperparameters()
        
        # Hardcode some dataset specific attributes
        self.num_classes = 10
        self.dims = (1, 28, 28)
        channels, width, height = self.dims
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        # Define PyTorch model
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(channels * width * height, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size, self.num_classes),
        )
        self.accuracy = Accuracy()

    def forward(self, x):
        x = self.model(x)
        return F.log_softmax(x, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        pred=self.forward(x)

        # identifying number of correct predections in a given batch
        correct=pred.argmax(dim=1).eq(y).sum().item()

        # identifying total number of labels in a given batch
        total=len(y)

        #calculating the loss
        loss = F.nll_loss(pred, y)
        train_loss ={"train_loss": loss.item()}

        f1= F1Score(self.num_classes, threshold=0.5, average='micro')
        f1_score = f1(pred, y)

        pre = Precision(self.num_classes)
        precisionValue = pre(pred, y)

        re = Recall(self.num_classes)
        recallValue= re(pred, y)  

        output ={
            "loss": loss,
            "correct": correct,
            "total": total,
            "f1_score": f1_score,
            "precision":precisionValue,
            "recall":recallValue
        }
        return output

    def training_epoch_end(self, outputs):
       #  the function is called after every epoch is completed
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        avg_precision = torch.stack([x['precision'] for x in outputs]).mean()
        avg_recall = torch.stack([x['recall'] for x in outputs]).mean()
        avg_f1_score = torch.stack([x['f1_score'] for x in outputs]).mean()

        #calculate correct and total predictions
        correct=sum([x["correct"] for  x in outputs])
        total=sum([x["total"] for  x in outputs])


        self.logger.experiment.add_scalar("train_loss", avg_loss, self.current_epoch)
        self.logger.experiment.add_scalar("train_acc", correct/total, self.current_epoch)
        self.logger.experiment.add_scalar("train_recall", avg_recall, self.current_epoch)
        self.logger.experiment.add_scalar("train_precision", avg_precision, self.current_epoch)
        self.logger.experiment.add_scalar("train_f1_score", avg_f1_score, self.current_epoch)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        val_acc = self.accuracy(preds, y)

        output={'val_loss':loss,
                'val_acc' : val_acc
                }
        return output

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['val_acc'] for x in outputs]).mean()

        self.logger.experiment.add_scalar("val_loss", avg_loss, self.current_epoch)
        self.logger.experiment.add_scalar("val_accuracy", avg_acc, self.current_epoch)

        return {'val_loss': avg_loss,
                'val_accuracy': avg_acc}

    def test_step(self, batch, batch_idx):
        x, y = batch
        pred = self(x)
        correct=pred.argmax(dim=1).eq(y).sum().item()
        total=len(y)*1.0
        test_loss = F.nll_loss(pred, y)
        test_accuracy = correct/total

        f1= F1Score(self.num_classes, threshold=0.5, average='micro')
        f1_score = f1(pred, y)

        pre = Precision(self.num_classes)
        precisionValue = pre(pred, y)

        re = Recall(self.num_classes)
        recallValue= re(pred, y)  

        test_output ={
            "test_loss": test_loss,
            "test_f1": f1_score,
            "test_recall": recallValue,
            "test_precision": precisionValue,
            "correct": correct,
            "total": total,
        }

        return test_output

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        correct=sum([x["correct"] for  x in outputs])
        total=sum([x["total"] for  x in outputs])

        avg_test_f1_score = torch.stack([x['test_f1'] for x in outputs]).mean()
        avg_test_recall = torch.stack([x['test_recall'] for x in outputs]).mean()
        avg_test_precision = torch.stack([x['test_precision'] for x in outputs]).mean()


        logs = {"test_loss": avg_loss, 
                "test_accuracy": correct/total, 
                "test_f1_score":avg_test_f1_score, 
                "test_recall": avg_test_recall, 
                "test_precision": avg_test_precision}

        self.logger.experiment.add_scalar("test_loss", avg_loss, self.current_epoch)
        self.logger.experiment.add_scalar("test_accuracy", correct/total, self.current_epoch)
        self.logger.experiment.add_scalar("test_f1_score", avg_test_f1_score, self.current_epoch)
        self.logger.experiment.add_scalar("test_recall", avg_test_recall, self.current_epoch)
        self.logger.experiment.add_scalar("test_precision", avg_test_precision, self.current_epoch)

        return {'log': logs, 'progress_bar': logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def custom_histogram_adder(self):
      for name,params in self.named_parameters():
        self.logger.experiment.add_histogram(name, params, self.current_epoch)

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        # download
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):

        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        return DataLoader(self.mnist_train, batch_size=BATCH_SIZE)

    def val_dataloader(self):
        return DataLoader(self.mnist_val, batch_size=BATCH_SIZE)

    def test_dataloader(self):
        return DataLoader(self.mnist_test, batch_size=BATCH_SIZE)
        

In [171]:
model = LitMNIST()

# Define the lightning trainer
trainer = Trainer(
    gpus=AVAIL_GPUS,
    max_epochs=2,
    progress_bar_refresh_rate=20, logger=logger,
)
trainer.tune(model)
trainer.fit(model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name     | Type       | Params
----------------------------------------
0 | model    | Sequential | 55.1 K
1 | accuracy | Accuracy   | 0     
----------------------------------------
55.1 K    Trainable params
0         Non-trainable params
55.1 K    Total params
0.220     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [172]:
trainer.test()

Restoring states from the checkpoint path at tb_logs/my_model_tensorboard/version_4/checkpoints/epoch=1-step=1720.ckpt
Loaded model weights from checkpoint at tb_logs/my_model_tensorboard/version_4/checkpoints/epoch=1-step=1720.ckpt


Testing: 0it [00:00, ?it/s]

[{}]

In [173]:
# Finally lets train our AI model
trainer.fit(model)


  | Name     | Type       | Params
----------------------------------------
0 | model    | Sequential | 55.1 K
1 | accuracy | Accuracy   | 0     
----------------------------------------
55.1 K    Trainable params
0         Non-trainable params
55.1 K    Total params
0.220     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

In [174]:
! ls tb_logs/my_model_tensorboard/

version_0  version_1  version_2  version_3  version_4


In [175]:
from tensorboard.backend.event_processing import event_accumulator
import os

# creating the log folder to store json log file of each AI model version

os.makedirs("formattedjson", exist_ok=True)
os.makedirs("finallogfile", exist_ok=True)
os.makedirs("model", exist_ok=True)


versions = os.listdir('tb_logs/my_model_tensorboard')
if versions:
    versions.sort(key=lambda version_string : list(
    map(int, re.findall(r'\d+', version_string)))[0])
    latest_version = versions[-1]

print(latest_version)

if not os.path.isdir(f"formattedjson/{latest_version}"):
  os.mkdir(f"formattedjson/{latest_version}")

if not os.path.isdir(f"formattedjson/{latest_version}/train_log"):
  os.mkdir(f"formattedjson/{latest_version}/train_log")   

if not os.path.isdir(f"formattedjson/{latest_version}/test_log"):
  os.mkdir(f"formattedjson/{latest_version}/test_log")

if not os.path.isdir(f"formattedjson/{latest_version}/metrics"):
  os.mkdir(f"formattedjson/{latest_version}/metrics")   

if not os.path.isdir(f"finallogfile/{latest_version}"):
  os.mkdir(f"finallogfile/{latest_version}") 

if not os.path.isdir(f"model/{latest_version}"):
  os.mkdir(f"model/{latest_version}") 

IndexError: list index out of range

In [178]:
# function to write json log file in individual model version
def record_log(latest_version):
  ea = event_accumulator.EventAccumulator(f'tb_logs/my_model_tensorboard/{latest_version}', size_guidance={event_accumulator.SCALARS: 0})
  ea.Reload()
  
  # saving each logs into json log file
  eventTags = ea.Tags()
  scalars_var = eventTags["scalars"]
  print(f'scalar: ', scalars_var)



  # exporting the logs file in json file inside formattedjson folder
  for i in scalars_var:
    if "train" in i or "val" in i:
      folder_name = "train_log"
    elif "test" in i:
      folder_name = "test_log"
    else:
      folder_name = "metrics"

    # Removing the / with _ while saving logs file , if any logfile name contains any
    file_path = f"formattedjson/{latest_version}/{folder_name}/{i.replace('/', '_')}.json"

    # Converting pandas dataframe into json file format
    pd.DataFrame(ea.Scalars(i)).to_json(file_path, orient = 'records')


    # Extracting the hyperparameter from scalar to json file
  data = ea._plugin_to_tag_to_content["hparams"]["_hparams_/session_start_info"]
  hparam_data = HParamsPluginData.FromString(data).session_start_info.hparams
  hparam_dict = {key: hparam_data[key].ListFields()[0][1] for key in hparam_data.keys()}

  with open(f"formattedjson/{latest_version}/metrics/hparams.json", "w") as outfile:
    json.dump(hparam_dict, outfile)

In [179]:
# function for combining logs into final single log file
def record_final_log(latest_version):
  res = []
  exclude_dir = set(['metrics'])

  dir_path = f'formattedjson/{latest_version}'
  # print(dir_path)
  for parent_path, dirs, filenames in os.walk(dir_path):
    dirs[:] = [d for d in dirs if d not in exclude_dir]
    for f in filenames:
      print(f)
      res.append(os.path.join(parent_path, f))
      # print(os.path.join(parent_path, f))

  epochs ={}
  test_epochs = {}

  # print(res)
  for metrics_path in res:
    epochs_values = pd.read_json(metrics_path)


    for index, row in epochs_values.iterrows():
      metric_split_name = metrics_path.split("/")
      splited_text = os.path.splitext(metric_split_name[-1])[0]

      if "test" in splited_text:
        _epoch_value = test_epochs.get(row['step'], {})

      else:
        _epoch_value = epochs.get(row['step'], {})

      _value = _epoch_value.get('epochs_values', None)
      final_value = row['value']
      if _value:
        final_value = mean([final_value, _value])

      # _file_name = os.path.splitext(metrics_path)[0] 
      _epoch_value.update({splited_text: final_value})

      if "test" in splited_text:
        test_epochs.update({row['step']: _epoch_value})
      else:
        epochs.update({row['step']: _epoch_value})

  final_epochs={}
  final_test_epochs = {}

  for index, value in enumerate(epochs):
    final_epochs.update({index: epochs[value]})

  for index, value in enumerate(test_epochs):
    final_test_epochs.update(test_epochs[value])  

  hparam_file = open(f'formattedjson/{latest_version}/metrics/hparams.json')
  hparams_dict = json.load(hparam_file)
    
  # Date time of project log
  log_date = datetime.now()
  formatted_log_date = log_date.strftime("%m/%d/%Y, %H:%M:%S")

  # Create experiment number from versions of, tensorboard experiment version
  version_number = latest_version.split("_")
  testver = version_number[-1]
  testno = int(testver)
#   testno = int(testver) +1    


  final_data = [{
      "exp": {
          "exp_no": 'exp_'+ str(testno),
          "datetime": formatted_log_date,
          "hyperparameters": hparams_dict,
          "epochs": final_epochs,
          "test_metrics":final_test_epochs
      }
      
  }]

  with open(f"finallogfile/{latest_version}/log_exp_{testno}.json", "w") as outfile:
    json.dump(final_data, outfile) 

record_log(latest_version)
record_final_log(latest_version)


scalar:  ['hp_metric', 'val_loss', 'val_accuracy', 'train_loss', 'train_acc', 'train_recall', 'train_precision', 'train_f1_score', 'test_loss', 'test_accuracy', 'test_f1_score', 'test_recall', 'test_precision', 'Valid', 'Train Loss', 'Train Accuracy', 'Train Recall', 'Train Precision', 'Train F1 Score']
test_accuracy.json
test_f1_score.json
test_loss.json
test_precision.json
test_recall.json
val_loss.json
train_loss.json
train_acc.json
train_f1_score.json
train_precision.json
train_recall.json
val_accuracy.json


In [None]:
def upload_file_bucket(latest_version):
    # creating experiment number from latest_version
    version_number = latest_version.split("_")
    testver = version_number[-1]
    testno = int(testver)  
    
    json_path = f"finallogfile/{latest_version}/log_exp_{testno}.json"
    print(json_path)
    
    split_name = json_path.split("/")
    splited_text = split_name[-1]
    print(splited_text)

    # Upload a File to you OCI Bucket, 1st value is the path of the directory, 2nd value is bucket name and 3rd value is file name with version foldername
    s3.meta.client.upload_file(json_path, 'ds-1', 'TSD/'+VERSION_NO+'/logs/'+splited_text)
    
upload_file_bucket(latest_version)     

In [None]:
def upload_model(latest_version):
    
    # creating experiment number from latest_version
    version_number = latest_version.split("_")
    testver = version_number[-1]
    testno = int(testver)
#         testno = int(testver) +1  

    
    # save the model to local disk
    model_name = f'model/{latest_version}/tsd_model_{testno}.pkl'
    with open(model_name, 'wb') as files:
      pickle.dump(model, files)
    
    
    json_path = f"model/{latest_version}/tsd_model_{testno}.pkl"
    print(json_path)
    split_name = json_path.split("/")
    splited_text = split_name[-1]
    print(splited_text)
    
    
    # Upload a File to you OCI Bucket, 1st value is the path of the directory, 2nd value is bucket name and 3rd value is file name with version foldername
    s3.meta.client.upload_file(json_path, 'ds-1', 'TSD/'+VERSION_NO+'/artifacts/model/'+splited_text)
    
upload_model(latest_version)    