### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [99]:
import os
import numpy as np
import shutil

In [100]:
"1".zfill(3)

'001'

In [101]:
os.makedirs("Real_Fake_100shot_Samples")

In [102]:
images = []
labels = []
kl= []

for cat in range(0, 5):
    real = os.listdir(f"../data/KLGrades/{cat}")
    fake = os.listdir(f"LabeledResults/{cat}")
    real_sample = np.random.choice(real, 4)
    fake_sample = np.random.choice(fake, 4)
    
    images.extend(real_sample)
    images.extend(fake_sample)
    labels.extend(["REAL"] * 4 + ["FAKE"]*4)
    kl.extend([cat]*8)

In [107]:
import pandas as pd
info = pd.DataFrame({
    "Image ID": [str(i).zfill(3) for i in range(0, 40)],
    "Label": labels,
    "KL": kl
           
})

In [114]:
info = info.sample(40).reset_index(drop=True)

In [115]:
info.to_csv("Real_Fake_100shot.csv", index=False)

In [103]:
for i in range(0, len(kl)):
    if labels[i] == "REAL":
        shutil.copy(f"../data/KLGrades/{kl[i]}/{images[i]}", f"Real_Fake_100shot_Samples/{str(i).zfill(3)}.png")
    else:
        shutil.copy(f"LabeledResults/{kl[i]}/{images[i]}", f"Real_Fake_100shot_Samples/{str(i).zfill(3)}.png")

In [104]:
len(kl)

40

In [110]:
!rm -r Real_Fake_100shot_Samples/ 

In [111]:
!rm Real_Fake_100shot_Samples.zip

In [105]:
!zip Real_Fake_100shot_Samples.zip -r Real_Fake_100shot_Samples/

  adding: Real_Fake_100shot_Samples/ (stored 0%)
  adding: Real_Fake_100shot_Samples/038.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/033.png (stored 0%)
  adding: Real_Fake_100shot_Samples/010.png (stored 0%)
  adding: Real_Fake_100shot_Samples/024.png (stored 0%)
  adding: Real_Fake_100shot_Samples/007.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/026.png (stored 0%)
  adding: Real_Fake_100shot_Samples/030.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/008.png (stored 0%)
  adding: Real_Fake_100shot_Samples/036.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/029.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/020.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/027.png (stored 0%)
  adding: Real_Fake_100shot_Samples/000.png (stored 0%)
  adding: Real_Fake_100shot_Samples/018.png (stored 0%)
  adding: Real_Fake_100shot_Samples/004.png (deflated 0%)
  adding: Real_Fake_100shot_Samples/003.png (stored 0%)
  adding: Real_Fake_100shot_Samples/011.p

In [32]:
from proto.model import ProtoNet
import torch

In [34]:
net = ProtoNet()
net = torch.nn.DataParallel(net, device_ids=[0])

In [35]:
net.load_state_dict(torch.load('proto_results/kl_proto_50shot/models/proto_best.pt'))

<All keys matched successfully>

In [36]:
net.cuda()

DataParallel(
  (module): ProtoNet(
    (encoder): Sequential(
      (0): Sequential(
        (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (1): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (2): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
     

In [37]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchmetrics.functional.pairwise import euclidean
from torchvision import transforms
from proto.data import EmbeddingImageFolder, parse_image_set
from proto.utils import filter_files_by_class

def get_mean_embeddings(model, train_data, out_path):
    image_list = parse_image_set(train_data)
    files_by_class = filter_files_by_class(image_list, cats=[f"KL{kl}" for kl in range(0, 5)])
    trfms = transforms.Compose([
        transforms.Resize((int(224), int(224))),
        transforms.ToTensor()])

    embedding_arr = []
    for cat in files_by_class:
        data = EmbeddingImageFolder(files_by_class[cat], kl_grade=cat, transforms=trfms)
        dataloader = DataLoader(data)
        with torch.no_grad():
            embeddings = torch.tensor([])
            for img in dataloader:
                img = img.cuda()
                out = model(img).cpu()
                embeddings = torch.cat([embeddings, out])
            embeddings = embeddings.reshape(1, embeddings.shape[0], -1)
            embeddings = embeddings.mean(dim=1).squeeze()
            embedding_arr.append(embeddings.numpy())
        
            
    torch.save(torch.tensor(np.array(embedding_arr)), str(os.path.join(out_path, "train_embeddings.pt")))

def label_image(mean_embeddings, images):
    pass



In [38]:
get_mean_embeddings(net, "../data/KLGradesGANs/50-shot_train.txt", "proto_results")

In [51]:
def get_test_embeddings(model, train_data, out_path):
    image_list = parse_image_set(train_data)
    files_by_class = filter_files_by_class(image_list, cats=[f"KL{kl}" for kl in range(0, 5)])
    trfms = transforms.Compose([
        transforms.Resize((int(224), int(224))),
        transforms.ToTensor()])

    embedding_arr = []
    labels = []
    for cat in files_by_class:
        data = EmbeddingImageFolder(files_by_class[cat], kl_grade=cat, transforms=trfms)
        dataloader = DataLoader(data)
        with torch.no_grad():
            for img in dataloader:
                img = img.cuda()
                out = model(img).cpu()
                embedding_arr.append(out.numpy())
                labels.append(cat)
            
    torch.save({"embeddings": torch.tensor(np.array(embedding_arr)),
                "labels": labels}, str(os.path.join(out_path, "test_embeddings_indv.pt")))


In [53]:
get_test_embeddings(net, "../data/KLGradesGANs/50-shot_valid.txt", "proto_results")

In [7]:
embeddings = torch.load("proto_results/train_embeddings.pt")

In [8]:
embeddings.shape

torch.Size([5, 576])

In [9]:
torch.softmax(embeddings, dim=0)

tensor([[0.1976, 0.2371, 0.1962,  ..., 0.0844, 0.0015, 0.0441],
        [0.1956, 0.2056, 0.2265,  ..., 0.0655, 0.0026, 0.0454],
        [0.2178, 0.2097, 0.2116,  ..., 0.1078, 0.0034, 0.0926],
        [0.2011, 0.1814, 0.2007,  ..., 0.1209, 0.0285, 0.1012],
        [0.1879, 0.1661, 0.1650,  ..., 0.6214, 0.9640, 0.7168]])

In [10]:
import os
from torchmetrics.functional.pairwise import euclidean
from PIL import Image

labels = []
act_label = []
for kl in range(0,5):
    images = [os.path.join(f"data/KLGradesGANs/kl{kl}_test/img", img) for img in os.listdir(f"data/KLGradesGANs/kl{kl}_test/img")]
    trfms = transforms.Compose([
            transforms.Resize((int(224), int(224))),
            transforms.ToTensor()])

    for img in images:
        im = Image.open(img).convert("L")
        im = trfms(im)
        with torch.no_grad():
            emb_img = net(im.unsqueeze(dim=0).cuda())
            logits = -euclidean.pairwise_euclidean_distance(emb_img, torch.tensor(embeddings).cuda()).cpu()
            labels.append(logits.argmax(dim=1).item())
            act_label.append(kl)

In [33]:
from sklearn.metrics import confusion_matrix, classification_report

In [30]:
confusion_matrix(labels, act_label)

array([[2841,   38,   90,    0,    0],
       [  14, 1268,   30,    3,    0],
       [  28,    5, 1762,    7,    0],
       [   1,    0,   46,  948,    0],
       [   0,    0,    7,   10,  245]])

In [31]:
(2841 + 1268 + 1762 + 948 + 245)/len(labels)

0.9620046302601116

In [35]:
print(classification_report(labels, act_label))

              precision    recall  f1-score   support

           0       0.99      0.96      0.97      2969
           1       0.97      0.96      0.97      1315
           2       0.91      0.98      0.94      1802
           3       0.98      0.95      0.97       995
           4       1.00      0.94      0.97       262

    accuracy                           0.96      7343
   macro avg       0.97      0.96      0.96      7343
weighted avg       0.96      0.96      0.96      7343



In [41]:
os.makedirs("LabeledResults/0")

os.makedirs("LabeledResults/1")
os.makedirs("LabeledResults/2")
os.makedirs("LabeledResults/3")
os.makedirs("LabeledResults/4")

In [45]:
import shutil
from tqdm import tqdm
cntr = 0
for img in tqdm(os.listdir("../FastGAN-pytorch/KL_100shot_30000_images/eval_21000/img/")):
    im = Image.open(os.path.join("../FastGAN-pytorch/KL_100shot_30000_images/eval_21000/img/", img)).convert("L")
    im = trfms(im)
    with torch.no_grad():
        emb_img = net(im.unsqueeze(dim=0).cuda())
        logits = -euclidean.pairwise_euclidean_distance(emb_img, torch.tensor(embeddings).cuda()).cpu()
        label = logits.argmax(dim=1).item()

        shutil.copy(os.path.join("../FastGAN-pytorch/KL_100shot_30000_images/eval_21000/img/", img), f"LabeledResults/{label}/{img}")

100%|██████████| 30000/30000 [01:29<00:00, 333.61it/s]


In [43]:
rm -r ../FastGAN-pytorch/KL_100shot_30000_images/eval_21000/img/

In [40]:
!rm -r LabeledResults/

In [46]:
!zip gan_labeled_50shot_30000.zip -r LabeledResults/

  adding: LabeledResults/ (stored 0%)
  adding: LabeledResults/3/ (stored 0%)
  adding: LabeledResults/3/2342.png (deflated 0%)
  adding: LabeledResults/3/24150.png (deflated 0%)
  adding: LabeledResults/3/21485.png (deflated 0%)
  adding: LabeledResults/3/15463.png (deflated 0%)
  adding: LabeledResults/3/25659.png (deflated 0%)
  adding: LabeledResults/3/7730.png (deflated 0%)
  adding: LabeledResults/3/1934.png (deflated 0%)
  adding: LabeledResults/3/197.png (stored 0%)
  adding: LabeledResults/3/22291.png (deflated 0%)
  adding: LabeledResults/3/7164.png (deflated 0%)
  adding: LabeledResults/3/2871.png (deflated 0%)
  adding: LabeledResults/3/3754.png (deflated 0%)
  adding: LabeledResults/3/10547.png (deflated 0%)
  adding: LabeledResults/3/2704.png (deflated 0%)
  adding: LabeledResults/3/15644.png (deflated 0%)
  adding: LabeledResults/3/28120.png (deflated 0%)
  adding: LabeledResults/3/24417.png (deflated 0%)
  adding: LabeledResults/3/28208.png (deflated 0%)
  adding: Label

In [51]:
list(range(0, 50))

ERROR - Exception
Traceback (most recent call last):
  File "/home/datascience/conda/pytorch20_p39_gpu_v2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_264/512359963.py", line 1, in <cell line: 1>
    list(1000*range(0, 50))
TypeError: unsupported operand type(s) for *: 'int' and 'range'
TypeError: unsupported operand type(s) for *: 'int' and 'range'

In [6]:
import os
len(os.listdir("LabeledResults/4/"))

2017