In [1]:
import os
import sys
# Make the project sources under ../src importable from this notebook.
# Optional: uncomment the next line to silence wandb console output.
# os.environ["WANDB_SILENT"] = "true"
module_path = "../src"
if sys.path.count(module_path) == 0:
    # Only register the directory once, so re-running the cell is harmless.
    sys.path.append(module_path)
# 1. General Libraries
import pandas as pd
import numpy as np
from pathlib import Path
import yaml
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# 2. DataModule & Class Libraries
from utils.label_encoder import label_encoder_target
from utils.calc_stat import calc_dataset_mean_std
from utils.FeatureExtractor import get_vector_representation,prepare_projection
from dataset.ImageDataModule import ImageDataModule
from dataset.ImageDataset import ImageDataset
from models.EfficientNetClass import EfficientNetClass
from models.ResNetClass import ResNetClass
# 3. Pytorch & Pytorch Lightning Libraries
from pytorch_lightning import Trainer,seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
import torch
# 4. Wandb Experiment Tracker
import wandb

In [2]:
# Notebook configuration: input table, storage root, random seed and checkpoint.
DATA_PATH = '../data/molecular_dataset.csv'
# Root of the mounted volume; every path below it is derived from this value
# so the root only has to be changed in one place.
BASE_PATH = Path('/mnt')
# Passed to prepare_projection as the image base directory
# (presumably the root that the table's img_path values are relative to -- confirm).
BASE_IMG_DIR = BASE_PATH
SEED = 42
# Checkpoint of the molecular classifier. Kept as a plain string, as before.
model_path = str(BASE_PATH / 'artifacts' / 'models' / 'Molecular' / 'molecular_v1.pt')

In [3]:
# Read the tab-separated metadata table (the file uses tabs despite its .csv name).
df = pd.read_csv(DATA_PATH, sep="\t")

# label_encoder_target is a project helper; it returns the encoder handed to
# prepare_projection later on, plus a label dictionary.
le_encoder, dict_label = label_encoder_target(df, target_col="higher_modality")

# Preview the first rows of the table.
df.head()

Unnamed: 0,img,modality,set,source,img_path,higher_modality,caption,split_set,target
0,1423-0127-16-109-3-1.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,A. Three dimensional model of class C protein ...,VAL,D3DR
1,1423-0127-16-109-3-2.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,A. Three dimensional model of class C protein ...,TRAIN,D3DR
2,1423-0127-17-34-3-1.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,Three domains of AAG A (or C)-chain: (a) domai...,TRAIN,D3DR
3,1423-0127-17-34-3-2.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,Three domains of AAG A (or C)-chain: (a) domai...,VAL,D3DR
4,1423-0127-17-34-3-3.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,Three domains of AAG A (or C)-chain: (a) domai...,TRAIN,D3DR


In [4]:
# 3.1 Load the Resnet Model
# Restore the classifier from the checkpoint file referenced by model_path.
model = ResNetClass.load_from_checkpoint(model_path)

banner = '*' * 5
print(banner, ' Resnet Model Parameters ', banner)
# Hyper-parameters stored with the model (lr, class weights, dataset mean/std, ...).
print(model.hparams)
print(banner, ' Resnet Model Children ', banner)
# enumerate() supplies the running index, so no hand-maintained counter is needed.
# Only the child names are printed; the module objects themselves are not used.
for children_counter, (layer_name, _child) in enumerate(model.named_children()):
    print("Children Counter: ", children_counter, " Layer Name: ", layer_name)

*****  Resnet Model Parameters  *****
"class_weights":   [0.98866213 1.5970696  0.73400673]
"fine_tuned_from": whole
"lr":              1e-05
"mean_dataset":    [0.851899  0.8511532 0.8413842]
"metric_monitor":  val_avg_loss
"mode_scheduler":  min
"name":            resnet101
"num_classes":     3
"pretrained":      True
"std_dataset":     [0.2773475  0.27720553 0.28995028]
*****  Resnet Model Children  *****
Children Counter:  0  Layer Name:  model


In [5]:
# Run the project's projection pipeline with the loaded model.
# The cell output reports feature-vector extraction for the training,
# validation and test splits, followed by PCA and UMAP steps.
prepare_projection(
    model,
    le_encoder,
    DATA_PATH,
    BASE_IMG_DIR,
    SEED,
    CLASSF='molecular',
    VERSION=1,
)

Feature Vector for training: 


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


Feature Vector for Validation: 


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


Feature Vector for Test: 


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))


***** PCA *****
***** UMAP *****


In [17]:
# Load the stored projection table (feature vectors plus PCA / UMAP coordinates).
# NOTE(review): this rebinds `df`, which earlier held the raw metadata table.
projection_file = '/mnt/artifacts/projections/molecular_v1.parquet'
df = pd.read_parquet(projection_file)

# Preview the first rows, including the added projection columns.
df.head()

Unnamed: 0,img,modality,set,source,img_path,higher_modality,caption,split_set,target,feature_vector,pca_x,pca_y,pca_hits,umap_x,umap_y,umap_hits
0,1423-0127-16-109-3-2.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,A. Three dimensional model of class C protein ...,TRAIN,D3DR,"[0.8944467, 0.0506167, 0.43819162, 1.297669, 0...",19.787468,-4.190973,1.0,-0.338122,5.834057,1.0
1,1423-0127-17-34-3-1.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,Three domains of AAG A (or C)-chain: (a) domai...,TRAIN,D3DR,"[0.49555984, 0.31890774, 0.6259105, 0.3529846,...",16.923067,-2.768597,1.0,-0.060504,5.340167,1.0
2,1423-0127-17-34-3-3.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,Three domains of AAG A (or C)-chain: (a) domai...,TRAIN,D3DR,"[1.4514995, 0.5873217, 0.48946068, 0.31073916,...",14.863858,-2.796963,1.0,-0.081096,5.753325,1.0
3,1423-0127-17-34-5-1.jpg,D3DR,TRAIN,clef16,subfigure-classification/2016/train/D3DR/1423-...,MOLECULAR,Two domains of AAG B (or D)-chain: (a) domain ...,TRAIN,D3DR,"[0.5142725, 0.32647976, 0.38910422, 0.57919765...",20.236599,-2.673384,1.0,0.457518,5.051065,1.0
4,1465-9921-12-61-7-4.jpg,GGEN,TRAIN,clef16,subfigure-classification/2016/train/GGEN/1465-...,MOLECULAR,Examples for variations of expression levels i...,TRAIN,GGEN,"[0.079927854, 0.7534603, 3.518612, 0.014287380...",-11.200251,-8.153898,1.0,7.080653,-8.956665,1.0
