In [1]:
import gc
import cv2
import torch
import torch.nn.functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt

import config as CFG
from CLIP import CLIPModel

from fmcib.preprocessing import get_transforms
from monai.data import CSVDataset
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
import pandas as pd
import monai
from fmcib.models import fmcib_model
import os

In [2]:

class CLIPDatasetImg(CSVDataset):
    """CSV-backed image dataset that also carries a nodule identifier table.

    The rows of the coordinate CSV are handed to ``CSVDataset`` and run
    through the transform returned by ``get_transforms()``. A second CSV is
    loaded into ``self.pid`` (columns ``pid`` and ``nodule_id``) so that
    callers can name per-sample outputs.

    Args:
        coord_path: Path of the coordinate CSV, read with ``pd.read_csv``.
        label_path: Path of the CSV that provides the ``pid`` and
            ``nodule_id`` columns. ``None`` (the default) falls back to
            ``DEFAULT_LABEL_CSV``.

    NOTE(review): ``self.pid`` comes from a different file than the samples
    themselves. ``get_image_embeddings`` indexes it by sample position, which
    presumes both CSVs list the same nodules in the same order -- confirm.
    """

    # Label table used when the caller does not supply ``label_path``.
    DEFAULT_LABEL_CSV = '/workspace/radraid/projects/imaging_biomarker/CLIP_nodule/dataset_csv/semantic_label.csv'

    def __init__(self, coord_path, label_path=None):
        if label_path is None:
            label_path = self.DEFAULT_LABEL_CSV
        self.transform = get_transforms()
        coord_csv = pd.read_csv(coord_path)
        self.pid = pd.read_csv(label_path)[['pid', 'nodule_id']]
        # CSVDataset receives the already-loaded DataFrame rather than a path.
        super().__init__(coord_csv, transform=self.transform)

    # __getitem__ is inherited from CSVDataset; the former override only
    # forwarded to it unchanged.

In [3]:
def build_loaders(coord_path):
    """Return a MONAI DataLoader over the samples listed in ``coord_path``.

    The loader yields one sample per batch and does not shuffle; the number
    of worker processes is taken from ``CFG.num_workers``.
    """
    samples = CLIPDatasetImg(coord_path)
    loader_options = {
        "batch_size": 1,
        "num_workers": CFG.num_workers,
        "shuffle": False,
    }
    return monai.data.DataLoader(samples, **loader_options)

In [4]:
def get_image_embeddings(model_path, coord_path, save_path=None):
    """Embed every sample listed in ``coord_path`` and optionally save each result.

    Args:
        model_path: The literal string ``'foundation'`` selects
            ``fmcib_model()``. Any other value is treated as the path of a
            checkpoint whose ``'model'`` entry is a ``CLIPModel`` state dict.
        coord_path: CSV path forwarded to ``build_loaders``.
        save_path: Optional output directory. When given, the embedding of
            batch ``i`` is written to ``<save_path>/<pid>_<nodule_id>.pt``.
            The directory is created if it does not exist yet.

    Returns:
        ``(model, embeddings)``, where ``embeddings`` is ``torch.cat`` of the
        per-batch outputs in loader order.
    """
    loader = build_loaders(coord_path)

    # Plain lists make ``[i]`` below a positional lookup regardless of the
    # index carried by the label DataFrame.
    id_table = loader.dataset.pid
    pid = id_table.pid.astype(int).astype(str).tolist()
    nodule_id = id_table.nodule_id.astype(int).astype(str).tolist()

    # Create the output directory up front so a missing folder is not
    # discovered only after the model has been loaded.
    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    use_foundation = model_path == 'foundation'
    if use_foundation:
        model = fmcib_model().to(CFG.device)
    else:
        # Hard-coded; the original note pointed at
        # valid_loader.dataset.semantic_features.shape[1] as its source.
        semantic_embedding = 64
        model = CLIPModel(semantic_embedding=semantic_embedding).to(CFG.device)
        # torch.load unpickles the file: only load checkpoints you trust.
        checkpoint = torch.load(model_path, map_location=CFG.device)
        model.load_state_dict(checkpoint['model'])
    model.eval()

    valid_image_embeddings = []
    with torch.no_grad():
        # Wrapping the loader (not the enumerate object) lets tqdm read
        # len(loader) and display the total and an ETA.
        for i, batch in enumerate(tqdm(loader)):
            inputs = batch.to(CFG.device)
            if use_foundation:
                image_embeddings = model(inputs)
            else:
                image_features = model.image_encoder(inputs)
                image_embeddings = model.image_projection(image_features)
            if save_path is not None:
                # ``i`` counts batches; it matches the row of the id table
                # only because build_loaders uses batch_size=1, shuffle=False.
                # The tensor is saved on CFG.device, so loading it on another
                # machine may need ``map_location``.
                torch.save(image_embeddings, os.path.join(save_path, f'{pid[i]}_{nodule_id[i]}.pt'))

            valid_image_embeddings.append(image_embeddings)

    return model, torch.cat(valid_image_embeddings)

In [31]:
# Embed the samples listed in CFG.coord_path with the pretrained foundation
# model and write one .pt file per sample into results_downstream/foundation.
# To embed with a trained CLIP checkpoint instead, pass its path as
# model_path, e.g.
# '/workspace/radraid/projects/imaging_biomarker/CLIP_nodule/results/bz_8_lr_1e-05_wd_0.001_projdim_256_dropout_0.1_epochs_100/best.pt'
m, emb = get_image_embeddings(
    model_path='foundation',
    coord_path=CFG.coord_path,
    save_path='/workspace/radraid/projects/imaging_biomarker/CLIP_nodule/results_downstream/foundation',
)

2024-04-24 09:32:17.460 | INFO     | fmcib.models.load_model:load:129 - Loaded pretrained model weights 

1167it [50:06,  2.58s/it]


In [5]:
class CLIPDatasetImg(CSVDataset):
    """CSV-backed image dataset that also exposes the ``pid`` column of its rows.

    The CSV at ``coord_path`` is loaded once; its rows become the dataset
    items (passed through the transform from ``get_transforms()``) and its
    ``pid`` column is kept as an array in ``self.pid``.
    """

    def __init__(self, coord_path):
        preprocessing = get_transforms()
        self.transform = preprocessing
        table = pd.read_csv(coord_path)
        # Identifiers come from the same table as the samples, row for row.
        self.pid = table.pid.values
        super().__init__(table, transform=preprocessing)

    def __getitem__(self, idx):
        # Straight pass-through to the CSVDataset implementation.
        return super().__getitem__(idx)
def build_loaders(coord_path):
    """Return a MONAI DataLoader over the samples listed in ``coord_path``.

    The loader yields one sample per batch and does not shuffle; the number
    of worker processes is taken from ``CFG.num_workers``.
    """
    samples = CLIPDatasetImg(coord_path)
    loader_options = {
        "batch_size": 1,
        "num_workers": CFG.num_workers,
        "shuffle": False,
    }
    return monai.data.DataLoader(samples, **loader_options)

def get_image_embeddings(model_path, coord_path, save_path=None):
    """Embed every sample listed in ``coord_path`` and optionally save each result.

    Args:
        model_path: The literal string ``'foundation'`` selects
            ``fmcib_model()``. Any other value is treated as the path of a
            checkpoint whose ``'model'`` entry is a ``CLIPModel`` state dict.
        coord_path: CSV path forwarded to ``build_loaders``.
        save_path: Optional output directory. When given, the embedding of
            batch ``i`` is written to ``<save_path>/<pid>.pt``. The directory
            is created if it does not exist yet.

    Returns:
        ``(model, embeddings)``, where ``embeddings`` is ``torch.cat`` of the
        per-batch outputs in loader order.
    """
    loader = build_loaders(coord_path)
    pid = loader.dataset.pid.astype(str)

    # Create the output directory up front so a missing folder is not
    # discovered only after the model has been loaded.
    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    use_foundation = model_path == 'foundation'
    if use_foundation:
        model = fmcib_model().to(CFG.device)
    else:
        # Hard-coded; the original note pointed at
        # valid_loader.dataset.semantic_features.shape[1] as its source.
        semantic_embedding = 64
        model = CLIPModel(semantic_embedding=semantic_embedding).to(CFG.device)
        # torch.load unpickles the file: only load checkpoints you trust.
        checkpoint = torch.load(model_path, map_location=CFG.device)
        model.load_state_dict(checkpoint['model'])
    model.eval()

    valid_image_embeddings = []
    with torch.no_grad():
        # Wrapping the loader (not the enumerate object) lets tqdm read
        # len(loader) and display the total and an ETA.
        for i, batch in enumerate(tqdm(loader)):
            inputs = batch.to(CFG.device)
            if use_foundation:
                image_embeddings = model(inputs)
            else:
                image_features = model.image_encoder(inputs)
                image_embeddings = model.image_projection(image_features)
            if save_path is not None:
                # ``i`` counts batches; it matches the CSV row only because
                # build_loaders uses batch_size=1, shuffle=False.
                # NOTE(review): files are keyed by pid alone, so rows sharing
                # a pid would overwrite each other -- confirm pids are unique.
                torch.save(image_embeddings, os.path.join(save_path, f'{pid[i]}.pt'))

            valid_image_embeddings.append(image_embeddings)

    return model, torch.cat(valid_image_embeddings)

In [6]:
# Embed the samples listed in ucla_path_label.csv with the pretrained
# foundation model, saving one .pt file per row under results_downstream.
m, emb = get_image_embeddings(
    model_path='foundation',
    coord_path='/workspace/radraid/projects/imaging_biomarker/CLIP_nodule/dataset_csv/ucla_path_label.csv',
    save_path='/workspace/radraid/projects/imaging_biomarker/CLIP_nodule/results_downstream/foundation',
)

2024-04-24 16:46:01.920 | INFO     | fmcib.models.load_model:load:129 - Loaded pretrained model weights 

0it [00:00, ?it/s]Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
56it [02:06,  2.27s/it]
