In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git
!git clone https://gitee.com/oscarlin/clip-cross-domain-test.git

In [None]:
# Switch the working directory to the cloned repository. Presumably this is what
# lets the later `from dataset import *` / `from utils import Metric` resolve —
# confirm against the repository layout.
%cd ./clip-cross-domain-test

In [None]:
import torch
from torch.utils.data import Dataset
import pandas as pd
from PIL import Image
class RetinopathyDataset(Dataset):
    """Retinal images paired with their diabetic-retinopathy grade.

    The label CSV must contain a ``level`` column holding the integer grade,
    which doubles as an index into ``classes``. The first column other than
    ``level`` is used as the image file stem; each image is opened from
    ``<path>/<stem>.jpeg``.

    Rows whose grade is not a valid index into ``classes`` are dropped. The
    remaining rows are stored in ``data`` ordered by grade, keeping the CSV
    order within each grade.

    Args:
        path: Folder that contains the ``.jpeg`` images.
        csv_path: Path of the label CSV.
        transform: Optional callable applied to every opened image; when
            ``None`` the opened image is returned as is.

    Raises:
        KeyError: If the CSV has no ``level`` column.
        ValueError: If the CSV has no column besides ``level``.
    """

    def __init__(self, path, csv_path, transform):
        self.transform = transform
        self.img_folder_path = path
        self.classes = ['no', 'mild', 'moderate', 'severe', 'proliferative']

        df = pd.read_csv(csv_path)
        levels = df['level']
        name_columns = [col for col in df.columns if col != 'level']
        if not name_columns:
            raise ValueError(
                "label CSV needs an image-name column in addition to 'level'"
            )
        # Resolve the name column once so __getitem__ does not rely on the CSV
        # having exactly two columns.
        self._name_column = name_columns[0]

        # Keep only grades that map onto `classes` and order them by grade.
        # A stable sort preserves the CSV order inside each grade and, unlike
        # concatenating one group per grade, works when a grade is absent.
        valid = levels.isin(range(len(self.classes)))
        self.data = df[valid].sort_values('level', kind='stable')

    def __len__(self):
        """Return the number of labelled images kept from the CSV."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        ``image`` is the result of ``transform`` when one was given, otherwise
        the object returned by ``Image.open``. ``label`` is a plain ``int``.
        """
        # Imported here because this notebook cell never imports `os` itself.
        import os

        row = self.data.iloc[index]
        img_name = str(row[self._name_column])
        label = int(row['level'])
        img_path = os.path.join(self.img_folder_path, img_name + '.jpeg')
        img = Image.open(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

In [None]:
import clip
import torch
from dataset import *
from tqdm import tqdm
from torch.utils.data import DataLoader
from utils import Metric
#parser = argparse.ArgumentParser()
#parser.add_argument("--vb", help="visual backbone", default='ViT-B-32.pt')
#parser.add_argument("--n", help="dataset name", default='NICO')
#parser.add_argument("--p", help="folder path", default='.')

#args = parser.parse_args()

# Load the CLIP model and the image preprocessing pipeline that goes with it.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("use {} to inference".format(device))
model, preprocess = clip.load("RN50", device)

# Build the dataset; CLIP's `preprocess` is applied to every image it yields.
image_folder_path = "/kaggle/input/diabetic-retinopathy-resized/resized_train/resized_train"
csv_path = '/kaggle/input/diabetic-retinopathy-resized/trainLabels.csv'
dataset = RetinopathyDataset(image_folder_path,csv_path,preprocess)
BATCH_SIZE = 8

# `Metric` comes from the cloned repository; it is fed per-class probabilities
# and ground-truth labels below and prints its summary via `report()`.
metric = Metric(dataset.classes)
metric.clear()
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, drop_last=False)

# One text prompt per class, tokenized once for the whole run.
texts = torch.cat([clip.tokenize(f"There is {c} diabetic retinopathy in this image") for c in dataset.classes]).to(device)
with torch.no_grad():
    # The prompts never change, so encode and L2-normalise them a single time
    # instead of repeating the same text forward pass for every image batch.
    text_features = model.encode_text(texts)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    for imgs, labels in tqdm(loader):
        image_features = model.encode_image(imgs.to(device))
        image_features /= image_features.norm(dim=-1, keepdim=True)
        # Scaled cosine similarity between each image and each class prompt,
        # turned into a probability distribution over the classes.
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        # Cast to float32 on the CPU before handing the scores to the metric.
        metric.update(similarity.cpu().type(torch.float32), labels)
    metric.report()