In [1]:
import pandas as pd
from tqdm import tqdm
tqdm.pandas()

from PIL import Image
import requests
from transformers import AutoProcessor, CLIPVisionModel

import sys
sys.path.append('/local/xiaowang/food_ingredient/')
from utils.utils import get_img_path, set_seeds

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim import Adam

device = "cuda:6"
batch_size = 128
set_seeds(42)
img_dir = '/local/xiaowang/food_ingredient/1m_test'
train_file = '/local/xiaowang/food_ingredient/Dataset_440_labels/train_set.json'
test_file = '/local/xiaowang/food_ingredient/Dataset_440_labels/test_set.json'
val_file = '/local/xiaowang/food_ingredient/Dataset_440_labels/val_set.json'

train_df = pd.read_json(train_file, orient='records', lines=True)
test_df = pd.read_json(test_file, orient='records', lines=True)
val_df = pd.read_json(val_file, orient='records', lines=True)

train_explode_df = train_df.explode('image_file_name_ls', ignore_index=True)
test_explode_df = test_df.explode('image_file_name_ls', ignore_index=True)
val_explode_df = val_df.explode('image_file_name_ls', ignore_index=True)

In [2]:
model = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

model.eval()

# freeze all layers for linear probe
for param in model.parameters():
    param.requires_grad = False

  return self.fget.__get__(instance, owner)()


In [3]:
train_explode_df.head(2)

Unnamed: 0,id,images,image_file_name_ls,ingredients,url,partition,title,instructions,extracted_ingredients,cleaned_ingredients,generated_intro
0,787347fc9b,"[{'id': '589aa53634.jpg', 'url': 'http://img.s...",589aa53634.jpg,"[{'text': '34 cup plain flour'}, {'text': '1 1...",http://www.food.com/recipe/anzac-biscuits-with...,test,Anzac Biscuits With Macadamias (Australian),[{'text': 'Preheat oven to 160 degrees Celsius...,"[plain flour, oat, white sugar, coconut, macad...","[oat, butter, coconut, syrup, macadamia, sugar...",Title: Turkey Meatballs and Pasta in Broth\n\n...
1,787347fc9b,"[{'id': '589aa53634.jpg', 'url': 'http://img.s...",5ec5605334.jpg,"[{'text': '34 cup plain flour'}, {'text': '1 1...",http://www.food.com/recipe/anzac-biscuits-with...,test,Anzac Biscuits With Macadamias (Australian),[{'text': 'Preheat oven to 160 degrees Celsius...,"[plain flour, oat, white sugar, coconut, macad...","[oat, butter, coconut, syrup, macadamia, sugar...",Title: Turkey Meatballs and Pasta in Broth\n\n...


In [4]:
# define labels
label_list = train_explode_df.explode('cleaned_ingredients').cleaned_ingredients.unique().tolist()
label_list += test_explode_df.explode('cleaned_ingredients').cleaned_ingredients.unique().tolist()
label_list += val_explode_df.explode('cleaned_ingredients').cleaned_ingredients.unique().tolist()
label_list = list(set(label_list))
num_classes = len(label_list)
print(num_classes)

label2idx = {label: idx for idx, label in enumerate(label_list)}
idx2label = {idx: label for idx, label in enumerate(label_list)}

440


In [5]:
classifier = torch.nn.Linear(768, num_classes).to(device)

In [6]:
# read the pooler_output from disk
train_pooler_output = torch.load('train_pooler_output.pt').to(device).squeeze(1)
val_pooler_output = torch.load('val_pooler_output.pt').to(device).squeeze(1)
test_pooler_output = torch.load('test_pooler_output.pt').to(device).squeeze(1)

print(train_pooler_output.shape)
print(val_pooler_output.shape)
print(test_pooler_output.shape)

torch.Size([107156, 768])
torch.Size([13034, 768])
torch.Size([13584, 768])


In [7]:
labels = train_explode_df.iloc[0]['cleaned_ingredients']
class_idx = [label2idx[label] for label in labels]
ohe = F.one_hot(torch.tensor(class_idx), num_classes=num_classes).sum(0).float()

In [8]:
# define dataset and dataloader
class FoodLinearProbeDataset(Dataset):
    def __init__(self, df, img_dir, device):
        self.df = df
        self.img_dir = img_dir
        self.device = device

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):

        labels = self.df.iloc[idx]['cleaned_ingredients']
        class_idx = [label2idx[label] for label in labels]
        ohe = F.one_hot(torch.tensor(class_idx), num_classes=num_classes).sum(0).float().to(device)

        return idx,ohe 

In [9]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = Adam(classifier.parameters(), lr=1e-3)

train_dataset = FoodLinearProbeDataset(train_explode_df, img_dir, device)
val_dataset = FoodLinearProbeDataset(val_explode_df, img_dir, device)
test_dataset = FoodLinearProbeDataset(test_explode_df, img_dir, device)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [10]:
# test the dataloader
for idx, ohe in train_loader:
    print(idx)
    print(ohe.shape)
    break

tensor([65101, 49399])
torch.Size([2, 440])


In [12]:
pred = classifier(train_pooler_output[idx])
pred.shape

torch.Size([2, 440])

In [15]:
pred_np = pred.detach().cpu().numpy()
pred_np_binary = (pred_np > 0.5).astype(int)

In [13]:
ohe_np = ohe.detach().cpu().numpy().astype(int)

In [16]:
from sklearn.metrics import accuracy_score, f1_score, precision_score,recall_score

acc = accuracy_score(pred_np_binary, ohe_np)
mi_f1 = f1_score(pred_np_binary, ohe_np, average='micro')
ma_f1 = f1_score(pred_np_binary, ohe_np, average='macro')
mi_precision = precision_score(pred_np_binary, ohe_np, average='micro')
ma_precision = precision_score(pred_np_binary, ohe_np, average='macro')
mi_recall = recall_score(pred_np_binary, ohe_np, average='micro')
ma_recall = recall_score(pred_np_binary, ohe_np, average='macro')

print(mi_f1)

0.03278688524590164


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score,recall_score

def get_metrics(pred_file: dict, true_file: pd.DataFrame, accu_set: set) -> dict:
    """
    Get the metrics for the accusation prediction
    """
    # ohe for pred dataset
    metrics_dict = {}
    pred_data_ohe = []
    for i in range(len(pred_file)):
        sft_pred = pred_file[str(i)]
        pred_ohe = get_pred_ohe(sft_pred, accu_set)
        pred_data_ohe.append(pred_ohe)
    pred_data_ohe = np.array(pred_data_ohe)

    # ohe for true dataset
    true_data_ohe = []
    for i in range(len(true_file)):
        true_ohe = get_true_ohe(true_file.iloc[i]['defendant_accusation'], accu_set)
        true_data_ohe.append(true_ohe)
    true_data_ohe = np.array(true_data_ohe)

    # get the metrics 
    acc = accuracy_score(true_data_ohe, pred_data_ohe)
    mi_f1 = f1_score(true_data_ohe, pred_data_ohe, average='micro')
    ma_f1 = f1_score(true_data_ohe, pred_data_ohe, average='macro')
    mi_precision = precision_score(true_data_ohe, pred_data_ohe, average='micro')
    ma_precision = precision_score(true_data_ohe, pred_data_ohe, average='macro')
    mi_recall = recall_score(true_data_ohe, pred_data_ohe, average='micro')
    ma_recall = recall_score(true_data_ohe, pred_data_ohe, average='macro')

    metrics_dict = {
        'accuracy': acc, 
        'micro_f1': mi_f1, 
        'macro_f1': ma_f1, 
        'micro_precision': mi_precision, 
        'macro_precision': ma_precision, 
        'micro_recall': mi_recall, 
        'macro_recall': ma_recall
        }
    return metrics_dict