# FactorCL on IRFL

## Preparation

In [None]:
!pip install datasets
!pip install transformers

Cloning into 'MultiBench'...
remote: Enumerating objects: 6925, done.[K
remote: Counting objects: 100% (136/136), done.[K
remote: Compressing objects: 100% (76/76), done.[K
remote: Total 6925 (delta 62), reused 123 (delta 60), pack-reused 6789[K
Receiving objects: 100% (6925/6925), 51.06 MiB | 17.93 MiB/s, done.
Resolving deltas: 100% (4248/4248), done.


In [None]:
!git clone https://github.com/irfl-dataset/IRFL

/content/MultiBench


In [None]:
import torch
import numpy as np
import pandas as pd
import json

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from datasets import load_dataset

import PIL.Image as Image
import requests
from urllib.request import urlopen

In [None]:
!git clone https://github.com/pliang279/FactorCL

Cloning into 'FactorCL'...
remote: Enumerating objects: 37, done.[K
remote: Counting objects: 100% (37/37), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 37 (delta 14), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (37/37), 20.62 KiB | 1.87 MiB/s, done.


In [None]:
%cd FactorCL

In [None]:
import os
import sys
from torch.utils.data import DataLoader
from sklearn.linear_model import LogisticRegression
from datasets import load_dataset
from transformers import AutoProcessor, CLIPModel

from IRFL_model import*

## IRFL Dataset

In [None]:
# Load the three IRFL understanding-task tables (simile, idiom, metaphor)
# from the local IRFL directory under /content/IRFL.
simile_df = pd.read_csv('/content/IRFL/assets/tasks/simile_understanding_task.csv')
idiom_df = pd.read_csv('/content/IRFL/assets/tasks/idiom_understanding_task.csv')
metaphor_df = pd.read_csv('/content/IRFL/assets/tasks/metaphor_understanding_task.csv')

In [None]:
def process_df(df):
    """Download the images referenced by an IRFL understanding-task table.

    Each row is expected to hold a stringified list of image URLs in the
    ``distractors`` and ``answer`` columns, plus a ``phrase`` and a
    ``figurative_type`` value.  Rows for which parsing, downloading or
    opening an image fails are skipped as a whole, so the four returned
    lists always stay aligned index-by-index.

    Args:
        df: pandas DataFrame with ``distractors``, ``answer``, ``phrase``
            and ``figurative_type`` columns.

    Returns:
        Tuple ``(distractors, answers, texts, types)`` of equal-length
        lists: the list of distractor images per row, the first answer
        image per row, the phrase, and the figurative type.

    Raises:
        KeyError: if one of the required columns is missing from ``df``.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from ast import literal_eval

    distractors_urls = df['distractors'].to_list()
    # Fix: the answer image must come from the answer column.  Reading
    # 'distractors' here made every "answer" the first distractor image.
    # NOTE(review): column name 'answer' assumed from the IRFL task CSVs.
    answers_urls = df['answer'].to_list()
    phrases = df['phrase'].to_list()
    fig_types = df['figurative_type'].to_list()

    distractors = []
    answers = []
    texts = []
    types = []

    total = len(distractors_urls)
    rows = zip(distractors_urls, answers_urls, phrases, fig_types)
    for i, (d_urls, a_urls, text, fig_type) in enumerate(rows):
        print(f'{i}/{total}')
        try:
            # literal_eval parses the URL-list literal without executing
            # arbitrary expressions coming from the CSV (previously eval()).
            distractor = [
                Image.open(urlopen(url)) for url in literal_eval(d_urls)
            ]
            answer = Image.open(urlopen(literal_eval(a_urls)[0]))
        except Exception:
            # Best-effort download: drop the whole row on any failure.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            continue

        # Append only after every step succeeded to keep the lists aligned.
        distractors.append(distractor)
        answers.append(answer)
        texts.append(text)
        types.append(fig_type)

    return distractors, answers, texts, types

def collate_fn(batch):
    """Regroup a list of ``(image, text, label)`` samples by field.

    Images and texts are returned as plain Python lists (in batch order);
    the labels are packed into a single integer tensor.
    """
    images, texts, labels = [], [], []
    for sample in batch:
        images.append(sample[0])
        texts.append(sample[1])
        labels.append(sample[2])

    label_tensor = torch.tensor(labels, dtype=int)
    return images, texts, label_tensor

def process_fn(batch):
    """Encode a collated batch with the module-level ``processor``.

    ``batch`` is an ``(images, texts, labels)`` triple.  The images and
    texts are passed to ``processor`` (padded, returned as PyTorch
    tensors); the labels are handed back untouched next to the encoding.
    """
    images, texts, labels = batch
    encoded = processor(
        text=texts,
        images=images,
        return_tensors='pt',
        padding=True,
    )
    return encoded, labels

class FigTypeDataset(Dataset):
    """Dataset of ``(image, text, label)`` triples labelled by figurative type."""

    def __init__(self, answers, texts, types):
        """Store the parallel lists and derive an integer label per sample."""
        self.images = answers
        self.texts = texts
        self.types = types

        # Integer class id for each figurative type string.
        self.type_map = {'idiom': 0, 'simile': 1, 'metaphor': 2}
        self.labels = [self.type_map[fig_type] for fig_type in self.types]

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, text, label)`` triple at position ``idx``."""
        sample = (self.images[idx], self.texts[idx], self.labels[idx])
        return sample

In [None]:
def _embed_loader(model, loader):
    """Embed every batch of ``loader``; return stacked (embeds, labels) arrays."""
    embeds = []
    labels = []
    # Inference only: no_grad avoids building an autograd graph for each
    # forward pass (the outputs were detached right away anyway).
    with torch.no_grad():
        for batch in loader:
            inputs, label = process_fn(batch)
            inputs, label = inputs.to(device), label.to(device)

            outputs = model(**inputs)
            image_embeds = outputs.image_embeds.detach().cpu().numpy()
            text_embeds = outputs.text_embeds.detach().cpu().numpy()

            # One feature row per sample: image embedding followed by text
            # embedding.
            embeds.append(np.concatenate([image_embeds, text_embeds], axis=1))
            labels.append(label.detach().cpu().numpy())

    return np.concatenate(embeds, axis=0), np.concatenate(labels, axis=0)


def get_embeds(model, processor, train_loader, test_loader):
    """Extract concatenated image+text embeddings for both data loaders.

    Every batch is encoded with ``process_fn``, moved to the module-level
    ``device`` and passed through ``model``; the ``image_embeds`` and
    ``text_embeds`` outputs are concatenated along the feature axis.

    Args:
        model: model whose output exposes ``image_embeds`` and
            ``text_embeds``.
        processor: not used inside this function (``process_fn`` reads the
            module-level ``processor``); kept for interface compatibility.
        train_loader: iterable of collated training batches.
        test_loader: iterable of collated test batches.

    Returns:
        Tuple ``(train_embeds, train_labels, test_embeds, test_labels)`` of
        NumPy arrays.

    Raises:
        ValueError: from ``np.concatenate`` if a loader yields no batches.
    """
    train_embeds, train_labels = _embed_loader(model, train_loader)
    test_embeds, test_labels = _embed_loader(model, test_loader)

    return train_embeds, train_labels, test_embeds, test_labels

In [None]:
# Download the images for each task table. process_df prints a progress line
# per row and silently skips rows whose images cannot be fetched.
distractors_simile, answers_simile, texts_simile, types_simile = process_df(simile_df)
distractors_idiom, answers_idiom, texts_idiom, types_idiom = process_df(idiom_df)
distractors_metaphor, answers_metaphor, texts_metaphor, types_metaphor = process_df(metaphor_df)

In [None]:
# Merge the per-task lists into one dataset, always in the same order
# (idiom, simile, metaphor) so the four lists stay aligned.
distractors = distractors_idiom + distractors_simile + distractors_metaphor
answers = answers_idiom + answers_simile + answers_metaphor
texts = texts_idiom + texts_simile + texts_metaphor
types = types_idiom + types_simile + types_metaphor

In [None]:
# Mini-batch size shared by the train and test loaders.
batch_size = 16

dataset = FigTypeDataset(answers, texts, types)

# Random 80/20 train/test split; the test part takes the remainder so the two
# lengths always sum to len(dataset). No seed is set in the visible cells, so
# the split presumably differs between runs.
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.8*len(dataset)), len(dataset)-int(0.8*len(dataset))])
# collate_fn keeps images/texts as lists; they are tensorised later by process_fn.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size, collate_fn=collate_fn)

## FactorCL-SUP

In [None]:
# Start from the pretrained CLIP ViT-B/32 checkpoint and its matching processor.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

# RUSModel / train_rusmodel are not defined in the visible cells — presumably
# provided by `from IRFL_model import *`; same for `device` (TODO confirm).
rus_model = RUSModel(model, processor, [512,512], 3, device, lr=1e-6).to(device)
rus_model.train()

train_rusmodel(rus_model, train_loader, num_epoch=10, num_club_iter=1)

# Evaluate the CLIP model object that was handed to RUSModel: extract
# image+text embeddings for the train and test splits.
model.eval()
train_embeds, train_labels, test_embeds, test_labels = get_embeds(model, processor, train_loader, test_loader)

# Linear probe: fit logistic regression on the train embeddings and score it
# on the test embeddings.
clf = LogisticRegression(max_iter=200).fit(train_embeds, train_labels)
score = clf.score(test_embeds, test_labels)

In [None]:
# Display the logistic-regression test score of the FactorCL-SUP run.
score

## SimCLR

In [None]:
# Reload a fresh pretrained CLIP ViT-B/32 so this baseline does not reuse the
# `model` object from the previous experiment.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

# SupConResNet / train_supcon are not defined in the visible cells — presumably
# provided by `from IRFL_model import *`; same for `device` (TODO confirm).
simclr_model = SupConResNet(model, processor, 0.5, [512,512], [512,512]).to(device)
simclr_model.train()

optimizer = optim.Adam(simclr_model.parameters(), lr=1e-6)

train_supcon(simclr_model, train_loader, optimizer, num_epoch=10)

# Same evaluation protocol as the FactorCL-SUP cell: embed both splits with
# the CLIP model, then fit and score a logistic-regression probe.
model.eval()
train_embeds, train_labels, test_embeds, test_labels = get_embeds(model, processor, train_loader, test_loader)

clf = LogisticRegression(max_iter=200).fit(train_embeds, train_labels)
score = clf.score(test_embeds, test_labels)

In [None]:
# Display the logistic-regression test score of the SimCLR run.
score