# FactorCL on IRFL

##Preparation

In [1]:
!pip install datasets
!pip install transformers

Collecting datasets
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, huggingface-hub, datasets
Successfully installed datasets-2.1

In [2]:
!git clone https://github.com/irfl-dataset/IRFL

Cloning into 'IRFL'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 49 (delta 1), reused 12 (delta 1), pack-reused 36[K
Receiving objects: 100% (49/49), 45.70 MiB | 10.98 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [3]:
import torch
import numpy as np
import pandas as pd
import json

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from datasets import load_dataset

import PIL.Image as Image
import requests
from urllib.request import urlopen

In [4]:
!git clone https://github.com/pliang279/FactorCL

Cloning into 'FactorCL'...
remote: Enumerating objects: 104, done.[K
remote: Counting objects: 100% (104/104), done.[K
remote: Compressing objects: 100% (96/96), done.[K
remote: Total 104 (delta 47), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (104/104), 268.97 KiB | 9.96 MiB/s, done.
Resolving deltas: 100% (47/47), done.


In [5]:
%cd FactorCL

/content/FactorCL


In [6]:
import os
import sys
from torch.utils.data import DataLoader
from sklearn.linear_model import LogisticRegression
from datasets import load_dataset
from transformers import AutoProcessor, CLIPModel

from IRFL_model import*

##IRFL Dataset

In [7]:
simile_df = pd.read_csv('/content/IRFL/assets/tasks/simile_understanding_task.csv')
idiom_df = pd.read_csv('/content/IRFL/assets/tasks/idiom_understanding_task.csv')
metaphor_df = pd.read_csv('/content/IRFL/assets/tasks/metaphor_understanding_task.csv')

In [8]:
def process_df(df):
  distractors_urls = df['distractors'].to_list()
  answers_urls = df['distractors'].to_list()
  phrases = df['phrase'].to_list()
  fig_types = df['figurative_type'].to_list()

  distractors = []
  answers = []
  texts = []
  types = []

  for i in range(len(distractors_urls)):
    print(f'{i}/{len(distractors_urls)}')
    try:
      d_urls = distractors_urls[i]
      distractor = [Image.open(urlopen(url)) for url in eval(d_urls)]

      a_urls = answers_urls[i]
      answer = Image.open(urlopen(eval(a_urls)[0]))

      text = phrases[i]
      fig_type = fig_types[i]

      distractors.append(distractor)
      answers.append(answer)
      texts.append(text)
      types.append(fig_type)
    except:
      continue

  return distractors, answers, texts, types



def collate_fn(batch):
    images = [data[0] for data in batch]
    texts = [data[1] for data in batch]
    labels = [data[2] for data in batch]

    return images, texts, torch.tensor(labels, dtype=int)


class FigTypeDataset(Dataset):
    def __init__(self, answers, texts, types):
        self.types= types
        self.images = answers
        self.texts = texts

        self.type_map = {'idiom': 0, 'simile': 1, 'metaphor': 2}

        self.labels = list(map(lambda x: self.type_map[x], self.types))


    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.texts[idx], self.labels[idx]

In [16]:
def process_fn(batch):
    images, texts, contrastive_labels = batch
    batch = processor(images=images, text=texts, padding=True, return_tensors='pt')

    return batch, contrastive_labels


def get_embeds(model, processor, train_loader, test_loader):
    train_embeds = []
    train_labels = []
    test_embeds = []
    test_labels = []
    for i_batch, x in enumerate(train_loader):

        inputs, label = process_fn(x)
        inputs, label = inputs.to(device), label.to(device)

        outputs = model(**inputs)
        image_embeds = outputs.image_embeds.detach().cpu().numpy()
        text_embeds = outputs.text_embeds.detach().cpu().numpy()

        embeds = np.concatenate([image_embeds, text_embeds], axis=1)
        train_embeds.append(embeds)
        train_labels.append(label.detach().cpu().numpy())

    for i_batch, x in enumerate(test_loader):

        inputs, label = process_fn(x)
        inputs, label = inputs.to(device), label.to(device)

        outputs = model(**inputs)
        image_embeds = outputs.image_embeds.detach().cpu().numpy()
        text_embeds = outputs.text_embeds.detach().cpu().numpy()

        embeds = np.concatenate([image_embeds, text_embeds], axis=1)
        test_embeds.append(embeds)
        test_labels.append(label.detach().cpu().numpy())

    train_embeds = np.concatenate(train_embeds, axis=0)
    test_embeds = np.concatenate(test_embeds, axis=0)
    train_labels = np.concatenate(train_labels, axis=0)
    test_labels = np.concatenate(test_labels, axis=0)

    return train_embeds, train_labels, test_embeds, test_labels

In [10]:
distractors_simile, answers_simile, texts_simile, types_simile = process_df(simile_df)
distractors_idiom, answers_idiom, texts_idiom, types_idiom = process_df(idiom_df)
distractors_metaphor, answers_metaphor, texts_metaphor, types_metaphor = process_df(metaphor_df)

0/277
1/277
2/277
3/277
4/277
5/277
6/277
7/277
8/277
9/277
10/277
11/277
12/277
13/277
14/277
15/277
16/277
17/277
18/277
19/277
20/277
21/277
22/277
23/277
24/277
25/277
26/277
27/277
28/277
29/277
30/277
31/277
32/277
33/277
34/277
35/277
36/277
37/277
38/277
39/277
40/277
41/277
42/277
43/277
44/277
45/277
46/277
47/277
48/277
49/277
50/277
51/277
52/277
53/277
54/277
55/277
56/277
57/277
58/277
59/277
60/277
61/277
62/277
63/277
64/277
65/277
66/277
67/277
68/277
69/277
70/277
71/277
72/277
73/277
74/277
75/277
76/277
77/277
78/277
79/277
80/277
81/277
82/277
83/277
84/277
85/277
86/277
87/277
88/277
89/277
90/277
91/277
92/277
93/277
94/277
95/277
96/277
97/277
98/277
99/277
100/277
101/277
102/277
103/277
104/277
105/277
106/277
107/277
108/277
109/277
110/277
111/277
112/277
113/277
114/277
115/277
116/277
117/277
118/277
119/277
120/277
121/277
122/277
123/277
124/277
125/277
126/277
127/277
128/277
129/277
130/277
131/277
132/277
133/277
134/277
135/277
136/277
137/277
138/27

In [11]:
distractors = distractors_idiom + distractors_simile + distractors_metaphor
answers = answers_idiom + answers_simile + answers_metaphor
texts = texts_idiom + texts_simile + texts_metaphor
types = types_idiom + types_simile + types_metaphor

In [12]:
batch_size = 16

dataset = FigTypeDataset(answers, texts, types)

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.8*len(dataset)), len(dataset)-int(0.8*len(dataset))])
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size, collate_fn=collate_fn)

In [14]:
device = 'cuda'

##FactorCL-SUP

In [19]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

factorcl_sup = FactorCLSUP(model, processor, [512,512], 3, device, lr=1e-6).to(device)
factorcl_sup.train()

train_sup_model(factorcl_sup, train_loader, num_epoch=10, num_club_iter=1)

model.eval()
train_embeds, train_labels, test_embeds, test_labels = get_embeds(model, processor, train_loader, test_loader)

clf = LogisticRegression(max_iter=200).fit(train_embeds, train_labels)
score = clf.score(test_embeds, test_labels)

iter:  0  i_batch:  0  loss:  -0.0011595366522669792




iter:  1  i_batch:  0  loss:  -0.002346084453165531
iter:  2  i_batch:  0  loss:  -0.0030839326791465282
iter:  3  i_batch:  0  loss:  -0.0037324423901736736
iter:  4  i_batch:  0  loss:  -0.0055291056632995605
iter:  5  i_batch:  0  loss:  -0.006623566150665283
iter:  6  i_batch:  0  loss:  -0.007975934073328972
iter:  7  i_batch:  0  loss:  -0.010226668789982796
iter:  8  i_batch:  0  loss:  -0.012427756562829018
iter:  9  i_batch:  0  loss:  -0.014025865122675896


In [20]:
score

1.0

##FactorCL-SSL

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

factorcl_ssl = FactorCLSSL(model, processor, [512,512], 3, device, lr=1e-6).to(device)
factorcl_ssl.train()

train_ssl_model(factorcl_ssl, train_loader, num_epoch=10, num_club_iter=1)

model.eval()
train_embeds, train_labels, test_embeds, test_labels = get_embeds(model, processor, train_loader, test_loader)

clf = LogisticRegression(max_iter=200).fit(train_embeds, train_labels)
score = clf.score(test_embeds, test_labels)

In [22]:
score

0.9324324324324325

##SimCLR

In [23]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Set use_label=True for SupCon
simclr_model = SupConModel(model, processor, 0.5, [512,512], [512,512], use_label=False).to(device)
simclr_model.train()

optimizer = optim.Adam(simclr_model.parameters(), lr=1e-6)

train_supcon(simclr_model, train_loader, optimizer, num_epoch=10)

model.eval()
train_embeds, train_labels, test_embeds, test_labels = get_embeds(model, processor, train_loader, test_loader)

clf = LogisticRegression(max_iter=200).fit(train_embeds, train_labels)
score = clf.score(test_embeds, test_labels)

iter:  0  i_batch:  0  loss:  28.418033599853516




iter:  1  i_batch:  0  loss:  27.95946502685547
iter:  2  i_batch:  0  loss:  27.49124526977539
iter:  3  i_batch:  0  loss:  27.05012321472168
iter:  4  i_batch:  0  loss:  26.655895233154297
iter:  5  i_batch:  0  loss:  26.386932373046875
iter:  6  i_batch:  0  loss:  26.163578033447266
iter:  7  i_batch:  0  loss:  25.994287490844727
iter:  8  i_batch:  0  loss:  25.879667282104492
iter:  9  i_batch:  0  loss:  25.78017234802246


In [24]:
score

0.8918918918918919