In [None]:
!pip install 'transformers[torch]'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers[torch]
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers[torch])
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers[torch])
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.19.0 (from transformers[torch])
  Downloading accelerate-0.19.0-py3-none-any.whl (219 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
# Mount Google Drive at /content/gdrive; the project root and the dataset used
# further down both live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Root folder of the project on the mounted Drive; it is put on sys.path below so
# that the project's local modules can be imported.
PROJECT_ROOT = "/content/gdrive/MyDrive/GPT-VLU"
%load_ext autoreload
%autoreload 2

In [None]:
import sys, os

# Make the project's local packages (e.g. `utils`) importable. Guarded so that
# re-running this cell does not keep prepending duplicate entries to sys.path.
if PROJECT_ROOT not in sys.path:
  sys.path.insert(0, PROJECT_ROOT)

In [None]:
from transformers import TFGPT2LMHeadModel
from transformers import GPT2Tokenizer
from transformers import pipeline, set_seed

from utils.setup_utils import parse_and_validate_df, DFExperimentInfo

import pandas as pd
import numpy as np
from dataclasses import fields
import torch
from sklearn.metrics import accuracy_score

from tqdm import tqdm

In [None]:
# Loading data
# The dataset lives inside the project folder, so derive its path from PROJECT_ROOT
# instead of repeating the absolute Drive path.
dataset_path = f'{PROJECT_ROOT}/datasets/shape_association.csv'

task = 'choice'
df = parse_and_validate_df(dataset_path, task)
# One column per DFExperimentInfo field. `fields()` returns Field objects, so the
# column labels have to be taken from their `.name` attributes.
experiment_df = pd.DataFrame(columns=[f.name for f in fields(DFExperimentInfo)])

df.head()

Unnamed: 0,word,gt,options
0,wedge,triangle,"['triangle', 'rectangle', 'circle']"
1,flag,rectangle,"['triangle', 'rectangle', 'circle']"
2,carrot,triangle,"['triangle', 'rectangle', 'circle']"
3,wheel,circle,"['triangle', 'rectangle', 'circle']"
4,towel,rectangle,"['triangle', 'rectangle', 'circle']"


In [None]:
# Prompt templates for the shape-association task. WORD is substituted with the
# object name; MASK marks the slot that a candidate shape is put into.
shapes_orig_prompts = [
  "a photo of a MASK shaped WORD",
  "a photo of a MASK WORD",
  "a photo of the MASK WORD",
  "a MASK WORD",
  "the MASK WORD",
  "an image of a MASK WORD",
  "a WORD usually has a MASK shape",
  "WORDs commonly have a MASK shape",
  "the basic shape of a WORD is MASK",
  "what is the shape of a WORD? MASK",
]

## Initialize the models - GPT-2 and GPT-2 large

In [None]:
# Text-generation pipeline backed by the base `gpt2` checkpoint.
generator = pipeline('text-generation', model='gpt2')
# Seed the random number generators for reproducibility.
set_seed(42)

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [None]:
# Text-generation pipeline backed by the `gpt2-large` checkpoint.
generator_large = pipeline('text-generation', model='gpt2-large')
# Seed the random number generators for reproducibility.
set_seed(42)

Downloading (…)lve/main/config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/3.25G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Seed the random number generators for reproducibility.
set_seed(42)
# Feature-extraction pipeline backed by the base `gpt2` checkpoint; used by the
# embedding-similarity scoring below.
feature_extractor = pipeline('feature-extraction', model='gpt2')

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Seed the random number generators for reproducibility.
set_seed(42)
# Feature-extraction pipeline backed by the `gpt2-large` checkpoint.
feature_extractor_large = pipeline('feature-extraction', model='gpt2-large')

Downloading (…)lve/main/config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/3.25G [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def score_model(fe_model, orig_prompts, data=None):
  """Predict a shape for every (word, prompt) pair via embedding similarity.

  For each row and each template, WORD is replaced by the row's word. Every
  candidate shape is then substituted for MASK and the resulting sentence is
  embedded (mean over the extractor's token features, L2-normalised). The
  template with MASK removed is embedded the same way, and the candidate whose
  embedding has the largest dot product with that base embedding is predicted.

  Args:
    fe_model: callable taking a string and returning per-token features as
      nested lists (e.g. a transformers feature-extraction pipeline).
    orig_prompts: iterable of templates containing WORD and MASK placeholders.
    data: optional dataframe with 'word' and 'gt' columns. Defaults to the
      notebook-level `df`.

  Returns:
    (y_true, y_pred): two parallel lists with one entry per (row, prompt) pair;
    y_true holds the row's 'gt' value, y_pred the chosen candidate label.
  """
  if data is None:
    data = df  # fall back to the dataframe loaded at the top of the notebook

  # Candidates carry no leading whitespace: the templates already have spaces
  # around MASK, and the returned label must compare equal to the 'gt' values.
  shapes_options = ['triangle', 'rectangle', 'circle']

  def embed(text):
    # The generation-style kwargs are kept exactly as in the original call.
    # NOTE(review): presumably ignored by a feature-extraction pipeline - confirm.
    feats = torch.tensor(fe_model(text, max_new_tokens=0, num_return_sequences=0, pad_token_id=50256))
    # Flatten to (tokens, hidden) so that single-token inputs pool correctly too.
    # Assumes the last axis is the hidden dimension - TODO confirm for other extractors.
    feats = feats.reshape(-1, feats.shape[-1])
    pooled = feats.mean(dim=0)
    pooled = pooled / pooled.norm(p=2, dim=-1, keepdim=True)
    return pooled.cpu().numpy()

  y_true = []
  y_pred = []

  for index, row in tqdm(data.iterrows(), total=len(data)):
    w = row['word']
    for p in orig_prompts:
      p = p.replace("WORD", w)

      option_vecs = np.array([embed(p.replace("MASK", option)) for option in shapes_options])
      # Reference embedding: the same sentence with the MASK slot left empty.
      base = embed(p.replace("MASK", ''))

      # Vectors are unit length, so the dot product is the cosine similarity.
      scores = option_vecs @ base
      ind_class = int(np.argmax(scores))
      y_pred.append(shapes_options[ind_class])
      y_true.append(row['gt'])

  return y_true, y_pred

In [None]:
# Accuracy of the base GPT-2 feature extractor over every (word, prompt) pair.
y_true, y_pred = score_model(feature_extractor, shapes_orig_prompts)
accuracy_score(y_true, y_pred)

109it [06:23,  3.51s/it]


0.01651376146788991

In [None]:
# Same evaluation with the GPT-2 large feature extractor.
y_true, y_pred = score_model(feature_extractor_large, shapes_orig_prompts)
accuracy_score(y_true, y_pred)

109it [52:40, 29.00s/it]


0.0

## Shapes

TODO: also test without restricting the length of the generated output (i.e. allow more than one new token).

Possible extra prompts for this:
1. ... instead of MASK, should be 
2. ... 
3. ... fill in the MASK 
4. ... choose one option from the list: <> to replace MASK 
5. ... replace the MASK with correct word
6. ...

In [None]:
def retrieve_results(gen, llm_prompt, orig_prompts, max_new=1, data=None):
  """Generate a continuation for every (word, prompt) pair and collect it.

  Each template has WORD replaced by the row's word and `llm_prompt` appended.
  The generator's continuation (the generated text with the input prompt cut
  off the front), with spaces removed and lower-cased, is the prediction.

  Args:
    gen: callable invoked as gen(prompt, max_new_tokens=..., num_return_sequences=1,
      pad_token_id=50256) that returns a list of dicts with a 'generated_text'
      key (e.g. a transformers text-generation pipeline).
    llm_prompt: string appended after every template.
    orig_prompts: iterable of templates containing the WORD placeholder.
    max_new: number of new tokens requested from the generator.
    data: optional dataframe with 'word' and 'gt' columns. Defaults to the
      notebook-level `df`.

  Returns:
    (y_true, y_pred): two parallel lists with one entry per (row, prompt) pair.
  """
  if data is None:
    data = df  # fall back to the dataframe loaded at the top of the notebook

  y_true = []
  y_pred = []

  for index, row in data.iterrows():
      w = row['word']
      for p in orig_prompts:
        input_prompt = p.replace("WORD", w) + llm_prompt
        out_p = gen(input_prompt, max_new_tokens=max_new, num_return_sequences=1, pad_token_id=50256)[0]['generated_text']
        # Cut the prompt off the front only; a blanket str.replace would also
        # delete any later repetition of the prompt inside the continuation.
        if out_p.startswith(input_prompt):
          out_p = out_p[len(input_prompt):]
        else:
          out_p = out_p.replace(input_prompt, '')
        y_pred.append(out_p.replace(' ', '').lower())
        y_true.append(row['gt'])

  return y_true, y_pred

## 1. instead of MASK, should be

In [None]:
# Suffix appended after each template before generation; evaluated with base GPT-2.
gpt_extra_prompt1 = '. instead of MASK, should be'

y_true1, y_pred1 = retrieve_results(generator, gpt_extra_prompt1, shapes_orig_prompts)
accuracy_score(y_true1, y_pred1)

0.0

In [None]:
# Same suffix, evaluated with GPT-2 large.
gpt_extra_prompt1 = '. instead of MASK, should be'

y_true1, y_pred1 = retrieve_results(generator_large, gpt_extra_prompt1, shapes_orig_prompts)
accuracy_score(y_true1, y_pred1)

0.0

## 2. Empty suffix. Use only the last two prompts:

**"the basic shape of a WORD is",
"what is the shape of a WORD?"**

In [None]:
# The two templates whose MASK slot sits at the very end, written without MASK so
# that the model's continuation fills the slot directly; no suffix is appended.
last_two_shape_prompts = ["the basic shape of a WORD is", "what is the shape of a WORD?"]
gpt_extra_prompt2 = ''

retrieve_results(generator, gpt_extra_prompt2, last_two_shape_prompts)

GT: rectangle : the basic shape of a flag is 
result:   not 

GT: rectangle : what is the shape of a flag? 
result:  
 



In [None]:
# Same two open-ended templates and empty suffix, generated with GPT-2 large.
last_two_shape_prompts = ["the basic shape of a WORD is", "what is the shape of a WORD?"]
gpt_extra_prompt2 = ''

retrieve_results(generator_large, gpt_extra_prompt2, last_two_shape_prompts)

GT: rectangle : the basic shape of a flag is 
result:  , 

GT: rectangle : what is the shape of a flag? 
result:  
 



In [None]:
# GPT-2 large again, this time requesting two new tokens per generation (max_new=2).
last_two_shape_prompts = ["the basic shape of a WORD is", "what is the shape of a WORD?"]
gpt_extra_prompt2 = ''

retrieve_results(generator_large, gpt_extra_prompt2, last_two_shape_prompts, max_new=2)



GT: rectangle : the basic shape of a flag is 
result:  :
 

GT: rectangle : what is the shape of a flag? 
result:   A flag 



## 3.  Fill in the MASK 

In [None]:
# "Fill in the MASK" suffix, evaluated with base GPT-2.
gpt_extra_prompt3 = '.  Fill in the MASK'
y_true3, y_pred3 = retrieve_results(generator, gpt_extra_prompt3, shapes_orig_prompts)
accuracy_score(y_true3, y_pred3)

0.001834862385321101

In [None]:
# "Fill in the MASK" suffix, evaluated with GPT-2 large.
gpt_extra_prompt3 = '.  Fill in the MASK'
y_true3, y_pred3 = retrieve_results(generator_large, gpt_extra_prompt3, shapes_orig_prompts)
accuracy_score(y_true3, y_pred3)

0.0009174311926605505

In [None]:
# Variant suffix "MASK can be replaced with", evaluated with base GPT-2.
gpt_extra_prompt3 = '. MASK can be replaced with'
y_true3, y_pred3 = retrieve_results(generator, gpt_extra_prompt3, shapes_orig_prompts)
accuracy_score(y_true3, y_pred3)

0.0

In [None]:
# Variant suffix "MASK can be replaced with", evaluated with GPT-2 large.
gpt_extra_prompt3 = '. MASK can be replaced with'
y_true3, y_pred3 = retrieve_results(generator_large, gpt_extra_prompt3, shapes_orig_prompts)
accuracy_score(y_true3, y_pred3)

0.0

## 4. choose one option from the list: <> to replace MASK

In [None]:
def retrieve_results_options(gen, orig_prompts, data=None):
  """Generate one token per (word, prompt) pair with the row's options listed.

  Each template has WORD replaced by the row's word and is followed by the
  suffix '. Choose one word from: <options> to replace MASK', where <options>
  is the row's 'options' value. The generator's continuation (the generated
  text with the input prompt cut off the front), with spaces removed and
  lower-cased, is the prediction.

  Args:
    gen: callable invoked as gen(prompt, max_new_tokens=1, num_return_sequences=1,
      pad_token_id=50256) that returns a list of dicts with a 'generated_text'
      key (e.g. a transformers text-generation pipeline).
    orig_prompts: iterable of templates containing the WORD placeholder.
    data: optional dataframe with 'word', 'gt' and 'options' columns. Defaults
      to the notebook-level `df`.

  Returns:
    (y_true, y_pred): two parallel lists with one entry per (row, prompt) pair.
  """
  if data is None:
    data = df  # fall back to the dataframe loaded at the top of the notebook

  y_true = []
  y_pred = []

  # `total` lets tqdm show a real progress bar instead of a bare iteration count.
  for index, row in tqdm(data.iterrows(), total=len(data)):
      w = row['word']
      llm_prompt = '. Choose one word from: {} to replace MASK'.format(row['options'])

      for p in orig_prompts:
        input_prompt = p.replace("WORD", w) + llm_prompt
        out_p = gen(input_prompt, max_new_tokens=1, num_return_sequences=1, pad_token_id=50256)[0]['generated_text']
        # Cut the prompt off the front only; a blanket str.replace would also
        # delete any later repetition of the prompt inside the continuation.
        if out_p.startswith(input_prompt):
          out_p = out_p[len(input_prompt):]
        else:
          out_p = out_p.replace(input_prompt, '')
        y_pred.append(out_p.replace(' ', '').lower())
        y_true.append(row['gt'])

  return y_true, y_pred


In [None]:
# Accuracy of base GPT-2 when the candidate options are listed in the prompt.
y_true4, y_pred4 = retrieve_results_options(generator, shapes_orig_prompts)
accuracy_score(y_true4, y_pred4)

109it [02:57,  1.63s/it]


0.0

In [None]:
# Same evaluation with GPT-2 large.
y_true4, y_pred4 = retrieve_results_options(generator_large, shapes_orig_prompts)
accuracy_score(y_true4, y_pred4)

109it [16:45,  9.23s/it]


0.0

In [None]:
def retrieve_results_options_n(gen, llm_prompt, orig_prompts, max_new=1, n=5, data=None):
  """Print n sampled continuations per prompt for one example row.

  This is an inspection helper: it skips the first row of the dataframe,
  processes the next one only, and for each template prints the ground truth,
  the full input prompt and the list of n continuations.

  Args:
    gen: callable invoked as gen(prompt, max_new_tokens=..., num_return_sequences=n,
      pad_token_id=50256) that returns a list of dicts with a 'generated_text'
      key (e.g. a transformers text-generation pipeline).
    llm_prompt: ignored. The suffix is always rebuilt from the row's 'options'
      value; the parameter is kept so that existing calls keep working.
    orig_prompts: iterable of templates containing the WORD placeholder.
    max_new: number of new tokens requested per continuation.
    n: number of continuations requested per prompt.
    data: optional dataframe with 'word', 'gt' and 'options' columns. Defaults
      to the notebook-level `df`.

  Returns:
    Number of prompts for which at least one continuation, stripped of
    surrounding whitespace and lower-cased, equals the row's 'gt' value
    (0 if there is no second row to inspect).
  """
  if data is None:
    data = df  # fall back to the dataframe loaded at the top of the notebook

  correct_guesses = 0
  for index, row in data.iloc[1:].iterrows():
    w = row['word']
    gt = row['gt']
    options = row['options']
    suffix = '. Choose one word from: {} to replace MASK'.format(options)

    for p in orig_prompts:
      input_prompt = p.replace("WORD", w) + suffix

      list_out_options = []
      out_p = gen(input_prompt, max_new_tokens=max_new, num_return_sequences=n, pad_token_id=50256)
      # Iterate over what was actually returned rather than indexing range(n).
      for generated in out_p:
        out_i = generated['generated_text']
        # Cut the prompt off the front only, leaving the continuation intact.
        if out_i.startswith(input_prompt):
          out_i = out_i[len(input_prompt):]
        else:
          out_i = out_i.replace(input_prompt, '')
        list_out_options.append(out_i)

      print('GT:', gt, ':', input_prompt, '\nresult: ', list_out_options, '\n')
      # Compare the cleaned continuations with the ground truth; comparing the raw
      # list of result dicts with a string could never be true.
      if any(out_i.strip().lower() == gt for out_i in list_out_options):
        correct_guesses += 1
    break  # only a single example row is inspected

  return correct_guesses

In [None]:
# NOTE(review): the second positional argument is the function's `llm_prompt`; the
# function rebuilds its own suffix from row['options'], so the list passed here is not used.
retrieve_results_options_n(generator, last_two_shape_prompts, shapes_orig_prompts)

GT: rectangle : a photo of a MASK shaped flag. Choose one word from: ['triangle', 'rectangle', 'circle'] to replace MASK 
result:  [' word', ' with', ' at', ':', '/'] 

GT: rectangle : a photo of a MASK flag. Choose one word from: ['triangle', 'rectangle', 'circle'] to replace MASK 
result:  [' with', ' with', "'s", ' flag', "'s"] 

GT: rectangle : a photo of the MASK flag. Choose one word from: ['triangle', 'rectangle', 'circle'] to replace MASK 
result:  [' in', ' with', ' with', ' flags', ' with'] 

GT: rectangle : a MASK flag. Choose one word from: ['triangle', 'rectangle', 'circle'] to replace MASK 
result:  [' words', ' image', ' with', '\n', ' symbol'] 

GT: rectangle : MASK flag. Choose one word from: ['triangle', 'rectangle', 'circle'] to replace MASK 
result:  [',', 'FLAG', "'s", ' symbol', " '"] 

GT: rectangle : an image of a MASK flag. Choose one word from: ['triangle', 'rectangle', 'circle'] to replace MASK 
result:  ['.', ' images', ' flag', ':', ' flag'] 

GT: rectangle