In [1]:
from openprompt.data_utils.text_classification_dataset import SST2Processor, AgnewsProcessor
from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate
from openprompt.prompts import ManualVerbalizer
from openprompt import PromptForClassification
from openprompt import PromptDataLoader

from tqdm import tqdm
import sys

import torch
import time
import logging

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import wordnet

import os, sys

import random

# Set a random seed
seed = 42
random.seed(seed)

class HiddenPrints:
    """Context manager that discards everything written to ``sys.stdout``.

    On entry ``sys.stdout`` is pointed at ``os.devnull``; on exit the stream
    that was active at entry is restored and the devnull handle is closed.
    Exceptions raised inside the ``with`` body are not suppressed.

    Usage::

        with HiddenPrints():
            noisy_call()
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the handle so __exit__ closes exactly the
        # file opened here, even if the body rebinds sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if close() fails.
        sys.stdout = self._original_stdout
        self._devnull.close()



# Attribute access on the WordNet corpus object; the resulting value is discarded.
# NOTE(review): presumably this is here to force NLTK's lazy corpus loader to
# load WordNet up front - confirm. WordNet itself is not downloaded below.
wordnet.synsets

# Tokenizer and POS-tagger resources for the nltk helpers imported above.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# ---- experiment configuration ----

# Dataset and split; both are printed later and dataset_name is passed to get_score.
dataset_name = "sst-2"
split = "test"

# Arguments for load_plm. Note that `model_config` is rebound by the load_plm
# call further down in this cell.
model = "roberta"
model_config = "roberta-large"

# The next three settings are not referenced by any of the visible cells.
iteration = 5

inference_count = 0

count_from_highest = 15
# Other values tried for `shots`: 200, 64.
# `shots` only feeds the project name in the visible cells; the evaluation
# calls pass full_data_len to get_score instead.
shots = 32

# Not referenced by any of the visible cells.
proportional_decrease=0.5
proportional_increase=2

# Run identifier built from the settings above and the random seed.
project = f"project_{count_from_highest}_{shots}_{seed}"

if_testing_small_run = None # a truthy value tags the project name with '_testrun'; keep None otherwise
if if_testing_small_run:
    project += '_testrun'







print(dataset_name)
print(split)

# Only the 'test' entry is populated in this notebook.
dataset = {}


print('load model...')
# NOTE: this rebinds `model_config` from the checkpoint-name string set in the
# config section to whatever load_plm returns in third position.
plm, tokenizer, model_config, WrapperClass = load_plm(model, model_config)
print("model loaded")


# Parse the SST-2 test file once and reuse the resulting list for both the
# size computation and the shuffled evaluation split.
sst2_test_examples = SST2Processor().get_test_examples("./datasets/TextClassification/SST-2")

# Number of examples in the full test split (stays 0 for any other dataset_name).
full_data_len = 0

if dataset_name == "sst-2":
    temp = sst2_test_examples
    full_data_len = len(temp)


# Class names and the single label word per class used by the verbalizer in get_score.
classes = [
    "negative",
    "positive"
]

label_words = {
        "negative": ["terrible"],
        "positive": ["great"],
    }

# Sampling full_data_len items without replacement yields the whole split in a
# shuffled order; it draws from the `random` module's global state.
dataset['test'] = random.sample(sst2_test_examples, full_data_len)

def get_score(dataset_name,dataset,shots,candidate):
    """Score one candidate prompt by classification accuracy and return it.

    The candidate is placed between the input sentence and the mask token
    (``{"placeholder": "text_a"} <candidate> {"mask"}.``). The function relies
    on the notebook-level ``plm``, ``tokenizer``, ``WrapperClass``, ``classes``
    and ``label_words``, runs the model on the GPU, and prints one fixed-width
    row containing the prompt, the rounded accuracy and ``shots``.

    Args:
        dataset_name: Name of the dataset; only ``"sst-2"`` is implemented.
        dataset: Mapping whose ``'test'`` entry is the sequence of examples.
        shots: Number of leading examples of ``dataset['test']`` to evaluate.
            A falsy value (``None`` or ``0``) evaluates all of them.
        candidate: Prompt text to evaluate.

    Returns:
        The fraction of evaluated examples whose predicted class index equals
        the label.

    Raises:
        ValueError: If ``dataset_name`` is not supported or there are no
            examples to evaluate.
    """
    # Fail early with a clear message instead of hitting unbound locals below.
    if dataset_name != "sst-2":
        raise ValueError(
            f"unsupported dataset_name {dataset_name!r}; only 'sst-2' is implemented"
        )

    temp_dataset = dataset['test'][:shots] if shots else dataset['test']
    if len(temp_dataset) == 0:
        raise ValueError("no examples to evaluate")

    the_prompt_to_be_found = candidate
    template = '{"placeholder": "text_a"} ' + the_prompt_to_be_found  + ' {"mask"}.'

    promptTemplate = ManualTemplate(
        text = template,
        tokenizer = tokenizer,
    )
    promptVerbalizer = ManualVerbalizer(
        classes = classes,
        label_words = label_words,
        tokenizer = tokenizer,
    )
    promptModel = PromptForClassification(
        template = promptTemplate,
        plm = plm,
        verbalizer = promptVerbalizer,
    )
    promptModel = promptModel.cuda()
    data_loader = PromptDataLoader(
        dataset = temp_dataset,
        tokenizer = tokenizer,
        template = promptTemplate,
        tokenizer_wrapper_class=WrapperClass,
    )
    promptModel.eval()
    allpreds = []
    alllabels = []

    # Pure evaluation: disable autograd so no graph is kept per batch.
    with torch.no_grad():
        # Wrapping the loader directly lets tqdm pick up a total when the
        # loader exposes a length.
        for inputs in tqdm(data_loader):
            inputs = inputs.cuda()
            logits = promptModel(inputs)
            labels = inputs['label']
            alllabels.extend(labels.cpu().tolist())
            allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())

    acc = sum(int(pred == label) for pred, label in zip(allpreds, alllabels))/len(allpreds)

    result = f"{the_prompt_to_be_found.strip().ljust(150)}{str(round(acc,4)).ljust(10)}{str(shots).ljust(10)}\n"
    print(result)
    return acc






[nltk_data] Downloading package punkt to /home/jahyun/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jahyun/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


sst-2
test
load model...
model loaded


In [2]:
# Natural-language candidate prompts. get_score inserts each one between the
# input sentence and the mask token.
candidates = [
    "It was very",
    "Absolutely",
    "AbsolutelyAbsolutely",
    "Really downright",
    "Absolutely VERY absolute VERY absolute",
    
]

In [19]:
# Sanity check: number of examples in the evaluation split.
len(dataset['test'])

1821

In [3]:
# Evaluate every candidate on the first full_data_len test examples.
# get_score prints one result row per candidate; `score` is overwritten on each
# iteration, so only the last accuracy remains bound afterwards.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 

tokenizing: 1821it [00:01, 1407.60it/s]
1821it [01:11, 25.60it/s]


It was very                                                                                                                                           0.8396    1821      



tokenizing: 1821it [00:01, 1506.94it/s]
1821it [01:12, 25.15it/s]


Absolutely                                                                                                                                            0.916     1821      



tokenizing: 1821it [00:01, 1820.54it/s]
1821it [01:12, 25.01it/s]


AbsolutelyAbsolutely                                                                                                                                  0.9006    1821      



tokenizing: 1821it [00:01, 1784.61it/s]
1821it [01:12, 24.96it/s]


Really downright                                                                                                                                      0.9154    1821      



tokenizing: 1821it [00:01, 1656.18it/s]
1821it [01:13, 24.86it/s]

Absolutely VERY absolute VERY absolute                                                                                                                0.9325    1821      






It was very                                                                                                                                           
0.8396    1821      

tokenizing: 1821it [00:01, 1506.94it/s]
1821it [01:12, 25.15it/s]
Absolutely                                                                                                                                            
0.916     1821      

tokenizing: 1821it [00:01, 1820.54it/s]
1821it [01:12, 25.01it/s]
AbsolutelyAbsolutely                                                                                                                                  
0.9006    1821      

tokenizing: 1821it [00:01, 1784.61it/s]
1821it [01:12, 24.96it/s]
Really downright                                                                                                                                      
0.9154    1821      

tokenizing: 1821it [00:01, 1656.18it/s]
1821it [01:13, 24.86it/s]
Absolutely VERY absolute VERY absolute                                                                                                                
0.9325    1821      

In [4]:
# Candidates written with a literal 'Ġ' character in front of each word.
# NOTE(review): 'Ġ' looks like the leading-space marker of byte-level BPE
# vocabularies - presumably these strings were copied from tokenizer tokens;
# confirm that the template tokenizes them as intended.
candidates = [
    "ĠwidelyĠcriticallyĠpassionatelyĠfirmlyĠwidely",
    "ĠstrategicallyĠfirmly",
    "ĠwidelyĠcritically ĠpassionatelyĠfirmly"
    
]



In [5]:
# Evaluate the current `candidates` on the first full_data_len test examples.
# Only the last accuracy stays bound to `score`; each row is printed by get_score.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 

tokenizing: 1821it [00:01, 1673.31it/s]
1821it [01:12, 25.12it/s]


ĠwidelyĠcriticallyĠpassionatelyĠfirmlyĠwidely                                                                                                         0.9099    1821      



tokenizing: 1821it [00:01, 1713.67it/s]
1821it [01:13, 24.92it/s]


trategicallyĠfirmly                                                                                                                                   0.883     1821      



tokenizing: 1821it [00:01, 1706.55it/s]
1821it [01:12, 24.96it/s]

widelyĠcritically ĠpassionatelyĠfirmly                                                                                                                0.8462    1821      






In [6]:
# Second run of the 'Ġ'-prefixed candidates.
# NOTE(review): these strings look identical to the earlier 'Ġ' cell, yet the
# recorded accuracies of the second and third entries differ between the two
# runs - check the strings for invisible character differences.
candidates = [
    "ĠwidelyĠcriticallyĠpassionatelyĠfirmlyĠwidely",
    "ĠstrategicallyĠfirmly",
    "ĠwidelyĠcritically ĠpassionatelyĠfirmly"
    
]



In [7]:
# Evaluate the current `candidates` on the first full_data_len test examples.
# Only the last accuracy stays bound to `score`; each row is printed by get_score.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 

tokenizing: 1821it [00:01, 1638.26it/s]
1821it [01:12, 25.25it/s]


ĠwidelyĠcriticallyĠpassionatelyĠfirmlyĠwidely                                                                                                         0.9099    1821      



tokenizing: 1821it [00:01, 1772.75it/s]
1821it [01:12, 24.96it/s]


ĠstrategicallyĠfirmly                                                                                                                                 0.8913    1821      



tokenizing: 1821it [00:01, 1684.90it/s]
1821it [01:13, 24.93it/s]

ĠwidelyĠcritically ĠpassionatelyĠfirmly                                                                                                               0.9083    1821      






In [8]:
# The same word sequences as the 'Ġ' candidates, written without the 'Ġ'
# characters so the words are concatenated directly.
candidates = [
    "widelycriticallypassionatelyfirmlywidely",
    "strategicallyfirmly",
    "widelycritically passionatelyfirmly"
    
]



In [9]:
# Evaluate the current `candidates` on the first full_data_len test examples.
# Only the last accuracy stays bound to `score`; each row is printed by get_score.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 

tokenizing: 1821it [00:01, 1745.82it/s]
1821it [01:10, 25.79it/s]


widelycriticallypassionatelyfirmlywidely                                                                                                              0.9039    1821      



tokenizing: 1821it [00:00, 1835.48it/s]
1821it [01:12, 25.17it/s]


strategicallyfirmly                                                                                                                                   0.8232    1821      



tokenizing: 1821it [00:01, 1647.15it/s]
1821it [01:12, 24.99it/s]

widelycritically passionatelyfirmly                                                                                                                   0.8248    1821      






In [2]:
# Single 'Ġ'-prefixed candidate, scored on the first full_data_len test examples.
candidates = [
    "ĠwidelyĠcriticallyĠjointly Ġgenuinely"
]

# get_score prints the result row; `score` holds the returned accuracy.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 


tokenizing: 1821it [00:01, 1337.15it/s]
1821it [01:10, 25.86it/s]


ĠwidelyĠcriticallyĠjointly Ġgenuinely                                                                                                                 0.9297    1821      



In [3]:
# Single 'Ġ'-prefixed candidate; note the trailing space inside the string.
candidates = [
    "ĠgenuinelyĠunequivocallyĠstrongly "
]

# get_score prints the result row; `score` holds the returned accuracy.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 


tokenizing: 1821it [00:01, 1409.20it/s]
1821it [01:10, 25.97it/s]

ĠgenuinelyĠunequivocallyĠstrongly                                                                                                                     0.9319    1821      






In [4]:
# Re-evaluates a prompt that also appears in the first candidate list of this
# notebook.
candidates = [
    "Absolutely VERY absolute VERY absolute"
]

# get_score prints the result row; `score` holds the returned accuracy.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 

tokenizing: 1821it [00:01, 1741.99it/s]
1821it [01:11, 25.40it/s]

Absolutely VERY absolute VERY absolute                                                                                                                0.9325    1821      






In [6]:
# Two first-person phrasings that differ only in the intensifier.
candidates = [
    "I find very",
    "I find really"
]

# Evaluate both on the first full_data_len test examples; `score` keeps only
# the last accuracy, each row is printed by get_score.
for candidate_prompt in candidates:
    score = get_score(dataset_name,dataset,full_data_len,candidate_prompt) 

tokenizing: 1821it [00:01, 1746.29it/s]
1821it [01:10, 25.96it/s]


I find very                                                                                                                                           0.8699    1821      



tokenizing: 1821it [00:01, 1786.11it/s]
1821it [01:11, 25.43it/s]

I find really                                                                                                                                         0.8402    1821      




