In [None]:
import pandas as pd
import numpy as np
from verbalizer import ManualVerbalizer
# Load Data, Utterance & Label:


def evaluation(validation_dataloader):
    """Compute the F1 score of the global ``prompt_model`` on a dataloader.

    Relies on module-level names that the experiment loop sets up before
    calling this function: ``prompt_model``, ``use_cuda``, ``torch`` and
    ``f1_score``.

    Args:
        validation_dataloader: Iterable of batches. Each batch must support
            ``.cuda()`` and ``batch['label']``, and be accepted by
            ``prompt_model(batch)``.

    Returns:
        The value of ``f1_score(true_labels, predicted_labels)`` over every
        example yielded by the dataloader.

    Note:
        The model is switched to eval mode and left there; callers that keep
        training must call ``prompt_model.train()`` afterwards.
    """
    prompt_model.eval()
    labels_list = []
    pred_list = []
    # Inference only: disabling autograd avoids building a graph for every
    # batch, which matters because this runs repeatedly during training.
    with torch.no_grad():
        for inputs in validation_dataloader:
            if use_cuda:
                inputs = inputs.cuda()
            logits = prompt_model(inputs)
            # Labels and predictions are appended in lockstep so the pairs
            # stay aligned; F1 does not depend on their overall order.
            labels_list.extend(inputs['label'].cpu().tolist())
            pred_list.extend(torch.argmax(logits, dim=-1).cpu().tolist())
    return f1_score(labels_list, pred_list)

# Experiment driver: for every Big-Five trait file, fine-tune a prompt-based
# classifier in a few-shot setting once per seed and print the mean/std of the
# selected test F1 scores.
#
# The imports stay in this cell because `evaluation` looks up `torch`,
# `f1_score`, `prompt_model` and `use_cuda` as module globals at call time.
import torch
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import AdamW, get_linear_schedule_with_warmup

from openprompt import PromptDataLoader
from openprompt import PromptForClassification
from openprompt.data_utils import InputExample
from openprompt.data_utils.data_sampler import FewShotSampler
from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate
# from openprompt.prompts import ManualVerbalizer  (a local `verbalizer` module is used instead)

# The lexicon file is identical for every trait, so it is read only once.
df_verbalizer = pd.read_csv('big_five_cleaned.tsv', sep='\t')

seeds = [321, 42, 1024, 0, 1, 13, 41, 123, 456, 999]
SHOT = 1          # examples per label drawn by the few-shot sampler
batch_size = 1
classes = [0, 1]
use_cuda = True   # also read by `evaluation`

for personality in ['A', 'C', 'E', 'O', 'N']:
    df_data = pd.read_csv('data/Friends_' + personality + '_whole.tsv', sep='\t')
    df = df_data[['utterance', 'labels']]

    # Label words for the verbalizer: lexicon words whose value in this
    # trait's column is positive (class 1) or negative (class 0).
    # https://www.psychometric-assessment.com/the-lexical-hypothesis-and-factor-models/
    trait_scores = df_verbalizer[personality]
    pos = [w.lower() for w in df_verbalizer.loc[trait_scores > 0, 'word']]
    neg = [w.lower() for w in df_verbalizer.loc[trait_scores < 0, 'word']]

    best_f1s = []
    for SEED in seeds:
        # Seed torch as well, so that batch shuffling and dropout are tied to
        # SEED just like the data split and the few-shot sampler are.
        torch.manual_seed(SEED)

        # Train / validation / test split (stratified on the label).
        Uttr_train, Uttr_test, label_train, label_test = train_test_split(
            df['utterance'], df['labels'],
            test_size=0.1, random_state=SEED, stratify=df['labels'])
        Uttr_train, Uttr_valid, label_train, label_valid = train_test_split(
            Uttr_train, label_train,
            test_size=0.1, random_state=SEED, stratify=label_train)

        # Construct samples: one InputExample per utterance, with guids
        # numbered from 0 within each split.
        split_data = {
            'train': (Uttr_train, label_train),
            'validation': (Uttr_valid, label_valid),
            'test': (Uttr_test, label_test),
        }
        dataset = {}
        for split, (utterances, split_labels) in split_data.items():
            dataset[split] = [
                InputExample(text_a=u, label=int(l), guid=cnt)
                for cnt, (u, l) in enumerate(zip(utterances, split_labels))
            ]

        # Few-shot sampling: both the training and the validation set are
        # re-drawn from the training pool, replacing the validation split
        # built above.
        sampler = FewShotSampler(num_examples_per_label=SHOT, also_sample_dev=True,
                                 num_examples_per_label_dev=SHOT)
        dataset['train'], dataset['validation'] = sampler(dataset['train'], seed=SEED)

        # Base model. It is reloaded for every seed because the PLM is
        # fine-tuned below (freeze_plm=False).
        # plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-cased")
        plm, tokenizer, model_config, WrapperClass = load_plm("roberta", "roberta-base")
        # plm, tokenizer, model_config, WrapperClass = load_plm("roberta", "roberta-large")

        # ***** Manual Template *****
        mytemplate = ManualTemplate(
            text = '{"placeholder":"text_a"} I am a {"mask"} person',
            tokenizer = tokenizer,
        )
        # Alternative templates that were tried (kept for reference):
        #   MixedTemplate(model=plm, tokenizer=tokenizer,
        #       text='{"placeholder":"text_a"} {"soft": "He is"} {"mask"} {"soft": "person"}.')
        #   PtuningTemplate(model=plm, tokenizer=tokenizer, ...)

        # Data loaders: identical settings for all splits, except that only
        # the training loader shuffles.
        loader_kwargs = dict(
            template=mytemplate, tokenizer=tokenizer,
            tokenizer_wrapper_class=WrapperClass, max_seq_length=128,
            batch_size=batch_size, teacher_forcing=False, predict_eos_token=False,
            truncate_method="head",
        )
        train_dataloader = PromptDataLoader(
            dataset=dataset["train"], shuffle=True, **loader_kwargs)
        validation_dataloader = PromptDataLoader(
            dataset=dataset["validation"], shuffle=False, **loader_kwargs)
        test_dataloader = PromptDataLoader(
            dataset=dataset["test"], shuffle=False, **loader_kwargs)

        # Construct verbalizer: class 0 -> negative words, class 1 -> positive words.
        myverbalizer = ManualVerbalizer(
            classes = classes,
            label_words = {
                0 : neg,
                1 : pos
            },
            tokenizer=tokenizer)

        prompt_model = PromptForClassification(
            plm=plm, template=mytemplate, verbalizer=myverbalizer, freeze_plm=False)
        if use_cuda:
            prompt_model = prompt_model.cuda()

        loss_func = torch.nn.CrossEntropyLoss()
        # Bias and LayerNorm weights are excluded from weight decay.
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in prompt_model.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in prompt_model.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]

        # Training with model selection on the validation F1.
        # best_eval starts below any attainable F1 so that the first
        # evaluation always records a real test score; starting at 0 with a
        # strict '>' left best_test at its placeholder whenever the
        # validation F1 never rose above 0.
        best_eval = -1.0
        best_test = 0
        optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)
        prompt_model.train()
        for epoch in range(3):
            for step, inputs in enumerate(train_dataloader):
                if use_cuda:
                    inputs = inputs.cuda()
                logits = prompt_model(inputs)
                labels = inputs['label']
                loss = loss_func(logits, labels)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                if step % 5 == 0: # if step % 100 == 1:
                    eval_f1 = evaluation(validation_dataloader)
                    if eval_f1 > best_eval:
                        best_eval = eval_f1
                        best_test = evaluation(test_dataloader)
                    # `evaluation` leaves the model in eval mode.
                    prompt_model.train()
        print('Current SEED:', SEED, 'TEST F1', best_test)
        best_f1s.append(best_test)

    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    print(round(np.mean(best_f1s), 3), r'$\pm$', round(np.std(best_f1s), 3))


tokenizing: 2it [00:00, 2074.33it/s]
tokenizing: 2it [00:00, 2243.54it/s]
tokenizing: 72it [00:00, 2303.67it/s]


6
225
Current SEED: 321 TEST F1 0.7256637168141593


tokenizing: 2it [00:00, 2136.68it/s]
tokenizing: 2it [00:00, 1600.88it/s]
tokenizing: 72it [00:00, 2340.55it/s]


6
225
Current SEED: 42 TEST F1 0.6363636363636364


tokenizing: 2it [00:00, 2262.30it/s]
tokenizing: 2it [00:00, 1625.38it/s]
tokenizing: 72it [00:00, 2388.61it/s]


6
225
Current SEED: 1024 TEST F1 0.7027027027027027


tokenizing: 2it [00:00, 1570.02it/s]
tokenizing: 2it [00:00, 1622.55it/s]
tokenizing: 72it [00:00, 2166.79it/s]


6
225
Current SEED: 0 TEST F1 0


Token indices sequence length is longer than the specified maximum sequence length for this model (557 > 512). Running this sequence through the model will result in indexing errors
tokenizing: 2it [00:00, 967.10it/s]
tokenizing: 2it [00:00, 1095.55it/s]
tokenizing: 72it [00:00, 2315.36it/s]


6
225
Current SEED: 1 TEST F1 0.7256637168141593


tokenizing: 2it [00:00, 2011.17it/s]
tokenizing: 2it [00:00, 1821.23it/s]
tokenizing: 72it [00:00, 2214.12it/s]


6
225
Current SEED: 13 TEST F1 0.6972477064220183


tokenizing: 2it [00:00, 2321.14it/s]
tokenizing: 2it [00:00, 1102.46it/s]
tokenizing: 72it [00:00, 2183.46it/s]


6
225
Current SEED: 41 TEST F1 0.47058823529411764


tokenizing: 2it [00:00, 1774.99it/s]
tokenizing: 2it [00:00, 2165.36it/s]
tokenizing: 72it [00:00, 2365.26it/s]


6
225
Current SEED: 123 TEST F1 0.7256637168141593


tokenizing: 2it [00:00, 2441.39it/s]
tokenizing: 2it [00:00, 2002.53it/s]
tokenizing: 72it [00:00, 2274.95it/s]


6
225
Current SEED: 456 TEST F1 0.7058823529411764


tokenizing: 2it [00:00, 1944.51it/s]
tokenizing: 2it [00:00, 2286.97it/s]
tokenizing: 72it [00:00, 2466.59it/s]


6
225
Current SEED: 999 TEST F1 0.7256637168141593
0.612 $\pm$ 0.217


tokenizing: 2it [00:00, 1671.04it/s]
tokenizing: 2it [00:00, 2163.69it/s]
tokenizing: 72it [00:00, 2206.32it/s]


6
216
Current SEED: 321 TEST F1 0.6285714285714286


tokenizing: 2it [00:00, 1726.76it/s]
tokenizing: 2it [00:00, 1833.58it/s]
tokenizing: 72it [00:00, 2387.95it/s]


6
216
Current SEED: 42 TEST F1 0.6285714285714286


tokenizing: 2it [00:00, 1317.72it/s]
tokenizing: 2it [00:00, 1271.77it/s]
tokenizing: 72it [00:00, 2353.36it/s]


6
216
Current SEED: 1024 TEST F1 0.6138613861386139


tokenizing: 2it [00:00, 1455.85it/s]
tokenizing: 2it [00:00, 1099.71it/s]
tokenizing: 72it [00:00, 2384.29it/s]


6
216
Current SEED: 0 TEST F1 0.6285714285714286


tokenizing: 2it [00:00, 1377.89it/s]
tokenizing: 2it [00:00, 1486.81it/s]
tokenizing: 72it [00:00, 2304.90it/s]


6
216
Current SEED: 1 TEST F1 0.6285714285714286


tokenizing: 2it [00:00, 1988.29it/s]
tokenizing: 2it [00:00, 2303.93it/s]
tokenizing: 72it [00:00, 2472.21it/s]


6
216
Current SEED: 13 TEST F1 0.6285714285714286


tokenizing: 2it [00:00, 1634.57it/s]
tokenizing: 2it [00:00, 2173.21it/s]
tokenizing: 72it [00:00, 2383.32it/s]


6
216
Current SEED: 41 TEST F1 0.6285714285714286


Token indices sequence length is longer than the specified maximum sequence length for this model (557 > 512). Running this sequence through the model will result in indexing errors
tokenizing: 2it [00:00, 1786.33it/s]
tokenizing: 2it [00:00, 2601.93it/s]
tokenizing: 72it [00:00, 2103.05it/s]


6
216
Current SEED: 123 TEST F1 0.6285714285714286


tokenizing: 2it [00:00, 2223.92it/s]
tokenizing: 2it [00:00, 2235.17it/s]
tokenizing: 72it [00:00, 2232.53it/s]


6
216
Current SEED: 456 TEST F1 0.6285714285714286


Token indices sequence length is longer than the specified maximum sequence length for this model (557 > 512). Running this sequence through the model will result in indexing errors
tokenizing: 2it [00:00, 2050.00it/s]
tokenizing: 2it [00:00, 1445.07it/s]
tokenizing: 72it [00:00, 1993.08it/s]


6
216
Current SEED: 999 TEST F1 0.6285714285714286
0.627 $\pm$ 0.004


tokenizing: 2it [00:00, 1536.94it/s]
tokenizing: 2it [00:00, 1489.98it/s]
tokenizing: 72it [00:00, 2233.70it/s]


6
213
Current SEED: 321 TEST F1 0.6153846153846153


tokenizing: 2it [00:00, 1553.73it/s]
tokenizing: 2it [00:00, 894.59it/s]
tokenizing: 72it [00:00, 2211.60it/s]


6
213
Current SEED: 42 TEST F1 0.6213592233009708


tokenizing: 2it [00:00, 1846.49it/s]
tokenizing: 2it [00:00, 1833.98it/s]
tokenizing: 72it [00:00, 2412.31it/s]


6
213
Current SEED: 1024 TEST F1 0.6153846153846153


tokenizing: 2it [00:00, 1471.94it/s]
tokenizing: 2it [00:00, 2011.17it/s]
tokenizing: 72it [00:00, 2370.85it/s]


6
213
Current SEED: 0 TEST F1 0.5531914893617021


tokenizing: 2it [00:00, 1821.23it/s]
tokenizing: 2it [00:00, 1346.05it/s]
tokenizing: 72it [00:00, 2520.07it/s]


6
213
Current SEED: 1 TEST F1 0.6213592233009708


tokenizing: 2it [00:00, 1628.22it/s]
tokenizing: 2it [00:00, 2222.74it/s]
tokenizing: 72it [00:00, 2339.05it/s]


6
213
Current SEED: 13 TEST F1 0


tokenizing: 2it [00:00, 2105.57it/s]
tokenizing: 2it [00:00, 1586.05it/s]
tokenizing: 72it [00:00, 2292.79it/s]


6
213
Current SEED: 41 TEST F1 0.6153846153846153


tokenizing: 2it [00:00, 1268.69it/s]
tokenizing: 2it [00:00, 1810.62it/s]
tokenizing: 72it [00:00, 2206.21it/s]


6
213
Current SEED: 123 TEST F1 0.6153846153846153


tokenizing: 2it [00:00, 1670.37it/s]
tokenizing: 2it [00:00, 2261.69it/s]
tokenizing: 72it [00:00, 2151.68it/s]


6
213
Current SEED: 456 TEST F1 0.6153846153846153


tokenizing: 2it [00:00, 1525.76it/s]
tokenizing: 2it [00:00, 1873.29it/s]
tokenizing: 72it [00:00, 2244.24it/s]


6
213
Current SEED: 999 TEST F1 0
0.487 $\pm$ 0.244


tokenizing: 2it [00:00, 2528.97it/s]
tokenizing: 2it [00:00, 1414.37it/s]
tokenizing: 72it [00:00, 2327.28it/s]


6
249
Current SEED: 321 TEST F1 0.4285714285714286


tokenizing: 2it [00:00, 2043.51it/s]
tokenizing: 2it [00:00, 2133.42it/s]
tokenizing: 72it [00:00, 2173.78it/s]


6
249
Current SEED: 42 TEST F1 0
