In [1]:
# Use Google Colab
use_colab = True

# Is this notebook running on Colab?
# If so, then google.colab package (github.com/googlecolab/colabtools)
# should be available in this environment

# Previous version used importlib, but we could do the same thing with
# just attempting to import google.colab
try:
    from google.colab import drive
    colab_available = True
except:
    colab_available = False

if use_colab and colab_available:
    drive.mount('/content/drive')

    # cd to the appropriate working directory under my Google Drive
    %cd '/content/drive/My Drive/cs696ds_lexalytics/Ronald Gypsum Prompts'
    
    # Install packages specified in requirements
    #!pip install -r requirements.txt

    %cd 'prompts_subtask4'
    
    # List the directory contents
    !ls

Mounted at /content/drive
/content/drive/My Drive/cs696ds_lexalytics/Ronald Gypsum Prompts
/content/drive/My Drive/cs696ds_lexalytics/Ronald Gypsum Prompts/prompts_subtask4
bert_no_prompt_cls_lr_subtask4.ipynb
bert_no_prompt_pooled_lr_subtask4.ipynb
bert_prompt_logit_softmax_subtask4.ipynb
bert_prompt_lr_subtask4.ipynb
collect_results_snippet.ipynb
papermill_launcher_no_prompt_cls_lr.ipynb
papermill_launcher_no_prompt_pooled_lr.ipynb
papermill_launcher_prompt_logit_softmax.ipynb
papermill_launcher_prompt_lr.ipynb
results_bert_16_shot_no_prompt_cls_lr_atsc_restaurants
results_bert_16_shot_no_prompt_pooled_lr_atsc_restaurants
results_bert_16_shot_prompt_logit_softmax_atsc_restaurants
results_bert_16_shot_prompt_lr_concatenate_atsc_restaurants
results_bert_256_shot_no_prompt_cls_lr_atsc_restaurants
results_bert_256_shot_no_prompt_pooled_lr_atsc_restaurants
results_bert_256_shot_prompt_logit_softmax_atsc_restaurants
results_bert_64_shot_no_prompt_cls_lr_atsc_restaurants
results_bert_64_sho

In [1]:
import os
import itertools
import json

import tqdm
import numpy as np
import pandas as pd

In [95]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Base experiment id prefix
experiment_id_prefix = 'bert_prompt_logit_softmax_atsc'

# Random seeds; one run per seed is expected for every configuration
random_seeds = [696, 685, 683, 682, 589]

# Language models, keyed by a short name. Each value is a path to a
# pretrained MLM model folder or the string "bert-base-uncased".
lm_model_paths = {
    'bert_amazon_om': '../trained_models/lm_further_pretraining_bert_amazon_electronics_original_masking_bseoh_2021-05-08--21_44_30',
    #'bert-base-uncased': 'bert-base-uncased'
}

# Prompts to be added to the end of each review text.
# Note: pseudo-labels for each prompt should be given in the order of
# (positive), (negative), (neutral)
sentiment_prompts = {
    'i_felt': {
        "prompt": "I felt the {aspect} was [MASK].",
        "labels": ["good", "bad", "ok"],
    },
    'i_like': {
        "prompt": "I [MASK] the {aspect}.",
        "labels": ["like", "dislike", "ignore"],
    },
    'made_me_feel': {
        "prompt": "The {aspect} made me feel [MASK].",
        "labels": ["good", "bad", "indifferent"],
    },
    'the_aspect_is': {
        "prompt": "The {aspect} is [MASK].",
        "labels": ["good", "bad", "ok"],
    },
}

# Which families of experiment ids the cells below should register
run_single_prompt = True
run_multiple_prompts = True

prompts_merge_behavior = 'sum_logits'
prompts_perturb = False

# Training settings
training_domain = 'laptops'  # 'laptops', 'restaurants', 'joint'

# Few-shot dataset size
training_dataset_few_shot_size = 256

# Prefix actually used to build experiment ids; it embeds the few-shot size
experiment_id_prefix_override = (
    'bert_'
    + str(training_dataset_few_shot_size)
    + '_shot_'
    + 'prompt_logit_softmax_atsc'
)

# Test settings
testing_batch_size = 32
testing_domain = 'laptops'

# An experiment is cross-domain when it is tested on a domain other than
# the one it was trained on
cross_domain = testing_domain != training_domain

# Filled in by the following cells with one id prefix per configuration
experiment_id_prefix_complete = list()

In [96]:
# Register one experiment-id prefix for every (language model, prompt) pair.
# Each prefix ends with '_'; the results cell appends the random seed to it.
if run_single_prompt:
    single_prompt_configs = itertools.product(lm_model_paths.keys(), sentiment_prompts.keys())

    for lm_model_name, prompt_key in tqdm.tqdm(single_prompt_configs):

        # Domain portion of the id; cross-domain runs carry an extra marker
        domain_part = testing_domain + '_'
        if cross_domain:
            domain_part += 'cross_domain_'

        # String id identifying this particular (training) experiment
        # in directory paths and other settings
        experiment_id_config = (
            experiment_id_prefix_override + '_'
            + domain_part
            + lm_model_name + '_'
            + 'single_prompt' + '_'
            + prompt_key + '_'
        )

        experiment_id_prefix_complete.append(experiment_id_config)

4it [00:00, 40524.68it/s]


In [97]:
# Register one experiment-id prefix per language model for the runs that
# combine multiple prompts. Each prefix ends with '_'; the results cell
# appends the random seed to it.
if run_multiple_prompts:
    # product() over a single iterable yields 1-tuples, hence the unpacking
    for (lm_model_name,) in tqdm.tqdm(itertools.product(lm_model_paths.keys())):

        # Domain portion of the id; cross-domain runs carry an extra marker
        domain_part = testing_domain + '_'
        if cross_domain:
            domain_part += 'cross_domain_'

        # String id identifying this particular (training) experiment
        # in directory paths and other settings
        experiment_id_config = (
            experiment_id_prefix_override + '_'
            + domain_part
            + lm_model_name + '_'
            + 'multiple_prompts' + '_'
        )

        experiment_id_prefix_complete.append(experiment_id_config)

1it [00:00, 14169.95it/s]


In [93]:
# Register one experiment-id prefix per language model, without any
# single-/multiple-prompt marker. Unlike the neighbouring cells, this loop
# is not guarded by a run_* flag.
#
# NOTE(review): this cell's execution count (In [93]) is lower than that of
# the configuration cell (In [95]), which resets experiment_id_prefix_complete
# to an empty list. The saved results therefore may not include this prefix,
# while a top-to-bottom run would - confirm whether this cell is still wanted.
# product() over a single iterable yields 1-tuples, hence the unpacking
for (lm_model_name,) in tqdm.tqdm(itertools.product(lm_model_paths.keys())):

    # Domain portion of the id; cross-domain runs carry an extra marker
    domain_part = testing_domain + '_'
    if cross_domain:
        domain_part += 'cross_domain_'

    # String id identifying this particular (training) experiment
    # in directory paths and other settings
    experiment_id_config = (
        experiment_id_prefix_override + '_'
        + domain_part
        + lm_model_name + '_'
    )

    experiment_id_prefix_complete.append(experiment_id_config)

1it [00:00, 4391.94it/s]


In [99]:
# For every registered experiment-id prefix, load the test metrics of each
# random seed and report mean and standard error of accuracy and F1.
for prefix in experiment_id_prefix_complete:

    # One metrics dict per seed, in the order of random_seeds
    per_seed_metrics = []

    for seed in random_seeds:

        # The full experiment id is the prefix followed by the seed
        experiment_id = prefix + str(seed)

        # Load test scores file
        metrics_path = os.path.join('..', 'trained_models_prompts', experiment_id, 'test_metrics.json')

        # Use a context manager so the file handle is closed right away
        # instead of being left open by json.load(open(...))
        with open(metrics_path, 'r') as metrics_file:
            test_metrics = json.load(metrics_file)

        per_seed_metrics.append(test_metrics)

    # One row per seed, one column per metric
    test_metrics_all = pd.DataFrame(per_seed_metrics)

    print(prefix)

    print(test_metrics_all)

    # Standard error = standard deviation across seeds / sqrt(number of seeds)
    n_seeds_sqrt = np.sqrt(len(random_seeds))

    accuracy_mean = test_metrics_all.accuracy.mean()
    accuracy_se = test_metrics_all.accuracy.std() / n_seeds_sqrt

    f1_mean = test_metrics_all.f1.mean()
    f1_se = test_metrics_all.f1.std() / n_seeds_sqrt

    print("Accuracy mean: ", accuracy_mean)
    print("Accuracy se: ", accuracy_se)

    print("F1 mean: ", f1_mean)
    print("F1 se: ", f1_se)

    print()

bert_256_shot_prompt_logit_softmax_atsc_laptops_bert_amazon_om_single_prompt_i_felt_
   accuracy        f1  precision    recall
0  0.778997  0.730206   0.741267  0.741094
1  0.758621  0.699426   0.709431  0.720964
2  0.766458  0.726441   0.726430  0.735358
3  0.760188  0.685783   0.733129  0.718769
4  0.760188  0.703977   0.714811  0.725921
Accuracy mean:  0.7648902821316614
Accuracy se:  0.0037747945388063634
F1 mean:  0.7091665643075031
F1 se:  0.00839534265676802

bert_256_shot_prompt_logit_softmax_atsc_laptops_bert_amazon_om_single_prompt_i_like_
   accuracy        f1  precision    recall
0  0.758621  0.690454   0.741165  0.716891
1  0.768025  0.688751   0.778290  0.711180
2  0.752351  0.703561   0.716685  0.719407
3  0.766458  0.695359   0.742216  0.719157
4  0.760188  0.712366   0.717709  0.736771
Accuracy mean:  0.761128526645768
Accuracy se:  0.002830010985151682
F1 mean:  0.6980983140926279
F1 se:  0.004397130066570106

bert_256_shot_prompt_logit_softmax_atsc_laptops_bert_amaz