In [1]:
# Use Google Colab
use_colab = True

# Is this notebook running on Colab?
# If so, then google.colab package (github.com/googlecolab/colabtools)
# should be available in this environment

# Previous version used importlib, but we could do the same thing with
# just attempting to import google.colab
try:
    from google.colab import drive
    colab_available = True
except:
    colab_available = False

if use_colab and colab_available:
    drive.mount('/content/drive')

    # cd to the appropriate working directory under my Google Drive
    %cd '/content/drive/My Drive/cs696ds_lexalytics/Ronald Gypsum Prompts'
    
    # Install packages specified in requirements
    #!pip install -r requirements.txt

    %cd 'micro_f1_calc'
    
    # List the directory contents
    !ls

Mounted at /content/drive
/content/drive/My Drive/cs696ds_lexalytics/Ronald Gypsum Prompts
/content/drive/My Drive/cs696ds_lexalytics/Ronald Gypsum Prompts/micro_f1_calc
bert_no_prompt_pooled_lr_micro_f1_calc.ipynb
bert_prompt_logit_softmax_micro_f1_calc.ipynb
collect_results_snippet.ipynb
papermill_launcher_no_prompt_pooled_lr.ipynb
papermill_launcher_prompt_logit_softmax.ipynb
results_bert_16_shot_no_prompt_pooled_lr_atsc_laptops
results_bert_16_shot_no_prompt_pooled_lr_atsc_restaurants
results_bert_16_shot_prompt_logit_softmax_atsc_laptops
results_bert_16_shot_prompt_logit_softmax_atsc_restaurants


In [2]:
import os
import itertools
import json

import tqdm
import numpy as np
import pandas as pd

In [6]:
# experiment id prefix
experiment_id_prefix = 'bert_no_prompt_pooled_lr_micro_f1_calc'

# Random seed
random_seeds = [696, 685, 683, 682, 589]

# path to pretrained MLM model folder or the string "bert-base-uncased"
lm_model_paths = {
    #'bert_yelp_restaurants': '../trained_models/lm_further_pretraining_bert_yelp_restaurants_bseoh_2021-03-22--15_03_31',
    'bert_amazon_electronics': '../progress/lm_further_pretraining_bert_amazon_electronics_bseoh_2021-03-06--18_59_53/results/checkpoint-1180388'
    #'bert-base-uncased': 'bert-base-uncased'
}

# Training settings
training_domain = 'laptops' # 'laptops', 'restaurants', 'joint'

# Few-shot dataset size
training_dataset_few_shot_size = 16

if training_dataset_few_shot_size == -1:
    experiment_id_prefix_override = 'bert_no_prompt_pooled_lr_atsc'
else:
    experiment_id_prefix_override = 'bert_' + str(training_dataset_few_shot_size) + '_shot_' + 'no_prompt_pooled_lr_atsc'

# Test settings
testing_batch_size = 32
testing_domain = 'laptops'

if testing_domain != training_domain:
    cross_domain = True
else:
    cross_domain = False

experiment_id_prefix_complete = []

In [None]:
if run_single_prompt:    
    for config in tqdm.tqdm(itertools.product(lm_model_paths.keys(), sentiment_prompts.keys())):
        
        lm_model_name, prompt_key = config
        
        # We will use the following string ID to identify this particular (training) experiments
        # in directory paths and other settings
        experiment_id_config = experiment_id_prefix_override + '_'
        experiment_id_config = experiment_id_config + testing_domain + '_'
        
        if cross_domain:
            experiment_id_config = experiment_id_config + 'cross_domain_'

        experiment_id_config = experiment_id_config + lm_model_name + '_'
        experiment_id_config = experiment_id_config + 'single_prompt' + '_'
        experiment_id_config = experiment_id_config + prompt_key + '_'
        
        experiment_id_prefix_complete.append(experiment_id_config)

3it [00:00, 2088.45it/s]


In [None]:
if run_multiple_prompts:
    for config in tqdm.tqdm(itertools.product(lm_model_paths.keys())):
        
        lm_model_name = config[0]
        
        # We will use the following string ID to identify this particular (training) experiments
        # in directory paths and other settings
        experiment_id_config = experiment_id_prefix_override + '_'
        experiment_id_config = experiment_id_config + testing_domain + '_'
        
        if cross_domain:
            experiment_id_config = experiment_id_config + 'cross_domain_'

        experiment_id_config = experiment_id_config + lm_model_name + '_'
        experiment_id_config = experiment_id_config + 'multiple_prompts' + '_'
        
        experiment_id_prefix_complete.append(experiment_id_config)

2it [00:00, 1703.96it/s]


In [7]:
for config in tqdm.tqdm(itertools.product(lm_model_paths.keys())):
    
    lm_model_name = config[0]
    
    # We will use the following string ID to identify this particular (training) experiments
    # in directory paths and other settings
    experiment_id_config = experiment_id_prefix_override + '_'
    experiment_id_config = experiment_id_config + testing_domain + '_'

    if cross_domain:
        experiment_id_config = experiment_id_config + 'cross_domain_'

    experiment_id_config = experiment_id_config + lm_model_name + '_'
    
    experiment_id_prefix_complete.append(experiment_id_config)

1it [00:00, 430.94it/s]


In [8]:
for prefix in experiment_id_prefix_complete:
    
    test_metrics_f1 = []
    test_metrics_precision = []
    test_metrics_recall = []

    for seed in random_seeds:
        
        experiment_id = prefix + str(seed)
        
        # Load test scores file
        test_metrics = json.load(open(os.path.join('..', 'trained_models_prompts', experiment_id, 'test_metrics_micro_f1.json'), 'r'))
        
        test_metrics_f1.append(test_metrics['f1'])
        test_metrics_precision.append(test_metrics['precision'])
        test_metrics_recall.append(test_metrics['recall'])

    test_metrics_f1 = pd.DataFrame(test_metrics_f1)
    test_metrics_precision = pd.DataFrame(test_metrics_precision)
    test_metrics_recall = pd.DataFrame(test_metrics_recall)    
    
    print(prefix)

    print('F1')
    print(test_metrics_f1)

    print('Precision')
    print(test_metrics_precision)

    print('Recall')
    print(test_metrics_recall)

    f1_0_mean = test_metrics_f1['0'].mean()
    f1_0_se = test_metrics_f1['0'].std() / np.sqrt(len(random_seeds))

    f1_1_mean = test_metrics_f1['1'].mean()
    f1_1_se = test_metrics_f1['1'].std() / np.sqrt(len(random_seeds))

    f1_2_mean = test_metrics_f1['2'].mean()
    f1_2_se = test_metrics_f1['2'].std() / np.sqrt(len(random_seeds))

    precision_0_mean = test_metrics_precision['0'].mean()
    precision_0_se = test_metrics_precision['0'].std() / np.sqrt(len(random_seeds))

    precision_1_mean = test_metrics_precision['1'].mean()
    precision_1_se = test_metrics_precision['1'].std() / np.sqrt(len(random_seeds))

    precision_2_mean = test_metrics_precision['2'].mean()
    precision_2_se = test_metrics_precision['2'].std() / np.sqrt(len(random_seeds))

    recall_0_mean = test_metrics_recall['0'].mean()
    recall_0_se = test_metrics_recall['0'].std() / np.sqrt(len(random_seeds))

    recall_1_mean = test_metrics_recall['1'].mean()
    recall_1_se = test_metrics_recall['1'].std() / np.sqrt(len(random_seeds))

    recall_2_mean = test_metrics_recall['2'].mean()
    recall_2_se = test_metrics_recall['2'].std() / np.sqrt(len(random_seeds))

    print("F1 0 mean: ", f1_0_mean)
    print("F1 0 se: ", f1_0_se)

    print("F1 1 mean: ", f1_1_mean)
    print("F1 1 se: ", f1_1_se)

    print("F1 2 mean: ", f1_2_mean)
    print("F1 2 se: ", f1_2_se)

    print("precision 0 mean: ", precision_0_mean)
    print("precision 0 se: ", precision_0_se)

    print("precision 1 mean: ", precision_1_mean)
    print("precision 1 se: ", precision_1_se)

    print("precision 2 mean: ", precision_2_mean)
    print("precision 2 se: ", precision_2_se)

    print("recall 0 mean: ", recall_0_mean)
    print("recall 0 se: ", recall_0_se)

    print("recall 1 mean: ", recall_1_mean)
    print("recall 1 se: ", recall_1_se)

    print("recall 2 mean: ", recall_2_mean)
    print("recall 2 se: ", recall_2_se)

    print()

bert_16_shot_no_prompt_pooled_lr_atsc_laptops_bert_amazon_electronics_
F1
          0         1         2
0  0.696099  0.030303  0.000000
1  0.624837  0.030075  0.370370
2  0.529711  0.357282  0.011628
3  0.628159  0.419530  0.000000
4  0.703663  0.300971  0.000000
Precision
          0         1         2
0  0.535545  0.500000  0.000000
1  0.563679  0.400000  0.334928
2  0.629032  0.237726  0.333333
3  0.816901  0.272941  0.000000
4  0.566071  0.397436  0.000000
Recall
          0         1         2
0  0.994135  0.015625  0.000000
1  0.700880  0.015625  0.414201
2  0.457478  0.718750  0.005917
3  0.510264  0.906250  0.000000
4  0.929619  0.242188  0.000000
F1 0 mean:  0.6364935962054823
F1 0 se:  0.03136257565733518
F1 1 mean:  0.22763209654175548
F1 1 se:  0.08275862290644706
F1 2 mean:  0.07639965546942291
F1 2 se:  0.07352716596255852
precision 0 mean:  0.6222458728132699
precision 0 se:  0.05100955197938861
precision 1 mean:  0.36162063441954
precision 1 se:  0.04749655774637349
