In [1]:
import os

import numpy as np
from sklearn.linear_model import LogisticRegression

from utils import load_hidden_representations_from_hdf5, read_templates_from_file

----

In [4]:
# params
# Root directory of the stored hidden representations; it is joined with a
# per-layer relative file path when the representations are loaded further down.
# NOTE(review): hardcoded absolute path -- adjust for the local environment.
log_dir = "/logfiles"
# Model directory name used as a path component of the representation files.
# NOTE(review): "bigscience-T0_B" below is possibly a typo for "bigscience-T0_3B" -- confirm.
model = "bigscience-T0_3B" # bigscience-T0_B or bigscience-T0
# Which part of the model the representations come from; also a path component.
module = "encoder" # encoder
# Task selector: picks the prompt CSV and the representation sub-directory.
# Exactly one of the following assignments should be active.
# task = "rte"
task = "cb"
# task = "wic"

In [3]:
# Fail fast on an unsupported `module` value: per the TODO, only the encoder is handled so far.
assert module == "encoder" # TODO(mm): support decoder as well

## Prompts

In [5]:
# Load the prompt templates of the selected task from its CSV file and show them.
# The `name` column is what the prompt selection further down matches against.
# NOTE(review): hardcoded absolute path -- adjust for the local environment.
df = read_templates_from_file(f"/t0-analysis/prompts/{task}.csv")
display(df)

Unnamed: 0,name,template,category,includes_labels,shuffle
0,null_pattern,{premise} {hypothesis},neutral,False,False
1,null_pattern_reversed,{hypothesis} {premise},neutral,False,False
2,gpt_3_true_false_neither,"{premise} Question: {hypothesis} True, False, ...",instructive,True,False
3,gpt_3_yes_no_maybe,"{premise} Question: {hypothesis} Yes, No, or M...",instructive,True,False
4,mnli_crowdsource,{premise} Using only the above description and...,instructive,True,False
5,always_sometimes_never,"Suppose it's true that {premise} Then, is ""{hy...",instructive,True,False
6,based_on_previous_passage,"{premise} Based on the previous passage, is it...",instructive,True,False
7,infer,"Suppose {premise} Can we infer that ""{hypothes...",instructive,True,False
8,claim,"{premise} Based on that information, is the cl...",instructive,True,False
9,consider,"{premise} Keeping in mind the above text, cons...",instructive,True,False


## Train linear classifier

In [6]:
def unison_shuffled_copies(a, b, rng=None):
    """Shuffle two equally long arrays with one shared random permutation.

    Row ``i`` of ``a`` and row ``i`` of ``b`` end up at the same position in
    the returned arrays, so (sample, label) pairs stay aligned.

    Adapted from:
    https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison

    Args:
        a: numpy array, shuffled along its first axis.
        b: numpy array with ``len(b) == len(a)``.
        rng: optional random source exposing ``permutation(n)``, e.g.
            ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``.
            Pass a seeded one to make the shuffle reproducible. When ``None``
            (the default) numpy's global RNG is used, exactly as before.

    Returns:
        Tuple ``(a[p], b[p])`` of shuffled copies, where ``p`` is the
        permutation that was drawn; the inputs are not modified.

    Raises:
        AssertionError: if ``a`` and ``b`` differ in length.
    """
    assert len(a) == len(b)
    # Fall back to the global numpy RNG to stay backward-compatible.
    draw_permutation = np.random.permutation if rng is None else rng.permutation
    p = draw_permutation(len(a))
    # Integer-array indexing returns copies, so the callers' arrays stay untouched.
    return a[p], b[p]

In [7]:
# Prompt selection: `use_pattern` lists the prompt names whose hidden
# representations the linear classifier below has to tell apart. Keep exactly
# one of the per-task lists active, matching the `task` set in the params cell.
# Names are matched against the `name` column of `df`; a name that is not in
# `df` is silently ignored, and class ids follow the row order of `df`, not
# the order of this list.

# RTE patterns
# use_pattern = [
#     "null_pattern",
#     "null_pattern_reversed",
#     "gpt_3_yes_no",
#     "gpt_3_yes_no_shuffled",
#     "gpt_3_true_false",
#     "gpt_3_true_false_shuffled",
#     "start_with_the",
#     "mnli_crowdsource",
#     "based_on_previous_passage",
#     "infer",
#     "follow",
#     "imply",
#     "guaranteed",
#     "justified", 
#     "must_be_true",
#     "should_assume"
# ]

# CB patterns
# Un-comment entries to add more prompts (= more classes) to the probe.
# NOTE(review): the template table displayed above lists only 10 CB prompts;
# names such as "follow" or "imply" do not appear there -- confirm they exist
# in the CSV before relying on them.
use_pattern = [
    # "null_pattern",
    # "null_pattern_reversed",
    "gpt_3_true_false_neither",
    "gpt_3_yes_no_maybe",
    # "mnli_crowdsource",
    # "always_sometimes_never",
    # "based_on_previous_passage",
    # "infer",
    # "claim",
    # "consider",
    # "follow",
    # "imply",
    # "guaranteed",
    # "guaranteed_possible",
    # "justified",
    # "must_be_true",
    # "should_assume",
    # "take_the_following",
]

# WIC patterns
# use_pattern = [
#     "gpt_3",
#     "gpt_3_yes_no",
#     "affirmation",
#     "grammar_homework",
#     "polysemous",
#     "question_context",
#     "question_meaning",
#     "question_meaning_yes_no",
#     "same_sense",
#     "similar_sense",
#     "similar_sense_yes_no",
# ]

In [11]:
# Linear probe: for every layer, train a logistic-regression classifier that
# predicts which prompt produced a given hidden representation.

# The prompt selection does not depend on the layer, so it is computed once.
# Class ids follow the row order of `df`, not the order of `use_pattern`.
prompt_names = [name for name in df['name'] if name in use_pattern]
if not prompt_names:
    # Without this guard the failure only surfaces later as an opaque
    # TypeError when the (missing) representations are shuffled.
    raise ValueError(
        f"none of the prompts in use_pattern={use_pattern} occur in the templates of task '{task}'"
    )

# Narrow the range for a quicker run, e.g. range(0, 10) or range(24, 25).
for layer in range(0, 25):

    print("layer:", layer)
    # One file per selected prompt; the spelling "represenations" is kept on
    # purpose because it has to match the file names on disk.
    file_names = [
        f"{task}/{model}/{module}/{name}/hidden_represenations_layer{layer}_avg.hdf5"
        for name in prompt_names
    ]

    # load hidden representations from hdf5 file
    per_prompt_representations = []
    classes = []
    n_sequences = 0

    for idx, file_name in enumerate(file_names):
        hidden_representations = load_hidden_representations_from_hdf5(os.path.join(log_dir, file_name))
        # print(hidden_representations.shape)
        n_sequences = hidden_representations.shape[0]

        per_prompt_representations.append(hidden_representations)
        classes += n_sequences * [idx] # assign representations to classes

    # Concatenate once instead of re-copying the growing array for every file.
    representations = np.concatenate(per_prompt_representations, axis=0)
    classes = np.asarray(classes)

    # shuffle representations and classes
    X, y = unison_shuffled_copies(representations, classes)
    print(X.shape, y.shape)

    # train linear classifier
    # multi_class='multinomial' uses a CE loss
    # NOTE(review): accuracy is measured on the very data the classifier was
    # fitted on; with far fewer samples than feature dimensions (see the
    # printed shapes) a perfect score is easy to reach -- consider a held-out
    # split before drawing conclusions.
    # NOTE(review): recent scikit-learn releases deprecate the `multi_class`
    # argument -- check the installed version before upgrading.
    print('classifying between:', prompt_names)
    clf = LogisticRegression(random_state=0, max_iter=2000, multi_class='multinomial').fit(X, y)

    print(f'layer={layer}; accuracy on training data: ', clf.score(X, y))
    print('\n')



layer: 0


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3450.48it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4952.68it/s]


(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']
layer=0; accuracy on training data:  1.0


layer: 1


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4186.45it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4269.63it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=1; accuracy on training data:  1.0


layer: 2


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 4197.68it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3901.09it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=2; accuracy on training data:  1.0


layer: 3


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3852.28it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3959.49it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=3; accuracy on training data:  1.0


layer: 4


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3885.29it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3672.66it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=4; accuracy on training data:  1.0


layer: 5


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3865.85it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3958.89it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=5; accuracy on training data:  1.0


layer: 6


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3873.05it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3996.48it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=6; accuracy on training data:  1.0


layer: 7


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3700.14it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3946.05it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=7; accuracy on training data:  1.0


layer: 8


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3722.24it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3716.24it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=8; accuracy on training data:  1.0


layer: 9


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3840.25it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3858.42it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=9; accuracy on training data:  1.0


layer: 10


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3857.59it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3820.26it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=10; accuracy on training data:  1.0


layer: 11


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3530.35it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3759.90it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=11; accuracy on training data:  1.0


layer: 12


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3748.80it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3839.31it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=12; accuracy on training data:  1.0


layer: 13


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2062.77it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 2961.48it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


layer=13; accuracy on training data:  1.0


layer: 14


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3805.59it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3743.24it/s]


(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']
layer=14; accuracy on training data:  1.0


layer: 15


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3850.51it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3736.16it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=15; accuracy on training data:  1.0


layer: 16


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3594.53it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3695.36it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=16; accuracy on training data:  1.0


layer: 17


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3765.81it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3754.31it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=17; accuracy on training data:  1.0


layer: 18


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3506.00it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3536.78it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=18; accuracy on training data:  1.0


layer: 19


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3553.74it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3479.82it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=19; accuracy on training data:  1.0


layer: 20


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3430.62it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3391.83it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=20; accuracy on training data:  1.0


layer: 21


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3507.20it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3666.41it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=21; accuracy on training data:  1.0


layer: 22


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3512.98it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3599.71it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=22; accuracy on training data:  1.0


layer: 23


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3483.12it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3542.81it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']





layer=23; accuracy on training data:  1.0


layer: 24


Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3515.81it/s]
Reading embeddings: 100%|██████████| 56/56 [00:00<00:00, 3413.87it/s]

(112, 2048) (112,)
classifying between: ['gpt_3_true_false_neither', 'gpt_3_yes_no_maybe']
layer=24; accuracy on training data:  0.9821428571428571





