In [9]:
!pip3 install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
!pip install transformers

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.9.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (2041.3 MB)
[K     |████████████████████████████████| 2041.3 MB 21 kB/s s eta 0:00:01    |██████▏                         | 395.3 MB 4.8 MB/s eta 0:05:46     |██████▎                         | 400.3 MB 4.8 MB/s eta 0:05:45     |█████████                       | 574.0 MB 4.9 MB/s eta 0:04:58     |█████████████▏                  | 842.4 MB 27.6 MB/s eta 0:00:44     |██████████████▏                 | 905.8 MB 25.5 MB/s eta 0:00:45     |████████████████▌               | 1049.8 MB 8.4 MB/s eta 0:01:58     |█████████████████████▋          | 1380.9 MB 19.1 MB/s eta 0:00:35     |██████████████████████▋         | 1442.6 MB 9.0 MB/s eta 0:01:07     |████████████████████████▍       | 1556.6 MB 19.9 MB/s eta 0:00:25
[?25hCollecting torchvision==0.10.0+cu111
  Downloading https://download.pytorch.org

In [1]:
import torch
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import torch.nn as nn
import pandas as pd

In [2]:
import argparse
import glob
import logging
import os
import random
import timeit
import pickle

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

import transformers
from transformers import (
    MODEL_FOR_QUESTION_ANSWERING_MAPPING,
    WEIGHTS_NAME,
    AdamW,
    get_linear_schedule_with_warmup,
    squad_convert_examples_to_features,
)

from transformers import BertForQuestionAnswering, BertConfig, BertTokenizer

from transformers.data.metrics.squad_metrics import (
    compute_predictions_log_probs,
    compute_predictions_logits,
    squad_evaluate,
)
from transformers.data.processors.squad import SquadResult, SquadV1Processor, SquadV2Processor
from transformers.trainer_utils import is_main_process


# try:
#     from torch.utils.tensorboard import SummaryWriter
# except ImportError:
#     from tensorboardX import SummaryWriter


logger = logging.getLogger(__name__)

MODEL_CONFIG_CLASSES = list(MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)


model_name_or_path = 'models/bert/'
# cache_dir=cache_dir = 'models/cache'

max_seq_length = 384
doc_stride = 128
max_query_length = 10
threads = 12
n_gpu = 1

input_dir = "./data/squad"
output_dir = "./models/bert/"
model_type="bert"
# evaluate = True
train_file = "train-v2.0.json"
version_2_with_negative=True
per_gpu_eval_batch_size=16


n_best_size=20
max_answer_length=30
do_lower_case=True
verbose_logging=True
null_score_diff_threshold=0.0

global_attention = {}

In [3]:
%set_env CUDA_VISIBLE_DEVICES=0,1
%set_env CUDA_DEVICE_ORDER=PCI_BUS_ID

env: CUDA_VISIBLE_DEVICES=0,1
env: CUDA_DEVICE_ORDER=PCI_BUS_ID


In [4]:
!pwd

/rapids/notebooks/host


In [5]:
#pipeline related parameters

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 255)) # scaler 

data_dir='/rapids/notebooks/host/QA_attentions_pickled'
representation_dir='/rapids/notebooks/host/QA_attentions_pickled/representations'

class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x    
    
representation_model = torch.hub.load('facebookresearch/barlowtwins:main', 'resnet50')
representation_model.fc = Identity() # pass through values from second to last layer, bypassing linear classifier

device = "cuda:1"
cuda = torch.device('cuda:1')
representation_model.to(cuda)

Using cache found in /root/.cache/torch/hub/facebookresearch_barlowtwins_main


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
# pipeline methods

def plot_layer_heads(attention, num_layers=12, num_heads=12):
  for i in range(0,11):
    p = attention[i]
    fig, axis = plt.subplots(1,12, figsize=(20,5), facecolor='w', edgecolor='k')
    plt.title(f'layer {i}')
    head = 0
    for axs, ph in zip(axis.flatten(), p):
      heatmap = axs.imshow(ph, cmap='hot')
    
def scale_examples(examples):
    num_examples = len(examples)
    scaled_examples = np.empty(shape=(num_examples), dtype=np.ndarray)
    for i, example in enumerate(examples):
        new_example = np.empty(shape=(12,12), dtype=np.ndarray)
        for l, layer in enumerate(example): #12 layers
            new_layer = np.array([])
            for h, head in enumerate(layer): #12 heads
                flat_head_transformed = scaler.fit_transform(head)
                new_example[l,h] = flat_head_transformed.reshape(384,384)
        scaled_examples[i] = new_example
    return scaled_examples

# create (1, 3, 384, 384) shape expected by barlow twins model
def reshape_example(image):
    example_channel = np.expand_dims(image, axis=0)
    batch = np.append(example_channel, example_channel, axis=0)
    batch = np.append(batch, example_channel, axis=0)
    example_3channel = np.expand_dims(batch, axis=0)
    return example_3channel

def get_representations(attentions):
    num_attentions = len(attentions)
    barlow_representations = np.zeros((num_attentions), np.object)
    for i, example in enumerate(attentions):
        reshaped_example = np.zeros((12,12), np.object)
        for l, layer in enumerate(example):
            for h, head in enumerate(layer):
                reshaped_head = torch.from_numpy(reshape_example(head)).to(cuda)
                representation_head = representation_model(reshaped_head.float())
                reshaped_example[l][h] = representation_head.detach().cpu().numpy()
        barlow_representations[i] = reshaped_example

    return barlow_representations

def flatten_layer_heads(representations_tensor):
    print("flattening layers/heads ...")
    num_examples =  len(representations_tensor)
    flat_array = np.zeros((num_examples * 12 * 12), np.ndarray)
    i = 0
    for example in representations_tensor:
        for layer in example:
            for h, head in enumerate(layer):
                flat_array[i] = head[0]
                i += 1
    return flat_array





In [7]:
# batch_size = 100
# batch_num = 0
# representation_df = pd.DataFrame()
# representation_array = []
# for i in range(1,21):
#     start_time = time.time()
#     batch_num = i * batch_size
#     print(f"Loading attentions batch {batch_num}")
#     attentions = torch.load(os.path.join(data_dir, f"eval_attentions_{batch_num}.bin"))
#     print("Scaling attention values to 0-255 ...")
#     scaled_attentions =  scale_examples(attentions)
#     print("Processing to 2048 value representations through barlow_twins ...")
#     barlow_representations = get_representations(scaled_attentions)
#     print("Appending results to array/dataframe ...")
#     flat_representations = flatten_layer_heads(barlow_representations)
#     representation_array.append(flat_representations)
#     df = pd.DataFrame(flat_representations)
#     df = pd.DataFrame([pd.Series(x) for x in df[0]])
#     representation_df = representation_df.append(df, ignore_index=True)
#     print(f"--- eval to representation batch {batch_num} in  {(time.time() - start_time)} seconds ---")
    

In [8]:
# evaluation
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)


def to_list(tensor):
    return tensor.detach().cpu().tolist()

def evaluate(args, model, tokenizer, prefix=""):
    dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True)

#     if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
#         os.makedirs(args.output_dir)

    eval_batch_size = 1 * max(1, n_gpu)

    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=eval_batch_size)

    # multi-gpu evaluate
    if n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", eval_batch_size)

    all_results = []
    all_attentions = []
    start_time = timeit.default_timer()
    attn_count = 0

    representation_df = pd.DataFrame()
    
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            if model_type in ["xlm", "roberta", "distilbert", "camembert", "bart", "longformer"]:
                del inputs["token_type_ids"]

            feature_indices = batch[3]

            outputs = model(**inputs)
            
        for i, feature_index in enumerate(feature_indices):
            eval_feature = features[feature_index.item()]
            unique_id = int(eval_feature.unique_id)

            start_logits = outputs.start_logits
            end_logits = outputs.end_logits
            attentions = get_layers(outputs.attentions)

            result = SquadResult(unique_id, start_logits, end_logits)
            all_results.append(result)

#             append_list_as_row('QA_bert_attentions.csv', attentions)
            all_attentions.append(attentions)

    
            attn_count += 1
            if attn_count % 500 == 0:
#                 representation_array = []
#                 for i, attentions in enumerate(all_attentions):
                logger.info("Scaling attention values to 0-255 ...")
                representations = scale_examples(all_attentions)
                logger.info("Processing to 2048 value representations through barlow_twins ...")
                representations = get_representations(representations)
                logger.info("Appending results to array/dataframe ...")
                representations = flatten_layer_heads(representations)
#                 representation_array.append(flat_representations)
                df = pd.DataFrame(representations)
                df = pd.DataFrame([pd.Series(x) for x in df[0]])
                representation_df = representation_df.append(df, ignore_index=True)
#                 logger.info("  Outputting Attention File %s eval_attentions %i", output_dir, attn_count)
#                 torch.save(all_attentions, "QA_attentions_pickled/eval_attentions_" +str(attn_count)+".bin")
#                 representation_df.to_csv(os.path.join(data_dir, f"representation_df_{attn_count}.csv"))
                print(f"--- eval to representation batch {attn_count} ---")          
                all_attentions = []
                representations = []
                df = pd.DataFrame()
            
            if attn_count % 5000 == 0:
                logger.info(f"  Outputting Attention File representation_df_{attn_count} to {representation_dir}")
                representation_df.to_csv(os.path.join(representation_dir, f"representation_df_{attn_count}.csv"))
                representation_df = pd.DataFrame()

    evalTime = timeit.default_timer() - start_time
    logger.info("  Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset))
#     global_attention = all_attentions
#     logger.info("  Outputting Attention File %s eval_attentions.bin", output_dir)
#     torch.save(all_attentions, os.path.join(output_dir, "eval_attentions.bin"))
    

    
#     with open(os.path.join(output_dir, "eval_attentions.pkl"), 'wb') as attention_file:
#       pickle.dump(all_attentions, attention_file)

    # Compute predictions
    output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
    output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))

    if version_2_with_negative:
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))
    else:
        output_null_log_odds_file = None

    # XLNet and XLM use a more complex post-processing procedure
    if args.model_type in ["xlnet", "xlm"]:
        start_n_top = model.config.start_n_top if hasattr(model, "config") else model.module.config.start_n_top
        end_n_top = model.config.end_n_top if hasattr(model, "config") else model.module.config.end_n_top

        predictions = compute_predictions_log_probs(
            examples,
            features,
            all_results,
            args.n_best_size,
            args.max_answer_length,
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
            start_n_top,
            end_n_top,
            args.version_2_with_negative,
            tokenizer,
            args.verbose_logging,
        )
    else:
        predictions = compute_predictions_logits(
            examples,
            features,
            all_results,
            n_best_size,
            max_answer_length,
            do_lower_case,
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
            verbose_logging,
            version_2_with_negative,
            null_score_diff_threshold,
            tokenizer,
        )

    # Compute the F1 and exact scores.
    results = squad_evaluate(examples, predictions)
    return results

    return {}

In [9]:
def get_layers(attention, num_layers=12, num_heads=12):
  layers = np.ndarray((num_heads,num_layers), np.ndarray)
  for i, layer in enumerate(attention):
    layer = layer.detach().cpu().numpy()[0]
    for j, head in enumerate(layer):
      layers[i,j] = head
  return layers

In [10]:

from csv import writer
def append_list_as_row(file_name, list_of_elem):
    # Open file in append mode
    with open(file_name, 'a+', newline='') as write_obj:
        # Create a writer object from csv module
        csv_writer = writer(write_obj)
        # Add contents of list as last row in the csv file
        csv_writer.writerow(list_of_elem)

In [11]:


def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
    # Load data features from cache or dataset file
#     input_dir = data_dir else "."

    cached_features_file = os.path.join(
        input_dir,
        "cached_{}_{}_{}".format(
            "train",
            list(filter(None, model_name_or_path.split("/"))).pop(),
            str(max_seq_length),
        ),
    )

    logger.info("cached features file: %s", cached_features_file)
#     Init features and dataset from cache if it exists
    if os.path.exists(cached_features_file):
        logger.info("Loading features from cached file %s", cached_features_file)
        features_and_dataset = torch.load(cached_features_file)
        features, dataset, examples = (
            features_and_dataset["features"],
            features_and_dataset["dataset"],
            features_and_dataset["examples"],
        )
    else:
        logger.info("Creating features from dataset file at %s", input_dir)

        examples = processor.get_train_examples(input_dir, filename=train_file)

        logger.info("Got features from dataset file at %s", input_dir)

        features, dataset = squad_convert_examples_to_features(
            examples=examples,
            tokenizer=tokenizer,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride,
            max_query_length=max_query_length,
            is_training=False,
            return_dataset="pt",
            threads=threads,
        )

        logger.info("Saving features into cached file %s", cached_features_file)
        torch.save({"features": features, "dataset": dataset, "examples": examples}, cached_features_file)

    if output_examples:
        return dataset, examples, features
    return dataset


In [12]:

# Setup logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
logger.warning(
    "device: %s, n_gpu: %s",
    device,
    n_gpu
)
# Set the verbosity to info of the Transformers logger (on main process only):
# if is_main_process(args.local_rank):
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
# Set seed
set_seed(42)

# Load pretrained model and tokenizer
config = BertConfig.from_pretrained(model_name_or_path, output_attentions=True) # no config_path?
tokenizer = BertTokenizer.from_pretrained(
    model_name_or_path,
    do_lower_case=True,
    cache_dir=input_dir,
    use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
)

[INFO|configuration_utils.py:528] 2021-07-13 18:03:36,612 >> loading configuration file models/bert/config.json
[INFO|configuration_utils.py:566] 2021-07-13 18:03:36,613 >> Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_attentions": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.8.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

[INFO|tokenization_utils_base.py:1651] 2021-07-13 18:03:36,614 >> Didn't find file models/bert/added_tokens.json. We won't load it.
[INFO|tokenization_utils_ba

In [None]:

#model = AutoModelForQuestionAnswering.from_pretrained(model_name_or_path, output_attentions=True)
model = BertForQuestionAnswering.from_pretrained(model_name_or_path, output_attentions=True)
device = "cuda:1"
model = model.to(device)
# Evaluate
processor = SquadV2Processor() if version_2_with_negative else SquadV1Processor()
result = evaluate({'data_dir': ""}, model, tokenizer)

result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
results.update(result)

logger.info("Results: {}".format(results))

[INFO|configuration_utils.py:528] 2021-07-13 18:03:36,722 >> loading configuration file models/bert/config.json
[INFO|configuration_utils.py:566] 2021-07-13 18:03:36,723 >> Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_attentions": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.8.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

[INFO|modeling_utils.py:1159] 2021-07-13 18:03:36,724 >> loading weights file models/bert/pytorch_model.bin
[INFO|modeling_utils.py:1345] 2021-07-13 18:03:37,5

flattening layers/heads ...


Evaluating:   0%|          | 502/131944 [15:54<3971:44:21, 108.78s/it]

--- eval to representation batch 500 ---


Evaluating:   1%|          | 997/131944 [16:05<50:44, 43.01it/s]      07/13/2021 18:21:25 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   1%|          | 997/131944 [16:21<50:44, 43.01it/s]07/13/2021 18:22:17 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 18:36:31 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   1%|          | 1000/131944 [31:29<2291:36:20, 63.00s/it]

--- eval to representation batch 1000 ---


Evaluating:   1%|          | 1498/131944 [31:43<50:41, 42.89it/s]     07/13/2021 18:37:02 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   1%|          | 1498/131944 [32:01<50:41, 42.89it/s]07/13/2021 18:37:55 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 18:52:04 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   1%|          | 1503/131944 [47:01<1854:27:21, 51.18s/it]

--- eval to representation batch 1500 ---


Evaluating:   2%|▏         | 1998/131944 [47:15<50:41, 42.72it/s]     07/13/2021 18:52:35 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   2%|▏         | 1998/131944 [47:31<50:41, 42.72it/s]07/13/2021 18:53:28 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 19:07:37 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   2%|▏         | 2004/131944 [1:02:37<1717:02:00, 47.57s/it]

--- eval to representation batch 2000 ---


Evaluating:   2%|▏         | 2499/131944 [1:02:49<50:23, 42.81it/s]     07/13/2021 19:08:08 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   2%|▏         | 2499/131944 [1:03:01<50:23, 42.81it/s]07/13/2021 19:09:01 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 19:23:14 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   2%|▏         | 2503/131944 [1:18:12<1958:27:37, 54.47s/it]

--- eval to representation batch 2500 ---


Evaluating:   2%|▏         | 2998/131944 [1:18:23<50:22, 42.66it/s]     07/13/2021 19:23:43 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   2%|▏         | 2998/131944 [1:18:41<50:22, 42.66it/s]07/13/2021 19:24:35 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 19:38:47 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   2%|▏         | 3003/131944 [1:33:47<1844:22:11, 51.49s/it]

--- eval to representation batch 3000 ---


Evaluating:   3%|▎         | 3498/131944 [1:33:59<49:50, 42.95it/s]     07/13/2021 19:39:19 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   3%|▎         | 3498/131944 [1:34:11<49:50, 42.95it/s]07/13/2021 19:40:11 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 19:54:20 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   3%|▎         | 3500/131944 [1:49:18<2399:12:49, 67.24s/it]

--- eval to representation batch 3500 ---


Evaluating:   3%|▎         | 3999/131944 [1:49:29<50:18, 42.39it/s]     07/13/2021 19:54:49 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   3%|▎         | 3999/131944 [1:49:41<50:18, 42.39it/s]07/13/2021 19:55:41 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 20:09:54 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   3%|▎         | 4003/131944 [2:04:52<1935:13:12, 54.45s/it]

--- eval to representation batch 4000 ---


Evaluating:   3%|▎         | 4498/131944 [2:05:04<49:37, 42.81it/s]     07/13/2021 20:10:23 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   3%|▎         | 4498/131944 [2:05:21<49:37, 42.81it/s]07/13/2021 20:11:16 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 20:25:25 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   3%|▎         | 4504/131944 [2:20:26<1684:18:25, 47.58s/it]

--- eval to representation batch 4500 ---


Evaluating:   4%|▍         | 4999/131944 [2:20:37<49:02, 43.14it/s]     07/13/2021 20:25:57 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   4%|▍         | 4999/131944 [2:20:53<49:02, 43.14it/s]07/13/2021 20:26:49 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 20:40:53 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


07/13/2021 20:41:13 - INFO - __main__ -     Outputting Attention File representation_df_5000 to /rapids/notebooks/host/QA_attentions_pickled/representations


--- eval to representation batch 5000 ---


Evaluating:   4%|▍         | 5499/131944 [2:59:18<49:20, 42.72it/s]      07/13/2021 21:04:38 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   4%|▍         | 5499/131944 [2:59:34<49:20, 42.72it/s]07/13/2021 21:05:32 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 21:19:56 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   4%|▍         | 5503/131944 [3:14:57<1944:28:43, 55.36s/it]

--- eval to representation batch 5500 ---


Evaluating:   5%|▍         | 5998/131944 [3:15:08<49:01, 42.81it/s]     07/13/2021 21:20:28 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   5%|▍         | 5998/131944 [3:15:25<49:01, 42.81it/s]07/13/2021 21:21:21 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 21:35:48 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   5%|▍         | 6002/131944 [3:30:49<1991:34:05, 56.93s/it]

--- eval to representation batch 6000 ---


Evaluating:   5%|▍         | 6497/131944 [3:31:00<48:50, 42.81it/s]     07/13/2021 21:36:20 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   5%|▍         | 6497/131944 [3:31:15<48:50, 42.81it/s]07/13/2021 21:37:12 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 21:51:40 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   5%|▍         | 6503/131944 [3:46:37<1723:45:47, 49.47s/it]

--- eval to representation batch 6500 ---


Evaluating:   5%|▌         | 6998/131944 [3:46:49<48:43, 42.73it/s]     07/13/2021 21:52:09 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   5%|▌         | 6998/131944 [3:47:05<48:43, 42.73it/s]07/13/2021 21:53:02 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 22:07:28 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   5%|▌         | 7000/131944 [4:02:28<2386:08:30, 68.75s/it]

--- eval to representation batch 7000 ---


Evaluating:   6%|▌         | 7498/131944 [4:02:40<48:41, 42.60it/s]     07/13/2021 22:08:00 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   6%|▌         | 7498/131944 [4:02:56<48:41, 42.60it/s]07/13/2021 22:08:53 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 22:23:16 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   6%|▌         | 7504/131944 [4:18:17<1671:38:14, 48.36s/it]

--- eval to representation batch 7500 ---


Evaluating:   6%|▌         | 7999/131944 [4:18:29<48:16, 42.80it/s]     07/13/2021 22:23:48 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   6%|▌         | 7999/131944 [4:18:46<48:16, 42.80it/s]07/13/2021 22:24:41 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 22:39:06 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   6%|▌         | 8004/131944 [4:34:07<1758:18:40, 51.07s/it]

--- eval to representation batch 8000 ---


Evaluating:   6%|▋         | 8499/131944 [4:34:19<48:25, 42.48it/s]     07/13/2021 22:39:38 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   6%|▋         | 8499/131944 [4:34:36<48:25, 42.48it/s]07/13/2021 22:40:31 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 22:54:30 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   6%|▋         | 8500/131944 [4:49:28<2462:07:43, 71.80s/it]

--- eval to representation batch 8500 ---


Evaluating:   7%|▋         | 8998/131944 [4:49:40<47:31, 43.11it/s]     07/13/2021 22:54:59 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   7%|▋         | 8998/131944 [4:49:56<47:31, 43.11it/s]07/13/2021 22:55:51 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 23:09:43 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   7%|▋         | 9003/131944 [5:04:44<1720:44:05, 50.39s/it]

--- eval to representation batch 9000 ---


Evaluating:   7%|▋         | 9498/131944 [5:04:55<47:20, 43.11it/s]     07/13/2021 23:10:15 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   7%|▋         | 9498/131944 [5:05:07<47:20, 43.11it/s]07/13/2021 23:11:07 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 23:24:59 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   7%|▋         | 9502/131944 [5:19:57<1856:34:15, 54.59s/it]

--- eval to representation batch 9500 ---


Evaluating:   8%|▊         | 9997/131944 [5:20:09<48:20, 42.04it/s]     07/13/2021 23:25:28 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   8%|▊         | 9997/131944 [5:20:27<48:20, 42.04it/s]07/13/2021 23:26:20 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/13/2021 23:40:25 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


07/13/2021 23:40:45 - INFO - __main__ -     Outputting Attention File representation_df_10000 to /rapids/notebooks/host/QA_attentions_pickled/representations


--- eval to representation batch 10000 ---


Evaluating:   8%|▊         | 10499/131944 [5:58:37<47:35, 42.53it/s]      07/14/2021 00:03:56 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   8%|▊         | 10499/131944 [5:58:50<47:35, 42.53it/s]07/14/2021 00:04:49 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/14/2021 00:19:14 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   8%|▊         | 10504/131944 [6:14:15<1722:11:15, 51.05s/it]

--- eval to representation batch 10500 ---


Evaluating:   8%|▊         | 10999/131944 [6:14:26<47:06, 42.79it/s]     07/14/2021 00:19:46 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   8%|▊         | 10999/131944 [6:14:40<47:06, 42.79it/s]07/14/2021 00:20:38 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/14/2021 00:34:42 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   8%|▊         | 11002/131944 [6:29:42<1982:17:19, 59.01s/it]

--- eval to representation batch 11000 ---


Evaluating:   9%|▊         | 11497/131944 [6:29:54<46:35, 43.08it/s]     07/14/2021 00:35:13 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   9%|▊         | 11497/131944 [6:30:10<46:35, 43.08it/s]07/14/2021 00:36:05 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/14/2021 00:50:10 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   9%|▊         | 11504/131944 [6:45:10<1505:15:05, 44.99s/it]

--- eval to representation batch 11500 ---


Evaluating:   9%|▉         | 11999/131944 [6:45:22<46:18, 43.18it/s]     07/14/2021 00:50:41 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   9%|▉         | 11999/131944 [6:45:40<46:18, 43.18it/s]07/14/2021 00:51:33 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/14/2021 01:05:24 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   9%|▉         | 12003/131944 [7:00:22<1769:48:41, 53.12s/it]

--- eval to representation batch 12000 ---


Evaluating:   9%|▉         | 12498/131944 [7:00:33<46:00, 43.27it/s]     07/14/2021 01:05:53 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:   9%|▉         | 12498/131944 [7:00:50<46:00, 43.27it/s]07/14/2021 01:06:45 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
07/14/2021 01:20:36 - INFO - __main__ -   Appending results to array/dataframe ...


flattening layers/heads ...


Evaluating:   9%|▉         | 12502/131944 [7:15:36<1813:54:48, 54.67s/it]

--- eval to representation batch 12500 ---


Evaluating:  10%|▉         | 12997/131944 [7:15:48<45:47, 43.29it/s]     07/14/2021 01:21:08 - INFO - __main__ -   Scaling attention values to 0-255 ...
Evaluating:  10%|▉         | 12997/131944 [7:16:00<45:47, 43.29it/s]07/14/2021 01:21:59 - INFO - __main__ -   Processing to 2048 value representations through barlow_twins ...
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


In [29]:
qrr = [0.03609005,0.02730089,0.01784932,0.00970263,0.03209871,0.02975531,0.01899219,0.0159386,0.09006939,0.00353505,0.00430149,0.00373916,0.004033,0.0054037,0.00314571,0.00544514,0.00577388,0.00909712,0.00366947,0.00779078,0.0060854,0.00374152,0.00431431,0.00433876,0.00607211,0.00415887,0.00477201,0.00399395,0.00448466,0.00406087,0.00554479,0.00403477,0.00403658,0.00570019,0.00253348,0.00786278,0.00268425,0.00286821,0.00445756,0.00259511,0.01361431,0.00179666,0.00279525,0.00253426,0.00443048,0.00275498,0.00693157,0.00444886,0.00678853,0.00411692,0.0071019,0.00660669,0.00816385,0.00570161,0.00614765,0.00302077,0.00278247,0.00188732,0.00286246,0.00246164,0.00384467,0.00554711,0.00819303,0.00627691,0.00650617,0.00496393,0.00559988,0.00612233,0.01119899,0.00860447,0.00352704,0.0071039,0.00396436,0.00965003,0.00511713,0.00618074,0.00690801,0.00618426,0.0035491,0.00724508,0.00985016,0.00404688,0.00427996,0.00285223,0.00245299,0.00591965,0.00313316,0.00378624,0.00741692,0.00399197,0.00309289,0.00225511,0.00550424,0.00611447,0.00585844,0.00925419,0.00312216,0.00596423,0.00268668,0.0018134,0.00285068,0.00261073,0.00597714,0.00725985,0.00449176,0.01075981,0.00320032,0.00597627,0.00291104,0.00275812,0.00203771,0.00306792,0.00339498,0.00513527,0.00229073,0.00349129,0.00306995,0.00403183,0.00349238,0.00516681,0.00469688,0.00314611,0.00399399,0.00605243,0.00566731,0.00388196,0.00273817,0.00305755,0.00224174,0.00391221,0.00251756,0.00190409,0.00828339,0.00398148,0.00303265,0.00302295,0.00196971,0.00284096,0.00145757,0.00371954,0.00356667,0.00184108,0.0033674,0.00493111,0.00370861,0.00118721,0.00130349,0.00143106,0.00201871,0.00370352,0.00239526,0.00200266,0.00348562,0.00141907,0.00355505,0.00185585,0.00149471,0.002439,0.00457984,0.00260334,0.00734435,0.00277519,0.0014447,,0.00187251,0.00321797,0.00269519,0.00156433,0.0038524,0.00148884,0.00231959,0.00253542,0.00174283,0.00346243,0.01599906,
0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.,
 0.,0.,0.,0.,0.,0.        ]



SyntaxError: invalid syntax (<ipython-input-29-78f7129200a9>, line 1)