In [2]:
import numpy as np
import os.path

import torch.nn as nn
import torch
import torch.nn.functional as F
import sys
import os

from tqdm import tqdm
from collections import namedtuple
from transformers import AutoModel, AutoTokenizer

In [90]:
# Directory holding the saved outputs of one run (note the trailing slash).
RESULTS_DIR = "results/distilbert-base-cased(5)_pad10_2020-07-05-12-41/"
pmis = np.load(f"{RESULTS_DIR}pmi_matrices.npz")
loglik_npz = np.load(f"{RESULTS_DIR}pseudo_logliks.npz")

# Unpack the archives into parallel plain lists (keys, then stored arrays).
sentences = [*pmis.keys()]
matrices = [*pmis.values()]
logliks = [*loglik_npz.values()]

In [91]:
# Check what kind of object np.load returned for the .npz archive.
type(pmis)

numpy.lib.npyio.NpzFile

In [94]:
# Peek at the first key stored in the archive.
pmis.files[0]

"0 We 're about to see if advertising works ."

In [89]:
def get_info(directory, key):
    '''Gets the value of a specified spec from ``info.txt``.

    Scans ``info.txt`` inside ``directory`` line by line for the first line
    whose first whitespace-separated token is ``key + ':'`` and returns the
    token that follows it.

    Args:
        directory: path of the directory containing ``info.txt``
            (with or without a trailing separator).
        key: name of the spec to look up, without the trailing colon.

    Returns:
        The second whitespace-separated token of the matching line (a str),
        or None when no line matches or the matching line carries no value.
    '''
    # os.path.join works whether or not `directory` ends with a separator.
    with open(os.path.join(directory, 'info.txt'), 'r') as f:
        for line in f:
            tokens = line.split()
            # Blank lines split to [] -- skip them instead of raising IndexError.
            if not tokens or tokens[0] != key + ':':
                continue
            return tokens[1] if len(tokens) > 1 else None
    return None

# Look up the 'model_spec' entry recorded in RESULTS_DIR's info.txt.
get_info(RESULTS_DIR, 'model_spec')

'bert-large-cased'

In [2]:
from collections import namedtuple

class CONLLReader():
    '''Reads conllx files into namedtuple Observations, one per sentence.

    Attributes:
        conll_cols: list of field names (including the additional field
            name when one was given).
        observation_class: namedtuple class "Observation" over conll_cols.
        additional_field_name: name of the extra field, or None.
    '''

    def __init__(self, conll_cols, additional_field_name=None):
        '''
        Args:
            conll_cols: names of the tab-separated columns of the file.
            additional_field_name: optional name of one extra field that is
                appended to every Observation and filled with None values.
        '''
        # Work on a copy: the previous `conll_cols += [...]` appended to the
        # caller's own list, so reusing that list grew it on every call.
        conll_cols = list(conll_cols)
        if additional_field_name:
            conll_cols.append(additional_field_name)
        self.conll_cols = conll_cols
        self.observation_class = namedtuple("Observation", conll_cols)
        self.additional_field_name = additional_field_name

    # Data input
    @staticmethod
    def generate_lines_for_sent(lines):
        '''Yields batches of lines describing a sentence in conllx.

        Lines starting with '#' are skipped; blank lines separate sentences.

        Args:
            lines: Each line of a conllx file.
        Yields:
            a list of (stripped) lines describing a single sentence in conllx.
        '''
        buf = []
        for line in lines:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if stripped:
                buf.append(stripped)
            elif buf:
                # Blank line after content closes the current sentence.
                yield buf
                buf = []
        # Flush the last sentence when the input does not end with a blank line.
        if buf:
            yield buf

    def load_conll_dataset(self, filepath):
        '''Reads in a conllx file; generates Observation objects

        For each sentence in a conllx file, generates a single Observation
        object whose fields are tuples with one entry per token. When an
        additional field name was configured, that field holds a list of
        None values, one per token.

        Args:
            filepath: the filesystem path to the conll dataset

        Returns:
        A list of Observations
        '''
        observations = []
        # The context manager guarantees the file handle is closed.
        with open(filepath) as conll_file:
            for buf in self.generate_lines_for_sent(conll_file):
                conllx_lines = [line.strip().split('\t') for line in buf]
                # zip(*rows) transposes token rows into per-field columns.
                if self.additional_field_name:
                    newfield = [None] * len(conllx_lines)
                    observation = self.observation_class(
                        *zip(*conllx_lines), newfield)
                else:
                    observation = self.observation_class(
                        *zip(*conllx_lines))
                observations.append(observation)
        return observations



In [3]:
import pandas as pd

# Field names handed to CONLLReader, one per tab-separated column.
CONLL_COLS = [
    'index', 'sentence', 'lemma_sentence', 'upos_sentence', 'xpos_sentence',
    'morph', 'head_indices', 'governance_relations', 'secondary_relations',
    'extra_info',
]

# Load the two custom sample files, each through its own reader instance.
OBSERVATIONS, OBSERVATIONS2 = [
    CONLLReader(CONLL_COLS).load_conll_dataset(conllx_path)
    for conllx_path in ('ptb3-wsj-data/CUSTOM.conllx',
                        'ptb3-wsj-data/CUSTOM4.conllx')
]

# One row per sentence, one column per field.
pd.DataFrame(OBSERVATIONS).head(2)

Unnamed: 0,index,sentence,lemma_sentence,upos_sentence,xpos_sentence,morph,head_indices,governance_relations,secondary_relations,extra_info
0,"(1, 2, 3, 4, 5, 6, 7, 8, 9)","(We, 're, about, to, see, if, advertising, wor...","(_, _, _, _, _, _, _, _, _)","(PRON, AUX, ADP, PART, VERB, SCONJ, NOUN, VERB...","(PRP, VBP, IN, TO, VB, IN, NN, VBZ, .)","(_, _, _, _, _, _, _, _, _)","(3, 3, 0, 5, 3, 8, 8, 5, 3)","(nsubj, aux, root, aux, xcomp, mark, nsubj, ad...","(_, _, _, _, _, _, _, _, _)","(_, _, _, _, _, _, _, _, _)"
1,"(1, 2, 3)","(Odds, and, Ends)","(_, _, _)","(NOUN, CONJ, NOUN)","(NNS, CC, NNS)","(_, _, _)","(0, 1, 1)","(root, cc, conj)","(_, _, _)","(_, _, _)"


In [4]:
# Same preview for the second sample file.
pd.DataFrame(OBSERVATIONS2).head(2)

Unnamed: 0,index,sentence,lemma_sentence,upos_sentence,xpos_sentence,morph,head_indices,governance_relations,secondary_relations,extra_info
0,"(1, 2, 3)","(Odds, and, Ends)","(_, _, _)","(NOUN, CONJ, NOUN)","(NNS, CC, NNS)","(_, _, _)","(0, 1, 1)","(root, cc, conj)","(_, _, _)","(_, _, _)"
1,"(1, 2, 3, 4, 5, 6, 7, 8)","(Not, his, autograph, ;, power-hitter, McGwire...","(_, _, _, _, _, _, _, _)","(ADV, PRON, NOUN, PUNCT, NOUN, NOUN, PART, PUNCT)","(RB, PRP$, NN, :, NN, NN, POS, .)","(_, _, _, _, _, _, _, _)","(3, 3, 0, 3, 6, 3, 6, 3)","(neg, poss, root, punct, nn, poss, possessive,...","(_, _, _, _, _, _, _, _)","(_, _, _, _, _, _, _, _)"


In [99]:
import torch
import torch.nn as nn

In [100]:
# Toy batch: two rows of 3-class scores plus the target class of each row.
# NOTE(review): the second row is written `0.4, 0, 6` (three values) --
# confirm that `0.6` was not what was meant.
preds = torch.tensor([[.8, .1, .1], [0.4, 0, 6]], dtype=torch.float32)
labels = torch.tensor([0, 2], dtype=torch.long)
print(preds.view(-1, 3))
labels

tensor([[0.8000, 0.1000, 0.1000],
        [0.4000, 0.0000, 6.0000]])


tensor([0, 2])

In [101]:
# Cross-entropy (default settings) of the rows of `preds` against `labels`.
nn.CrossEntropyLoss()(preds.view(-1,3),labels.view(-1))

tensor(0.3479)

In [102]:
# Random 3x5 score matrix and one integer class target per row.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = torch.randn(3, 5)
# NOTE(review): random_(1) samples from [0, 0], so every target is class 0 --
# confirm this is intended (5 classes are available in `input`).
target = torch.empty(3, dtype=torch.long).random_(1)
print(input)
nn.CrossEntropyLoss()(input,target)

tensor([[-0.0933, -0.5557, -0.4178,  0.7510,  0.9444],
        [-0.9207, -0.9642,  1.1006,  0.1601, -0.6287],
        [-1.3193, -2.8888,  0.2294,  1.5730,  0.8444]])


tensor(2.7084)

In [49]:
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {DEVICE}')

# Name of the pretrained model / tokenizer pair to load.
SPEC = 'bert-base-uncased'

MODEL = AutoModel.from_pretrained(SPEC).to(DEVICE)
TOKENIZER = AutoTokenizer.from_pretrained(SPEC)
# Placeholder value. NOTE(review): the XPOS tagset printed elsewhere in this
# notebook has 45 tags -- confirm which size is wanted here.
POS_VOCABSIZE = 30  # or whatever

# Settings collected into one dict for the probe code.
ARGS = dict(
    device=DEVICE,
    hidden_dim=MODEL.config.hidden_size,
    pos_vocabsize=POS_VOCABSIZE,
    epochs=20,
    results_path="probe-results/",
    # Corpus file names are joined onto `root` by the loading functions.
    corpus=dict(root='ptb3-wsj-data/',
                train_path='ptb3-wsj-train.conllx',
                dev_path='ptb3-wsj-dev.conllx',
                test_path='ptb3-wsj-test.conllx'),
    conll_fieldnames=[  # Columns of CONLL file
        'index', 'sentence', 'lemma_sentence', 'upos_sentence',
        'xpos_sentence', 'morph', 'head_indices',
        'governance_relations', 'secondary_relations', 'extra_info']
    )


Using device: cpu


In [52]:
# Vocabulary id of the tokenizer's mask token.
TOKENIZER.mask_token_id

103

## Inspecting observations datasets

In [193]:
def load_observations(args):
    '''
    Read the train, dev and test corpora into lists of Observations.

    Args:
        args: settings dict; uses args['corpus'] ('root' plus the three
            '*_path' file names) and args['conll_fieldnames'].

    Returns:
        (train_obs, dev_obs, test_obs), each the result of
        CONLLReader.load_conll_dataset for that split.
    '''
    # Resolve all three file paths first, then read them with a single reader.
    split_paths = [
        os.path.join(args['corpus']['root'], args['corpus'][path_key])
        for path_key in ('train_path', 'dev_path', 'test_path')
    ]
    reader = CONLLReader(args['conll_fieldnames'])
    train_obs, dev_obs, test_obs = map(reader.load_conll_dataset, split_paths)
    return train_obs, dev_obs, test_obs

In [194]:
# Read all three corpus splits named in ARGS['corpus'].
train_obs, dev_obs, test_obs = load_observations(ARGS)

In [196]:
def obs_df(x):
    '''Tabulate one observation for inspection.

    Rows, in order: the words, their subtokens, their subtoken ids (without
    special tokens), the XPOS tags and the UPOS tags. One column per word.
    '''
    words = x.sentence
    subtokens = [TOKENIZER.tokenize(word) for word in words]
    subtoken_ids = [TOKENIZER.encode(word, add_special_tokens=False)
                    for word in words]
    rows = (words, subtokens, subtoken_ids, x.xpos_sentence, x.upos_sentence)
    return pd.DataFrame(rows)

x = dev_obs[2]
obs_df(x)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,The,bill,intends,to,restrict,the,RTC,to,Treasury,borrowings,only,",",unless,the,agency,receives,specific,congressional,authorization,.
1,[the],[bill],[intends],[to],[restrict],[the],"[rt, ##c]",[to],[treasury],"[borrowing, ##s]",[only],"[,]",[unless],[the],[agency],[receives],[specific],[congressional],[authorization],[.]
2,[1996],[3021],[18754],[2000],[21573],[1996],"[19387, 2278]",[2000],[9837],"[23733, 2015]",[2069],[1010],[4983],[1996],[4034],[8267],[3563],[7740],[20104],[1012]
3,DT,NN,VBZ,TO,VB,DT,NNP,TO,NNP,NNS,RB,",",IN,DT,NN,VBZ,JJ,JJ,NN,.
4,DET,NOUN,VERB,PART,VERB,DET,PROPN,ADP,PROPN,NOUN,ADV,PUNCT,SCONJ,DET,NOUN,VERB,ADJ,ADJ,NOUN,PUNCT


In [218]:
import itertools
# Per-sentence UPOS tag tuples pooled over train, dev and test.
sents_UPOS_tags = [obs.upos_sentence
                   for split_obs in (train_obs, dev_obs, test_obs)
                   for obs in split_obs]
# Flatten and deduplicate to get the tag inventory.
UPOS_tagset = set(itertools.chain.from_iterable(sents_UPOS_tags))
print(f"There are {len(UPOS_tagset)} POS tags in the UPOS tagset: {sorted(UPOS_tagset)}")

There are 17 POS tags in the UPOS tagset: ['ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']


In [219]:
# Per-sentence XPOS tag tuples pooled over train, dev and test.
sents_XPOS_tags = [o.xpos_sentence for o in itertools.chain(train_obs,dev_obs,test_obs)]
# Flatten and deduplicate to get the tag inventory.
XPOS_tagset = set(itertools.chain(*sents_XPOS_tags))
# The message previously said "UPOS tagset" although it reports the XPOS tags.
print(f"There are {len(XPOS_tagset)} POS tags in the XPOS tagset: {sorted(XPOS_tagset)}")

There are 45 POS tags in the UPOS tagset: ['#', '$', "''", ',', '-LRB-', '-RRB-', '.', ':', 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNP', 'NNPS', 'NNS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB', '``']


## Now, loading the observations into a Dataset instead

In [215]:
class POSDataset(torch.utils.data.Dataset):
    """Dataset of (subtoken ids, POS ids) pairs built from Observations.

    Every word is split into subtoken ids by the tokenizer, and the id of the
    word's XPOS tag is repeated once per subtoken, so the two lists of an
    item always have equal length.
    """

    def __init__(self, observations, tokenizer, observation_class, POS_set):
        '''
        Args:
            observations: A list of Observations describing a dataset
            tokenizer: object providing `encode(text, add_special_tokens=False)`
            observation_class: namedtuple class used to rebuild Observations
            POS_set: iterable of POS tags; enumeration order fixes each tag's id
        '''
        self.observations = observations
        self.POS_set = POS_set
        self.tokenizer = tokenizer
        self.POS_to_id = dict(
            (tag, idx) for idx, tag in enumerate(self.POS_set))
        self.observation_class = observation_class
        self.input_ids, self.pos_ids = self.get_input_ids_and_pos_ids()

    def sentences_to_idlists(self):
        '''Swaps each Observation's words for per-word lists of subtoken ids.

        Returns:
        A list of observations whose sentence field is a tuple of id lists;
        all other fields are carried over unchanged.
        '''
        converted = []
        for observation in tqdm(self.observations, desc="[getting subtoken ids]"):
            word_id_lists = tuple(
                self.subword_ids(word) for word in observation.sentence)
            first_field, remaining = observation[0], observation[2:]
            # The sentence is taken to be the second field of the tuple.
            converted.append(
                self.observation_class(first_field, word_id_lists, *remaining))
        return converted

    def subword_ids(self, item):
        '''Returns the tokenizer's ids for one word, without special tokens.'''
        ids = self.tokenizer.encode(item, add_special_tokens=False)
        return ids

    def get_input_ids_and_pos_ids(self):
        '''Builds the flat per-sentence subtoken id and XPOS id lists.'''
        idlist_observations = self.sentences_to_idlists()
        return self.repeat_POS_to_match(
            [obs.sentence for obs in idlist_observations],
            [obs.xpos_sentence for obs in idlist_observations])

    def repeat_POS_to_match(self, list_id, list_POS):
        '''Flattens per-word id lists, repeating each word's POS id per subtoken.

        Args:
            list_id: per sentence, a sequence of per-word subtoken id lists
            list_POS: per sentence, a sequence of POS tags (one per word)

        Returns:
            (ids, pos_ids): two lists with one flat list per sentence.
        '''
        assert len(list_POS) == len(list_id), "list lengths don't match"
        new_id, new_POS = [], []
        for sent_idx, sentence in enumerate(list_id):
            # One (subtoken id, POS id) pair per subtoken, in reading order.
            pairs = [
                (token_id, self.POS_to_id[list_POS[sent_idx][word_idx]])
                for word_idx, word_ids in enumerate(sentence)
                for token_id in word_ids
            ]
            new_id.append([token_id for token_id, _ in pairs])
            new_POS.append([pos_id for _, pos_id in pairs])
        return new_id, new_POS

    def __len__(self):
        n_observations = len(self.observations)
        return n_observations

    def __getitem__(self, index):
        item = (self.input_ids[index], self.pos_ids[index])
        return item

def load_datasets(args, tokenizer, pos_set=None):
    '''
    Build the POSDataset for the dev corpus.

    Only the dev split is loaded and returned; train and test are not
    loaded by this function.

    Args:
        args: settings dict; uses args['corpus']['root'],
            args['corpus']['dev_path'] and args['conll_fieldnames'].
        tokenizer: tokenizer instance handed on to POSDataset.
        pos_set: sequence of POS tags defining the tag ids. When None, the
            module-level `POS_set` is used, which must already be defined.

    Returns:
        The dev POSDataset.
    '''
    if pos_set is None:
        # Fall back to the notebook-level tag list (previously an implicit,
        # undocumented dependency of this function).
        pos_set = POS_set
    dev_corpus_path = os.path.join(
        args['corpus']['root'],
        args['corpus']['dev_path'])
    reader = CONLLReader(args['conll_fieldnames'])
    dev_obs = reader.load_conll_dataset(dev_corpus_path)
    dev_dataset = POSDataset(
        dev_obs, tokenizer, reader.observation_class, pos_set)
    return dev_dataset


In [216]:
# load_datasets returns only the dev dataset.
dev_dataset = load_datasets(ARGS, TOKENIZER)

[getting subtoken ids]: 100%|██████████| 1700/1700 [00:02<00:00, 720.13it/s]


In [217]:
# checking:
index = 101
print(obs_df(dev_obs[index]))
pd.DataFrame(zip(
    [TOKENIZER.convert_ids_to_tokens([i])[0] for i in dev_dataset[index][0]],
    [id_to_POS[i] for i in dev_dataset[index][1]]))

              0       1             2       3       4         5        6  \
0            ``       I            'm     for     the    Giants    today   
1        [`, `]     [i]        [', m]   [for]   [the]  [giants]  [today]   
2  [1036, 1036]  [1045]  [1005, 1049]  [2005]  [1996]    [7230]   [2651]   
3            ``     PRP           VBP      IN      DT       NNP       NN   
4         PUNCT    PRON          VERB     ADP     DET     PROPN     NOUN   

        7       8       9         10      11      12           13      14  
0       ,     but    only    because    they    lost    yesterday       .  
1     [,]   [but]  [only]  [because]  [they]  [lost]  [yesterday]     [.]  
2  [1010]  [2021]  [2069]     [2138]  [2027]  [2439]       [7483]  [1012]  
3       ,      CC      RB         IN     PRP     VBD           NN       .  
4   PUNCT    CONJ     ADV      SCONJ    PRON    VERB         NOUN   PUNCT  


Unnamed: 0,0,1
0,`,``
1,`,``
2,i,PRP
3,',VBP
4,m,VBP
5,for,IN
6,the,DT
7,giants,NNP
8,today,NN
9,",",","


In [66]:
# XPOS tags; a tag's position in this list is used as its integer id.
POS_set = [
    # punctuation and symbols
    '#', '$', "''", ',', '-LRB-', '-RRB-', '.', ':',
    # C - J
    'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS',
    # L - P
    'LS', 'MD', 'NN', 'NNP', 'NNPS', 'NNS', 'PDT', 'POS', 'PRP', 'PRP$',
    # R - U
    'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH',
    # V - W
    'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB',
    # opening quotes
    '``',
]

In [71]:
# Position of the opening-quote tag in POS_set.
POS_set.index('``')

44

In [170]:
# Forward (tag -> id) and reverse (id -> tag) lookup tables over POS_set.
POS_to_id = dict(zip(POS_set, range(len(POS_set))))
id_to_POS = dict((idx, tag) for tag, idx in POS_to_id.items())

In [183]:
# Toy input: per sentence, one list of subtoken ids per word ...
subtoken_idlists = [
    ([1], [100, 101], [16]),
    ([50], [17]),
]

# ... and the matching POS tag of each word.
POSlists = [
    ("DT", "NN", "VBZ"),
    ("NNP", "VB"),
]

In [184]:
def repeat_POS_to_match(list_id, list_POS):
    '''Prints flattened subtoken ids and the POS ids repeated to match them.

    For every sentence the per-word id lists are flattened into one list,
    and each word's POS id (looked up in the global POS_to_id) is repeated
    once per subtoken. Both nested lists are printed; nothing is returned.
    '''
    assert len(list_POS) == len(list_id), "list lengths don't match"
    new_id, new_POS = [], []
    for sent_idx, sentence in enumerate(list_id):
        # One (subtoken id, POS id) pair per subtoken, in reading order.
        pairs = [
            (token_id, POS_to_id[list_POS[sent_idx][word_idx]])
            for word_idx, word_ids in enumerate(sentence)
            for token_id in word_ids
        ]
        new_id.append([token_id for token_id, _ in pairs])
        new_POS.append([pos_id for _, pos_id in pairs])
    print(new_id)
    print(new_POS)

# Try the function on the toy data defined above.
repeat_POS_to_match(subtoken_idlists,POSlists)

[[1, 100, 101, 16], [50, 17]]
[[10, 19, 19, 39], [20, 34]]


In [156]:
def repeat_POS_to_match2(POSlists, subtoken_id_lists):
    '''Returns POS ids repeated per subtoken, keeping the per-word nesting.

    Note the argument order: POS tags first, subtoken id lists second.
    The result has one list per sentence, holding one list per word with
    that word's POS id (from the global POS_to_id) repeated per subtoken.
    '''
    nested = []
    for i, sent in enumerate(subtoken_id_lists):
        sent_labels = []
        for j, idlist in enumerate(sent):
            sent_labels.append([POS_to_id[POSlists[i][j]] for _ in idlist])
        nested.append(sent_labels)
    return nested

# Same toy data, passed in this variant's (POS first) argument order.
repeat_POS_to_match2(POSlists,subtoken_idlists)

[[[10], [19, 19], [39]], [[20], [34]]]

In [226]:
# Decode two rows of token ids back to subtoken strings ...
print(TOKENIZER.convert_ids_to_tokens([2057, 10238,  2025,  9780,  2060,  9017,  2057,  1036,  2012,  2748]))
print(TOKENIZER.convert_ids_to_tokens([1005,  1998,  2010,  1024, 20138,  1998,  1005,  1036,  4162,  1998]))

# ... and two rows of POS ids back to tag strings.
print([id_to_POS[x] for x in [25, 22, 27, 20, 14, 22, 25, 44, 13, 22]])
print([id_to_POS[x] for x in [38,  8, 26,  7, 19,  8, 38, 44, 20,  8]])

['we', 'odds', 'not', 'frankfurt', 'other', 'bits', 'we', '`', 'at', 'yes']
["'", 'and', 'his', ':', 'broker', 'and', "'", '`', 'applied', 'and']
['PRP', 'NNS', 'RB', 'NNP', 'JJ', 'NNS', 'PRP', '``', 'IN', 'NNS']
['VBP', 'CC', 'PRP$', ':', 'NN', 'CC', 'VBP', '``', 'NNP', 'CC']


In [239]:
# Hand-written nested list with three top-level elements: a pair of token-id
# rows, a pair of POS-id rows, and one further row of ints.
# NOTE(review): this rebinds the name `x` used by earlier cells.
x = [[([ 2057, 10238,  2025,  9780,  2060,  9017,  2057,  1036,  2012,  2748]), ([ 1005,  1998,  2010,  1024, 20138,  1998,  1005,  1036,  4162,  1998])], [([25, 22, 27, 20, 14, 22, 25, 44, 13, 22]), ([38,  8, 26,  7, 19,  8, 38, 44, 20,  8])], ([10,  3, 14,  2, 20,  3, 10, 22, 13,  3])]
len(x)

3

In [263]:
# Encode one sentence (special tokens included) as a tensor of ids.
inputs = TOKENIZER.encode("Hello, my dog is cute", return_tensors="pt")
# Stack the encoding with itself and reshape to two rows: a batch of two
# identical sentences.
inputs = torch.stack((inputs,inputs), dim=1).view(2,-1)
print('inputs:',inputs, inputs.shape)

outputs = MODEL(inputs)
# First model output; not used again in this cell.
last_hidden_states = outputs[0] 
# Show the shape of every output the model returned.
[x.size() for x in outputs]

inputs: tensor([[  101,  7592,  1010,  2026,  3899,  2003, 10140,   102],
        [  101,  7592,  1010,  2026,  3899,  2003, 10140,   102]]) torch.Size([2, 8])


[torch.Size([2, 8, 768]), torch.Size([2, 768])]

In [273]:
# torch.empty returns uninitialized memory, so which entries equal 0 here
# is arbitrary and varies between runs.
(torch.empty((5,10))==0).type(torch.float)

tensor([[1., 0., 1., 0., 0., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.]])

In [285]:
# Only the last expression of a cell is displayed, so the encode() result
# on the next line is computed and then discarded.
TOKENIZER.encode("a compositional phrase.")
POS_to_id["."]

6

We should do it like this:
```
input tokens = ['[CLS]',  'a', 'composition', '##al', 'phrase',  '.', '[SEP]', PADDING, ..., PADDING ]
POS labels   = [    pad, 'DT',          'JJ',   'JJ',     'NN',  '.',     pad,     pad, ...,     pad ]
```
which is encoded as ids like
```
input_ids    = [    101, 1037,          5512,   2389,     7655, 1012,     102,      -1, ...,      -1 ]
POS_ids      = [     -1,   10,            14,     14,       19,    6,      -1,      -1, ...,      -1 ]
```            

In [288]:
# Mapping from pretrained model names to their weight-file URLs.
MODEL.pretrained_model_archive_map

{'bert-base-uncased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin',
 'bert-large-uncased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-pytorch_model.bin',
 'bert-base-cased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin',
 'bert-large-cased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-pytorch_model.bin',
 'bert-base-multilingual-uncased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-pytorch_model.bin',
 'bert-base-multilingual-cased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-pytorch_model.bin',
 'bert-base-chinese': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-pytorch_model.bin',
 'bert-base-german-cased': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-pytorch_model.bin',
 'bert-large-uncased-whole-word-masking': 

In [312]:
from datetime import datetime
# Current time formatted as two-digit year.month.day-hour.minute.
now = datetime.now()
now.strftime("%y.%m.%d-%H.%M")

'20.07.10-08.13'

In [316]:
import time
# NOTE(review): this binds NOW to the `time` module itself, not to a
# timestamp; the output shown under this cell is not produced by this line.
NOW = time


'2020-07-10 08:16:23'

In [331]:
# NOW is the `time` module, so this is time.strftime: with no time tuple
# given it formats the current local time.
NOW.strftime("%Y%m%d-%H%M%S")

'20200710-081716'

In [345]:
# Toy tensor of size (2, 6, 7), used by the following cells to try out
# reductions and comparisons.
t = torch.Tensor(
    [[[-5,1,0,2,1,4,1],
      [-6,1,0,2,1,2,1],
      [-5,1,0,2,1,3,1],
      [-5,1,0,2,1,3,1],
      [-6,1,0,2,1,2,1],
      [-5,1,0,2,1,4,1]],

     [[-5,1,0,2,1,2,1],
      [-6,1,0,2,1,2,1],
      [-5,1,0,2,1,5,1],
      [-5,1,0,2,1,1,1],
      [-6,1,0,2,1,2,1],
      [-5,1,0,2,1,1,1]]])
print(t.size())
# Max over the last dimension: returns both the values and their indices.
torch.max(t,-1)

torch.Size([2, 6, 7])


torch.return_types.max(
values=tensor([[4., 2., 3., 3., 2., 4.],
        [2., 2., 5., 2., 2., 2.]]),
indices=tensor([[5, 5, 5, 5, 5, 5],
        [5, 5, 5, 3, 5, 3]]))

In [348]:
# Index of the maximum along the last dimension.
t.argmax(-1)

tensor([[5, 5, 5, 5, 5, 5],
        [5, 5, 5, 3, 5, 3]])

In [355]:
# Count entries equal to 1, using Python's builtin sum over the flattened mask.
sum(t.view(-1).eq(1))

tensor(38)

In [369]:
# x == x/2 holds only for the zero entries of t, so the sum counts zeros.
# NOTE(review): the divisor 11 matches no dimension of t (2 x 6 x 7).
float(sum(t.view(-1)==t.view(-1)/2)) / 11

1.0909090909090908

In [381]:
# Number of non-zero entries, as a float tensor (operator form).
torch.sum(t != 0).float()

tensor(72.)

In [379]:
# Same count using the .ne() method form.
torch.sum(t.ne(0)).float()

tensor(72.)

In [384]:
# Device the tensor lives on.
t.device

device(type='cpu')

In [392]:
# Check which device a tensor derived via view/argmax reports.
t.view(-1,12).argmax(-1).device

device(type='cpu')

In [399]:
# Fraction of zero entries: zeros (where t == t/2) over all entries (t.eq(t)).
(t == t/2).sum().float() / t.eq(t).sum()

tensor(0.1429)

In [456]:
# Dummy settings dict for trying out pretty_dict.
# NOTE(review): this rebinds ARGS, replacing the probe configuration that an
# earlier cell stored under the same name.
ARGS = dict(
    argument='value',
    another_arg=2,
    subdict=dict(
        arg1='value',
        arg2='moope',
        e2=2/3
    ),
    listarg=['this', 'is', 'listed'],
    device=t.device
)

def pretty_dict(d, indent=0):
    '''Prints a (possibly nested) dict, one "key: value" line per entry.

    Nested dicts are printed under their key, indented by four more spaces;
    every other value is shown via repr().

    Args:
        d: dict with string keys.
        indent: current nesting depth (number of four-space indents).
    '''
    pad = '    ' * indent
    for key, value in d.items():
        label = pad + key
        if not isinstance(value, dict):
            print(label + ': ' + repr(value))
            continue
        # Header line for the sub-dict (keeps the trailing space), then recurse.
        print(label + ': ')
        pretty_dict(value, indent + 1)
        
# Print the dummy settings dict defined in this cell.
pretty_dict(ARGS)

argument: 'value'
another_arg: 2
subdict: 
    arg1: 'value'
    arg2: 'moope'
    e2: 0.6666666666666666
listarg: ['this', 'is', 'listed']
device: device(type='cpu')


In [475]:
from time import sleep
from tqdm.notebook import tqdm, trange
from tqdm.utils import _term_move_up


# Try printing messages with tqdm.write while a progress bar is running.
# `tqdm` here is the tqdm.notebook variant imported in this cell.
for ii in tqdm(range(5),position=2):
    tqdm.write(f'Hello {ii}',nolock=True)
    sleep(0.2)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

Hello 0
Hello 1
Hello 2
Hello 3
Hello 4



In [48]:
# Coin flip: randint(0, 2) returns 0 or 1. No seed is set in this cell, so
# the branch taken and the sampled array vary between runs.
size = 3
if np.random.randint(0, 2):
    print("yes")
    print(np.random.randint(0, 2,size=size))
else:
    print("no")

yes
[0 0 0]
