In [1]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))


True
0
<torch.cuda.device object at 0x7fb221824f70>
1
NVIDIA GeForce RTX 3070


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7fb221b58f50>

In [3]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
inputs

[tensor([[-0.5525,  0.6355, -0.3968]]),
 tensor([[-0.6571, -1.6428,  0.9803]]),
 tensor([[-0.0421, -0.8206,  0.3133]]),
 tensor([[-1.1352,  0.3773, -0.2824]]),
 tensor([[-2.5667, -1.4303,  0.5009]])]

In [4]:
# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))

hidden

(tensor([[[ 0.5438, -0.4057,  1.1341]]]),
 tensor([[[-1.1115,  0.3501, -0.7703]]]))

In [5]:
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    view = i.view(1, 1, -1)
    print(view)
    print("-"*20)
    out, hidden = lstm(view, hidden)
    print(out)
    print("-"*20)
    print(hidden)
    print("="*30)

tensor([[[-0.5525,  0.6355, -0.3968]]])
--------------------
tensor([[[-0.2682,  0.0304, -0.1526]]], grad_fn=<StackBackward0>)
--------------------
(tensor([[[-0.2682,  0.0304, -0.1526]]], grad_fn=<StackBackward0>), tensor([[[-1.0766,  0.0972, -0.5498]]], grad_fn=<StackBackward0>))
tensor([[[-0.6571, -1.6428,  0.9803]]])
--------------------
tensor([[[-0.5370,  0.0346, -0.1958]]], grad_fn=<StackBackward0>)
--------------------
(tensor([[[-0.5370,  0.0346, -0.1958]]], grad_fn=<StackBackward0>), tensor([[[-1.1552,  0.1214, -0.2974]]], grad_fn=<StackBackward0>))
tensor([[[-0.0421, -0.8206,  0.3133]]])
--------------------
tensor([[[-0.3947,  0.0391, -0.1217]]], grad_fn=<StackBackward0>)
--------------------
(tensor([[[-0.3947,  0.0391, -0.1217]]], grad_fn=<StackBackward0>), tensor([[[-1.0727,  0.1104, -0.2179]]], grad_fn=<StackBackward0>))
tensor([[[-1.1352,  0.3773, -0.2824]]])
--------------------
tensor([[[-0.1854,  0.0740, -0.0979]]], grad_fn=<StackBackward0>)
--------------------
(te

In [6]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state

In [7]:
print(inputs)
print(hidden)

tensor([[[-0.1473,  0.3482,  1.1371]],

        [[-0.3339, -1.4724,  0.7296]],

        [[-0.1312, -0.6368,  1.0429]],

        [[ 0.4903,  1.0318, -0.5989]],

        [[ 1.6015, -1.0735, -1.2173]]])
(tensor([[[ 0.6472, -0.0412, -0.1775]]]), tensor([[[-0.5000,  0.8673, -0.2732]]]))


In [8]:
import torch
import pandas as pd
from collections import Counter

class Dataset(torch.utils.data.Dataset):
    def __init__(
        self,
        args,
    ):
        self.args = args
        self.words = self.load_words()
        self.uniq_words = self.get_uniq_words()

        self.index_to_word = {index: word for index, word in enumerate(self.uniq_words)}
        self.word_to_index = {word: index for index, word in enumerate(self.uniq_words)}

        self.words_indexes = [self.word_to_index[w] for w in self.words]

    def load_words(self):
        train_df = pd.read_csv('../data/reddit-cleanjokes.csv')
        text = train_df['Joke'].str.cat(sep=' ')
        return text.split(' ')

    def get_uniq_words(self):
        word_counts = Counter(self.words)
        return sorted(word_counts, key=word_counts.get, reverse=True)

    def __len__(self):
        return len(self.words_indexes) - self.args["sequence_length"]

    def __getitem__(self, index):
        return (
            torch.tensor(self.words_indexes[index:index+self.args["sequence_length"]]),
            torch.tensor(self.words_indexes[index+1:index+self.args["sequence_length"]+1]),
        )


In [9]:
dataset = Dataset(args={
    "max_epochs": 10,
    "batch_size": 256,
    "sequence_length": 4
})
print(dataset.__getitem__(0))

(tensor([  2,   8,   0, 248]), tensor([  8,   0, 248,  20]))


In [40]:
import torch
import numpy as np

scalar = torch.tensor(np.random.uniform(-10, 10))
print(scalar)
tanh = torch.nn.Tanh()
t_scaled = tanh(scalar)
print(t_scaled)

sigmoid = torch.nn.Sigmoid()
s_scaled = sigmoid(t_scaled)
print(s_scaled)

tensor(5.4159)
tensor(1.0000)
tensor(0.7311)


In [1]:
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
import torch
from transformers import BertConfig, BertModel, BertTokenizer
import numpy as np

model_name = "bert-base-cased"
device = "cuda:0" if torch.cuda.is_available() else "cpu"

config = BertConfig.from_pretrained(model_name)
bert_model = BertModel.from_pretrained(model_name, config=config)
bert_tokenizer = BertTokenizer.from_pretrained(model_name, do_lower_case=True)
bert_model = bert_model.eval()

[nltk_data] Downloading package punkt to /home/jorgenv/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [21]:
with torch.no_grad():
    headline = "BREAKING: Musharraf to be impeached."
    tokens = bert_tokenizer(headline, return_tensors='pt')
    bert_model = bert_model.to(device)
    output = bert_model(tokens.input_ids.to(device))
    #print(output.last_hidden_state)
    print(output.pooler_output.size())
    latent_matrix = output.last_hidden_state[0]
    print(latent_matrix.size())
    mean_vector = torch.mean(latent_matrix, 0).to('cpu').detach().numpy()
    mean_vector = mean_vector.reshape((1,-1))
    print(mean_vector.shape)

torch.Size([1, 768])
torch.Size([14, 768])
(1, 768)


In [8]:
with torch.no_grad():
    headline = "BREAKING: Musharraf to be impeached."
    tokens = bert_tokenizer(headline, return_tensors='pt')
    bert_model = bert_model.to(device)
    latent_matrix = output.last_hidden_state.to('cpu').detach().numpy()[0]
    mean_vector = np.mean(latent_matrix, axis=0).reshape((1, latent_matrix.shape[1]))
    print(mean_vector.shape)

(1, 768)


In [2]:
import torch

In [3]:
w = torch.rand((3, 2, 4))
w

tensor([[[0.2902, 0.3407, 0.9046, 0.0943],
         [0.5204, 0.8343, 0.9265, 0.4997]],

        [[0.0731, 0.9441, 0.8162, 0.1994],
         [0.1855, 0.2908, 0.5922, 0.6943]],

        [[0.4437, 0.5076, 0.0608, 0.3106],
         [0.8169, 0.3862, 0.4164, 0.3761]]])

In [4]:
b = torch.rand((3, 2, 1))
b

tensor([[[0.4222],
         [0.0664]],

        [[0.6650],
         [0.9036]],

        [[0.1224],
         [0.5757]]])

In [5]:
torch.mul(b, w)

tensor([[[0.1225, 0.1438, 0.3819, 0.0398],
         [0.0346, 0.0554, 0.0615, 0.0332]],

        [[0.0486, 0.6278, 0.5428, 0.1326],
         [0.1677, 0.2628, 0.5351, 0.6273]],

        [[0.0543, 0.0621, 0.0074, 0.0380],
         [0.4703, 0.2223, 0.2397, 0.2165]]])

In [7]:
import numpy as np

np.multiply(0.4222, [0.2902, 0.3407, 0.9046, 0.0943])

array([0.12252244, 0.14384354, 0.38192212, 0.03981346])