<a href="https://colab.research.google.com/github/sridhartroy/AIML/blob/main/LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Read a publicly available text file from a URL.

import urllib.request

url = ("https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt")
file_path = ("the-verdict.txt")

urllib.request.urlretrieve(url, file_path)

with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()

print("Length of the file is : ", len(text))

print(text[:99])


Length of the file is :  20479
I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no 


In [None]:
#Split the text that was just read using reg expressions and print the length of the text before and after split

import re

preprocessed = re.split(r'([,.:;?_!"()\']|--|\s)', text)

preprocessed = [item.strip() for item in preprocessed if item.strip()]
print(len(preprocessed), len(text))

print(preprocessed[:30])


4690 20479
['I', 'HAD', 'always', 'thought', 'Jack', 'Gisburn', 'rather', 'a', 'cheap', 'genius', '--', 'though', 'a', 'good', 'fellow', 'enough', '--', 'so', 'it', 'was', 'no', 'great', 'surprise', 'to', 'me', 'to', 'hear', 'that', ',', 'in']


In [None]:
# In this step , we need to sort the tokenized text, remove dups, and assign an unique integer for each token.

all_words = sorted(set(preprocessed))
vocab_size = len(all_words)
print(vocab_size, type(all_words))

vocab = {token:integer for integer,token in enumerate(all_words)}

for i, item in enumerate(vocab.items()):
    print(item)
    if i >= 50:
        break


1130 <class 'list'>
('!', 0)
('"', 1)
("'", 2)
('(', 3)
(')', 4)
(',', 5)
('--', 6)
('.', 7)
(':', 8)
(';', 9)
('?', 10)
('A', 11)
('Ah', 12)
('Among', 13)
('And', 14)
('Are', 15)
('Arrt', 16)
('As', 17)
('At', 18)
('Be', 19)
('Begin', 20)
('Burlington', 21)
('But', 22)
('By', 23)
('Carlo', 24)
('Chicago', 25)
('Claude', 26)
('Come', 27)
('Croft', 28)
('Destroyed', 29)
('Devonshire', 30)
('Don', 31)
('Dubarry', 32)
('Emperors', 33)
('Florence', 34)
('For', 35)
('Gallery', 36)
('Gideon', 37)
('Gisburn', 38)
('Gisburns', 39)
('Grafton', 40)
('Greek', 41)
('Grindle', 42)
('Grindles', 43)
('HAD', 44)
('Had', 45)
('Hang', 46)
('Has', 47)
('He', 48)
('Her', 49)
('Hermia', 50)


In [None]:
# Tokenizer Class that takes in the vocab that we created. And also, we send a sample new text for tokenization and encoding to an unique integer id and then decode as well.
class SimpleTokenizerV1:
    def __init__(self, vocab):
        self.str_to_int = vocab # vocab is a dictionary and hence str_to_int is a dictionary as well
        self.int_to_str = {i:s for s,i in vocab.items()}
       # print(self.str_to_int)

    def encode(self, text): #new input text
        preprocessed = re.split(r'([,.?_!"()\']|--|\s)', text)
        preprocessed = [item.strip() for item in preprocessed if item.strip()]
        ids = [self.str_to_int[s] for s in preprocessed] # creating a list
        return ids

    def decode(self, ids):
        text = " ".join([self.int_to_str[i] for i in ids])

        text = re.sub(r'\s+([,.?!"()\'])', r'\1', text)
        return text

In [None]:
# use the above class by instantiating it with the vocabulary we created earlier from the verdict corpus. And then encode and decode

tokenizer = SimpleTokenizerV1(vocab)
text = """"It's the last he painted, you know,"
       Mrs. Gisburn said with pardonable pride."""
ids = tokenizer.encode(text)
print(len(ids), ids)

print(tokenizer.decode(ids))

text1 = """"Mrs. said pride."""
ids1 = tokenizer.encode(text1)
print(len(ids1), ids1)

print(tokenizer.decode(ids1))

21 [1, 56, 2, 850, 988, 602, 533, 746, 5, 1126, 596, 5, 1, 67, 7, 38, 851, 1108, 754, 793, 7]
" It' s the last he painted, you know," Mrs. Gisburn said with pardonable pride.
6 [1, 67, 7, 851, 793, 7]
" Mrs. said pride.


In [None]:
# what about words or tokens not in the corupus like below?

text2 = """"Mr. Sridhar said pride."""
ids2 = tokenizer.encode(text2)
print(len(ids2), ids2)

print(tokenizer.decode(ids2))

KeyError: 'Sridhar'

In [None]:
# Need to add some additional tokens for a. unknown b. end of source text

all_tokens = sorted(set(preprocessed))
print(len(all_tokens))
all_tokens.extend(["<|endoftext|>", "<|unk|>"])
print(len(all_tokens))

vocab = {token:integer for integer,token in enumerate(all_tokens)}
print(len(vocab))

for i, item in enumerate(list(vocab.items())[-5:]):
    print(item)

1130
1132
1132
('younger', 1127)
('your', 1128)
('yourself', 1129)
('<|endoftext|>', 1130)
('<|unk|>', 1131)


In [None]:
# Now need to modify the tokenizer custom class to include above

class SimpleTokenizerV2:
    def __init__(self, vocab):
        self.str_to_int = vocab # vocab is a dictionary and hence str_to_int is a dictionary as well
        self.int_to_str = {i:s for s,i in vocab.items()}
       # print(self.str_to_int)

    def encode(self, text): #new input text
        preprocessed = re.split(r'([,.?_!"()\']|--|\s)', text)
        preprocessed = [item.strip() for item in preprocessed if item.strip()]
        print("Preprocessed before token check : " , preprocessed)
        # now check for each token in the preprocessed against the vocab.
        preprocessed = [item if item in self.str_to_int
                             else "<|unk|>"
                        for item in preprocessed]

        ids = [self.str_to_int[s] for s in preprocessed] # creating a list
        return ids

    def decode(self, ids):
        text = " ".join([self.int_to_str[i] for i in ids])

        text = re.sub(r'\s+([,.?!"()\'])', r'\1', text)
        return text

In [None]:
# Let's test the new tokenizer class

# with existing valid text matching tokens in the vocab

tokenizer = SimpleTokenizerV2(vocab)
text = """"It's the last he painted, you know,"
       Mrs. Gisburn said with pardonable pride."""
ids = tokenizer.encode(text)
print(len(ids), ids)

text = tokenizer.decode(ids)
print(text)

# 2 unrelated texts mixed with unknown tokens

text1 = "the last he painted, Sridhar"
text2 = "Hello, do you like tea?"
text = " <|endoftext|> ".join((text1, text2))

print(text)
ids = tokenizer.encode(text)
print(len(ids), ids)

text = tokenizer.decode(ids)
print(text)

Preprocessed before token check :  ['"', 'It', "'", 's', 'the', 'last', 'he', 'painted', ',', 'you', 'know', ',', '"', 'Mrs', '.', 'Gisburn', 'said', 'with', 'pardonable', 'pride', '.']
21 [1, 56, 2, 850, 988, 602, 533, 746, 5, 1126, 596, 5, 1, 67, 7, 38, 851, 1108, 754, 793, 7]
" It' s the last he painted, you know," Mrs. Gisburn said with pardonable pride.
the last he painted, Sridhar <|endoftext|> Hello, do you like tea?
Preprocessed before token check :  ['the', 'last', 'he', 'painted', ',', 'Sridhar', '<|endoftext|>', 'Hello', ',', 'do', 'you', 'like', 'tea', '?']
14 [988, 602, 533, 746, 5, 1131, 1130, 1131, 5, 355, 1126, 628, 975, 10]
the last he painted, <|unk|> <|endoftext|> <|unk|>, do you like tea?


In [None]:
# using Byte Pair Encoding algorithm for Tokenization
!pip install tiktoken

from importlib.metadata import version
import tiktoken
print("tiktoken version:", version("tiktoken"))

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0
tiktoken version: 0.8.0


In [None]:
tokenizer = tiktoken.get_encoding("gpt2")


text1 = "the last he painted, Sridhar"
text2 = "Hello, do you like tea?"
text = " <|endoftext|> ".join((text1, text2))

integers = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
print(integers)

strings = tokenizer.decode(integers)
print(strings)

[1169, 938, 339, 13055, 11, 311, 6058, 9869, 220, 50256, 18435, 11, 466, 345, 588, 8887, 30]
the last he painted, Sridhar <|endoftext|> Hello, do you like tea?


In [None]:
from typing import TextIO
# Exercise 2.1 Byte pair encoding of unknown words
"""
Try the BPE tokenizer from the tiktoken library on the unknown words “Akwirw ier” and print the individual token IDs. Then, call the decode function on each of the resulting integers in this list to reproduce the mapping shown in figure 2.11. Lastly, call the decode method on the token IDs to check whether it can reconstruct the original input, “Akwirw ier.”
"""
tokenizerR50 = tiktoken.get_encoding("r50k_base")
tokenizerP50 = tiktoken.get_encoding("p50k_base")
tokenizerCl100k = tiktoken.get_encoding("cl100k_base")
tokenizero200k = tiktoken.get_encoding("o200k_base")

text = "Akwirw ier"

integers = tokenizerR50.encode(text, allowed_special={"<|endoftext|>"})
print("R50 ", integers, type(integers))

for i in integers:
    print(tokenizerR50.decode([i]), "-->", i)


print("---------------------------------")

print(tokenizerR50.decode(integers))


integers = tokenizerP50.encode(text, allowed_special={"<|endoftext|>"})
print("P50 ", integers, type(integers))

for i in integers:
    print(tokenizerP50.decode([i]), "-->", i)


print("---------------------------------")

print(tokenizerP50.decode(integers))

integers = tokenizerCl100k.encode(text, allowed_special={"<|endoftext|>"})
print("cl100k ", integers, type(integers))

for i in integers:
    print(tokenizerCl100k.decode([i]), "-->", i)


print("---------------------------------")

print(tokenizerCl100k.decode(integers))


integers = tokenizero200k.encode(text, allowed_special={"<|endoftext|>"})
print("o200k ", integers, type(integers))

for i in integers:
    print(tokenizero200k.decode([i]), "-->", i)


print("---------------------------------")

print(tokenizero200k.decode(integers))


R50  [33901, 86, 343, 86, 220, 959] <class 'list'>
Ak --> 33901
w --> 86
ir --> 343
w --> 86
  --> 220
ier --> 959
---------------------------------
Akwirw ier
P50  [33901, 86, 343, 86, 220, 959] <class 'list'>
Ak --> 33901
w --> 86
ir --> 343
w --> 86
  --> 220
ier --> 959
---------------------------------
Akwirw ier
cl100k  [32, 29700, 404, 86, 602, 261] <class 'list'>
A --> 32
kw --> 29700
ir --> 404
w --> 86
 i --> 602
er --> 261
---------------------------------
Akwirw ier
o200k  [32, 9500, 380, 86, 131455] <class 'list'>
A --> 32
kw --> 9500
ir --> 380
w --> 86
 ier --> 131455
---------------------------------
Akwirw ier


In [None]:
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

# print(raw_text)
enc_text = tokenizer.encode(raw_text)
#print(len(enc_text), type(enc_text))

# do a sampling for 50 tokens

enc_sample = enc_text[50:]
#print(enc_sample)
#print(tokenizer.decode(enc_sample))

context_size = 10
x = enc_sample[:context_size]
y = enc_sample[1:context_size+1]
print(f"x: {x}")
print(f"y:      {y}")

for i in range(1, context_size+1):
    context = enc_sample[:i]
    desired = enc_sample[i]
    print(context, "---->", desired)

for i in range(1, context_size+1):
    context = enc_sample[:i]
    desired = enc_sample[i]
    print(tokenizer.decode(context), "---->", tokenizer.decode([desired]))

x: [290, 4920, 2241, 287, 257, 4489, 64, 319, 262, 34686]
y:      [4920, 2241, 287, 257, 4489, 64, 319, 262, 34686, 41976]
[290] ----> 4920
[290, 4920] ----> 2241
[290, 4920, 2241] ----> 287
[290, 4920, 2241, 287] ----> 257
[290, 4920, 2241, 287, 257] ----> 4489
[290, 4920, 2241, 287, 257, 4489] ----> 64
[290, 4920, 2241, 287, 257, 4489, 64] ----> 319
[290, 4920, 2241, 287, 257, 4489, 64, 319] ----> 262
[290, 4920, 2241, 287, 257, 4489, 64, 319, 262] ----> 34686
[290, 4920, 2241, 287, 257, 4489, 64, 319, 262, 34686] ----> 41976
 and ---->  established
 and established ---->  himself
 and established himself ---->  in
 and established himself in ---->  a
 and established himself in a ---->  vill
 and established himself in a vill ----> a
 and established himself in a villa ---->  on
 and established himself in a villa on ---->  the
 and established himself in a villa on the ---->  Riv
 and established himself in a villa on the Riv ----> iera


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
pip install torch==2.4.0

Collecting torch==2.4.0
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.4.0)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.4.0)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.4.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.4.0)
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.4.0)
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-many

In [1]:
import torch
torch.__version__
torch.cuda.is_available()

True

In [2]:
import torch

tensor0d = torch.tensor(1)

tensor1d = torch.tensor([1.1, 2, 3])

tensor2d = torch.tensor([[1, 2,3],
                         [3, 4,5]])

tensor3d = torch.tensor([[[1, 2], [3, 4]],
                         [[5, 6], [7, 8]]])


print(tensor0d)
print(tensor1d)
print(tensor2d)
print(tensor3d)

print(tensor1d.dtype)

tensor0d = torch.tensor([1, 2, 3])
print(tensor0d.dtype)

tensor0df = tensor0d.to(torch.float32)
print(tensor0df.dtype)
print(tensor0d)
print(tensor0df)

tensor(1)
tensor([1.1000, 2.0000, 3.0000])
tensor([[1, 2, 3],
        [3, 4, 5]])
tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])
torch.float32
torch.int64
torch.float32
tensor([1, 2, 3])
tensor([1., 2., 3.])


In [3]:
print(tensor0d, tensor0d.shape)
print(tensor1d, tensor1d.shape)
print(tensor2d, tensor2d.shape)
print(tensor3d, tensor3d.shape)

print(tensor2d.reshape(3, 2))

print(tensor2d.view(3, 2))


print(tensor2d.T)

tensor([1, 2, 3]) torch.Size([3])
tensor([1.1000, 2.0000, 3.0000]) torch.Size([3])
tensor([[1, 2, 3],
        [3, 4, 5]]) torch.Size([2, 3])
tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]]) torch.Size([2, 2, 2])
tensor([[1, 2],
        [3, 3],
        [4, 5]])
tensor([[1, 2],
        [3, 3],
        [4, 5]])
tensor([[1, 3],
        [2, 4],
        [3, 5]])


In [4]:
print(tensor2d)
print("**")
print(tensor2d.T)
print("MatMul")
print(tensor2d.matmul(tensor2d.T))
print(tensor2d @ tensor2d.T)

tensor([[1, 2, 3],
        [3, 4, 5]])
**
tensor([[1, 3],
        [2, 4],
        [3, 5]])
MatMul
tensor([[14, 26],
        [26, 50]])
tensor([[14, 26],
        [26, 50]])


In [5]:
# Seeing models as computational graphs.
#  A logistic regression forward pass.

import torch.nn.functional as F

y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2])
b = torch.tensor([0.0])
z = x1 * w1 + b
a = torch.sigmoid(z)
loss = F.binary_cross_entropy(a, y)

In [8]:
# computing the gradients via autograd function of torch

import torch.nn.functional as F
from torch.autograd import grad

y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

z = x1 * w1 + b
a = torch.sigmoid(z)

loss = F.binary_cross_entropy(a, y)

grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b = grad(loss, b, retain_graph=True)

print(grad_L_w1)
print(grad_L_b)

print('******************************')

loss.backward()
print(w1.grad)
print(b.grad)

(tensor([-0.0898]),)
(tensor([-0.0817]),)
******************************
tensor([-0.0898])
tensor([-0.0817])


In [15]:
# Implement a multi-layer perceptron with 2 hidden layers
import torch.nn as M

class NeuralNetwork(torch.nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()

        self.layers = torch.nn.Sequential(

            # 1st hidden layer
            torch.nn.Linear(num_inputs, 30),
            torch.nn.ReLU(),

            # 2nd hidden layer
            torch.nn.Linear(30, 20),
            torch.nn.ReLU(),

            # output layer
            torch.nn.Linear(20, num_outputs),
        )

    def forward(self, x):
        logits = self.layers(x)
        return logits

In [18]:
# instantiate the above neural network

model = NeuralNetwork(50, 3)

print(model)

# No. of trainable parameters of this model.

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model parameters:", num_params)

total_params = sum(p.numel() for p in model.parameters())
print("Total number of model parameters:", total_params)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)
Total number of trainable model parameters: 2213
Total number of model parameters: 2213


In [32]:
# Now let's print out the weight tensor

print("Weight Matrix")
print(model.layers[0].weight, model.layers[0].weight.shape, type(model.layers), type(model.layers[0]), (model.layers[0].weight.dtype))

print("Bias Vector")
print(model.layers[0].bias, model.layers[0].bias.shape, type(model.layers), type(model.layers[0]), (model.layers[0].bias.dtype))

Weight Matrix
Parameter containing:
tensor([[-0.1291,  0.0935,  0.0127,  ...,  0.0263, -0.1320, -0.0491],
        [-0.1210,  0.1351,  0.1339,  ...,  0.0152, -0.0989,  0.1147],
        [-0.0038,  0.1340,  0.0152,  ...,  0.1063,  0.0492,  0.0754],
        ...,
        [-0.1122,  0.0255,  0.0722,  ..., -0.0669, -0.0370, -0.0764],
        [-0.0826, -0.1013, -0.0703,  ..., -0.1244,  0.0179,  0.0663],
        [ 0.0561,  0.0303,  0.0830,  ...,  0.0114, -0.0295, -0.0990]],
       requires_grad=True) torch.Size([30, 50]) <class 'torch.nn.modules.container.Sequential'> <class 'torch.nn.modules.linear.Linear'> torch.float32
Bias Vector
Parameter containing:
tensor([ 0.0197,  0.0896, -0.0601,  0.0831,  0.1033, -0.0894,  0.0336,  0.0083,
         0.0850,  0.0245,  0.1095, -0.0851,  0.0746, -0.0323,  0.0803,  0.1118,
        -0.0947, -0.0363,  0.1379, -0.0381, -0.0640,  0.0015,  0.0638, -0.1206,
        -0.0283,  0.1148,  0.1035,  0.1004, -0.1034,  0.0665],
       requires_grad=True) torch.Size([30]

In [40]:
torch.manual_seed(123)
model = NeuralNetwork(50, 3)
print(model)
print(model.layers[0].weight.shape, model.layers[0].bias.shape)
print(model.layers[2].weight.shape, model.layers[2].bias.shape)
print(model.layers[4].weight.shape, model.layers[4].bias.shape)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)
torch.Size([30, 50]) torch.Size([30])
torch.Size([20, 30]) torch.Size([20])
torch.Size([3, 20]) torch.Size([3])


In [41]:
torch.manual_seed(123)
X = torch.rand((1, 50))
print(X)
out = model(X)
print(out)


print(model.layers[0].weight, model.layers[0].bias)
print(model.layers[2].weight, model.layers[2].bias)
print(model.layers[4].weight, model.layers[4].bias)

tensor([[0.2961, 0.5166, 0.2517, 0.6886, 0.0740, 0.8665, 0.1366, 0.1025, 0.1841,
         0.7264, 0.3153, 0.6871, 0.0756, 0.1966, 0.3164, 0.4017, 0.1186, 0.8274,
         0.3821, 0.6605, 0.8536, 0.5932, 0.6367, 0.9826, 0.2745, 0.6584, 0.2775,
         0.8573, 0.8993, 0.0390, 0.9268, 0.7388, 0.7179, 0.7058, 0.9156, 0.4340,
         0.0772, 0.3565, 0.1479, 0.5331, 0.4066, 0.2318, 0.4545, 0.9737, 0.4606,
         0.5159, 0.4220, 0.5786, 0.9455, 0.8057]])
tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)
Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=Tru