In [65]:
%matplotlib inline
!pip install tokenizers




`Learn the Basics <intro.html>`_ ||
**Quickstart** ||
`Tensors <tensorqs_tutorial.html>`_ ||
`Datasets & DataLoaders <data_tutorial.html>`_ ||
`Transforms <transforms_tutorial.html>`_ ||
`Build Model <buildmodel_tutorial.html>`_ ||
`Autograd <autogradqs_tutorial.html>`_ ||
`Optimization <optimization_tutorial.html>`_ ||
`Save & Load Model <saveloadrun_tutorial.html>`_

Quickstart
===================
This section runs through the API for common tasks in machine learning. Refer to the links in each section to dive deeper.

Working with data
-----------------
PyTorch has two `primitives to work with data <https://pytorch.org/docs/stable/data.html>`_:
``torch.utils.data.DataLoader`` and ``torch.utils.data.Dataset``.
``Dataset`` stores the samples and their corresponding labels, and ``DataLoader`` wraps an iterable around
the ``Dataset``.


In [66]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchtext.datasets import Multi30k
import copy

from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.processors import TemplateProcessing

PyTorch offers domain-specific libraries such as `TorchText <https://pytorch.org/text/stable/index.html>`_,
`TorchVision <https://pytorch.org/vision/stable/index.html>`_, and `TorchAudio <https://pytorch.org/audio/stable/index.html>`_,
all of which include datasets. For this tutorial, we  will be using a TorchVision dataset.

The ``torchvision.datasets`` module contains ``Dataset`` objects for many real-world vision data like
CIFAR, COCO (`full list here <https://pytorch.org/vision/stable/datasets.html>`_). In this tutorial, we
use the FashionMNIST dataset. Every TorchVision ``Dataset`` includes two arguments: ``transform`` and
``target_transform`` to modify the samples and labels respectively.



In [67]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

We pass the ``Dataset`` as an argument to ``DataLoader``. This wraps an iterable over our dataset, and supports
automatic batching, sampling, shuffling and multiprocess data loading. Here we define a batch size of 64, i.e. each element
in the dataloader iterable will return a batch of 64 features and labels.



In [68]:
def get_q_k_v(X, W_q, W_k, W_v):
  Q = torch.matmul(X, W_q)
  K = torch.matmul(X, W_k)
  V = torch.matmul(X, W_v)

  return Q, K, V

Read more about `loading data in PyTorch <data_tutorial.html>`_.




--------------




Creating Models
------------------
To define a neural network in PyTorch, we create a class that inherits
from `nn.Module <https://pytorch.org/docs/stable/generated/torch.nn.Module.html>`_. We define the layers of the network
in the ``__init__`` function and specify how data will pass through the network in the ``forward`` function. To accelerate
operations in the neural network, we move it to the GPU if available.



In [69]:
class Transformer(nn.Module):
    def __init__(self, vocab_size, embedding_size, layer, N):
        super(Transformer, self).__init__()
        self.embedding = Embedding(vocab_size, embedding_size)
        self.encoder = Encoder(layer, N)
    def forward(self, x):
        print(self.embedding(x))
        return self.encoder(self.embedding(x))
        

In [70]:
class Embedding(nn.Module):
  def __init__(self, vocab_size, embedding_size, padding_idx = 1):
    super(Embedding, self).__init__()
    self.emb = nn.Embedding(vocab_size, embedding_size, padding_idx=1)

  def forward(self, x):
    return self.emb(x)

In [71]:
SRC_LANGUAGE = 'de'
TGT_LANGUAGE = 'en'

train_iter = Multi30k(split='train', language_pair=(SRC_LANGUAGE, TGT_LANGUAGE))

f = open("parallelcorpus.txt", "a")

for i in train_iter:
  for x in [x.rstrip("\n") for x in i]:
    f.write(x)
    f.write(' ')
  
f.close()


In [72]:
MAX_LEN = 64
VOCAB_SIZE = 32768
UNK_IDX, PAD_IDX, BOS_IDX, EOS_IDX = 0, 1, 2, 3

tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
trainer = BpeTrainer(vocab_size=VOCAB_SIZE, special_tokens=["[UNK]", "[PAD]", "[BOS]", "[EOS]"])
tokenizer.pre_tokenizer = Whitespace()
tokenizer.train(['parallelcorpus.txt'], trainer)

tokenizer.enable_padding(pad_id=1, length=MAX_LEN)
tokenizer.post_processor = TemplateProcessing(
    single="[BOS] $A [EOS]",
    special_tokens=[
        ("[BOS]", tokenizer.token_to_id("[BOS]")),
        ("[EOS]", tokenizer.token_to_id("[EOS]")),
    ],
)

def collate_fn(batch):
    src_batch, tgt_batch = [], []
    for src_sample, tgt_sample in batch:
        src_enc = tokenizer.encode(src_sample.rstrip("\n"))
        src_batch.append(torch.tensor(src_enc.ids))

        tgt_enc = tokenizer.encode(tgt_sample.rstrip("\n"))
        tgt_batch.append(torch.tensor(tgt_enc.ids))



    return torch.stack(src_batch), torch.stack(tgt_batch)

In [73]:
BATCH_SIZE = 16
train_iter = Multi30k(split='train', language_pair=(SRC_LANGUAGE, TGT_LANGUAGE))
train_dataloader = DataLoader(train_iter, batch_size=BATCH_SIZE, collate_fn=collate_fn)

In [74]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(512, 2048),
            nn.ReLU(),
            nn.Linear(2048, 512),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=512, out_features=2048, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2048, out_features=512, bias=True)
  )
)


In [75]:
class MultiHeadAttention(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MultiHeadAttention, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(input_dim, output_dim) for i in range(4)])
    
    def attention(query, key, value):
        return torch.matmul(torch.softmax(torch.matmul(query, key.T) / torch.sqrt(query.size(-1))), value)
        

    def forward(self, query, key, value):
        query = self.linears[0](query)
        key = self.linears[1](key)
        value = self.linears[2](value)
        Z = self.attention(query, key, value)

        return self.linears[-1](Z)


In [76]:
class FeedForward(nn.Module):
  def __init__(self, d_in, d_out, dropout):
    super(FeedForward, self).__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(512, 2048),
        nn.Dropout(dropout),
        nn.ReLU(),
        nn.Linear(2048, 512),
    )
    self.dropout = nn.Dropout(dropout)

  def forward(self, x):
    print(self.linear_relu_stack(x))
    return self.linear_relu_stack(x)


In [77]:
def clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [78]:
class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.a_1 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        print(self.a_2 * (x - mean) / (std + self.eps) + self.a_1)
        return self.a_2 * (x - mean) / (std + self.eps) + self.a_1

In [79]:
class EncoderLayer(nn.Module):
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(EncoderLayer, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = LayerNorm(size)
        self.layer_norm2 = LayerNorm(size)
    
    def forward(self, x):
        y = self.layer_norm(x + self.self_attn(x, x, x))
        print(self.layer_norm2(y + self.feed_forward(y)))
        return self.layer_norm2(y + self.feed_forward(y))


In [80]:
class Encoder(nn.Module):
    def __init__(self, layer, N):
        super(Encoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)
        
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        print(self.norm(x))
        return self.norm(x)

In [81]:
encoder_layer = EncoderLayer((512, 2048), MultiHeadAttention(512, 2048), FeedForward(512, 2048, 0.15), 0.15)
model = Transformer(VOCAB_SIZE, MAX_LEN, encoder_layer, 6)

for src, trg in train_dataloader:
    print(model(src))
    break



tensor([[[ 4.6607e-01,  7.3898e-01, -3.6909e-01,  ..., -1.1221e+00,
           1.5682e-01, -3.1734e-01],
         [ 8.4615e-01, -4.7236e-01, -9.8101e-01,  ...,  3.5059e-01,
           8.8774e-01,  6.9109e-01],
         [ 1.6058e-01,  2.1077e-01,  1.4302e+00,  ..., -7.4099e-01,
          -5.8058e-01,  1.1109e+00],
         ...,
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00]],

        [[ 4.6607e-01,  7.3898e-01, -3.6909e-01,  ..., -1.1221e+00,
           1.5682e-01, -3.1734e-01],
         [-7.8466e-01, -1.3349e+00,  1.0837e+00,  ...,  1.2425e+00,
           1.4563e+00, -2.5843e-01],
         [ 5.3470e-01, -1.1274e+00,  1.3627e+00,  ..., -1.1752e+00,
           1.1324e+00,  2.8633e-01],
         ...,
         [ 0.0000e+00,  0

RuntimeError: ignored

In [47]:

for src, tgt in train_dataloader:
  print(src)
  print(src.shape)
  print(src==1)

  print(tgt)
  print(tgt.shape)
  print(tgt==1)
  #train
  break

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
        ...,
        [False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ...,  True,  True,  True]])
tensor([[    2,   161,   865,  ...,     1,     1,     1],
        [    2,   117,   149,  ...,     1,     1,     1],
        [    2,   161,   544,  ...,     1,     1,     1],
        ...,
        [    2,   117, 10370,  ...,     1,     1,     1],
        [    2,   117,  4003,  ...,     1,     1,     1],
        [    2,  2984,   389,  ...,     1,     1,     1]])
torch.Size([16, 64])
tensor([[False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ...,  True,  True,  True],
        ...,
        [False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ...,  True,  True,  True]])
tensor

Read more about `building neural networks in PyTorch <buildmodel_tutorial.html>`_.




--------------




Optimizing the Model Parameters
----------------------------------------
To train a model, we need a `loss function <https://pytorch.org/docs/stable/nn.html#loss-functions>`_
and an `optimizer <https://pytorch.org/docs/stable/optim.html>`_.



In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and
backpropagates the prediction error to adjust the model's parameters.



In [None]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

We also check the model's performance against the test dataset to ensure it is learning.



In [None]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

The training process is conducted over several iterations (*epochs*). During each epoch, the model learns
parameters to make better predictions. We print the model's accuracy and loss at each epoch; we'd like to see the
accuracy increase and the loss decrease with every epoch.



In [None]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Read more about `Training your model <optimization_tutorial.html>`_.




--------------




Saving Models
-------------
A common way to save a model is to serialize the internal state dictionary (containing the model parameters).



In [None]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

Loading Models
----------------------------

The process for loading a model includes re-creating the model structure and loading
the state dictionary into it.



In [None]:
model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))

This model can now be used to make predictions.



In [None]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Read more about `Saving & Loading your model <saveloadrun_tutorial.html>`_.


