In [4]:
!pip3 install labml



In [3]:
!pip3 install labml-helpers



In [5]:
!pip3 install labml-nn

Collecting labml-nn
  Downloading labml_nn-0.4.100-py3-none-any.whl (237 kB)
[K     |████████████████████████████████| 237 kB 3.1 MB/s 
[?25hCollecting einops
  Downloading einops-0.3.0-py2.py3-none-any.whl (25 kB)
Installing collected packages: einops, labml-nn
Successfully installed einops-0.3.0 labml-nn-0.4.100


In [6]:
!pip3 install torch



In [7]:
# RTX 3060: install the CUDA 11.1 (cu111) builds of torch / torchvision / torchaudio
!pip3 install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.9.0%2Bcu111-cp39-cp39-linux_x86_64.whl (2041.4 MB)
[K     |████████████████████████████████| 2041.4 MB 23 kB/s 
[?25hCollecting torchvision==0.10.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torchvision-0.10.0%2Bcu111-cp39-cp39-linux_x86_64.whl (23.1 MB)
[K     |████████████████████████████████| 23.1 MB 1.2 MB/s 
[?25hCollecting torchaudio==0.9.0
  Downloading torchaudio-0.9.0-cp39-cp39-manylinux1_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 3.1 MB/s 
Collecting pillow>=5.3.0
  Downloading Pillow-8.2.0-cp39-cp39-manylinux1_x86_64.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 27.5 MB/s 
Installing collected packages: torch, pillow, torchvision, torchaudio
  Attempting uninstall: torch
    Found existing installation: torch 1.9.0
    Uninstalling torch-1.9.0:
      Succ

In [11]:
!pip3 install torchtext

Collecting torchtext
  Downloading torchtext-0.10.0-cp39-cp39-manylinux1_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 2.9 MB/s 
[?25hCollecting tqdm
  Downloading tqdm-4.61.1-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 6.6 MB/s 
Installing collected packages: tqdm, torchtext
Successfully installed torchtext-0.10.0 tqdm-4.61.1


In [9]:
# Only needed for local execution
#!pip3 install --upgrade git+https://github.com/pytorch/text
#import sys
#sys.path.append("./.local/lib/python3.9/site-packages")


In [14]:
import torch
from torch import nn

from labml import experiment
from labml.configs import option
from labml_helpers.module import Module
from labml_nn.experiments.nlp_classification import NLPClassificationConfigs
from labml_nn.transformers import Encoder
from labml_nn.transformers import TransformerConfigs
from labml_nn.transformers.utils import subsequent_mask


In [21]:
from collections import Counter
from typing import Callable

import torch
import torchtext
from torch import nn
from torch.utils.data import DataLoader
from torchtext.vocab import Vocab

from labml import lab, tracker, monit
from labml.configs import option
from labml_helpers.device import DeviceConfigs
from labml_helpers.metrics.accuracy import Accuracy
from labml_helpers.module import Module
from labml_helpers.train_valid import TrainValidConfigs, hook_model_outputs, BatchIndex
from labml_nn.optimizers.configs import OptimizerConfigs

class NLPClassificationConfigs(TrainValidConfigs):
    """
    <a id="NLPClassificationConfigs">
    ## Trainer configurations
    </a>
    Base configurations for training a text classification model.

    Every property below is a configurable option. Where the default is a
    string (e.g. `'ag_news'`, `'character'`), it names one of the `@option`
    functions registered for that property later in this notebook.
    """

    # Optimizer (built by the `_optimizer` option)
    optimizer: torch.optim.Adam
    # Device the batches are moved to in `step`
    device: torch.device = DeviceConfigs()

    # Classification model; `step` expects it to return a tuple whose first item is the logits
    model: Module
    # Number of samples per batch
    batch_size: int = 16
    # Length every sample is padded/truncated to
    seq_len: int = 512
    # Vocabulary (provided by the `ag_news` option by default)
    vocab: Vocab = 'ag_news'
    # Number of token ids: vocabulary size plus the two extra ids added by `_n_tokens`
    n_tokens: int
    # Number of target classes (provided by the `ag_news` option by default)
    n_classes: int = 'ag_news'
    # Tokenizer callable; defaults to the `character` option
    tokenizer: Callable = 'character'

    # Whether to periodically save models
    is_save_models = True

    # Loss function applied to `(output, target)` in `step`
    loss_func = nn.CrossEntropyLoss()
    # Accuracy metric, also registered as a state module in `init`
    accuracy = Accuracy()
    # Model embedding size
    d_model: int = 512
    # Maximum gradient norm used for clipping in `step`
    grad_norm_clip: float = 1.0

    # Training data loader (provided by the `ag_news` option by default)
    train_loader: DataLoader = 'ag_news'
    # Validation data loader (provided by the `ag_news` option by default)
    valid_loader: DataLoader = 'ag_news'

    def init(self):
        """
        ### Initialization

        Configures the tracker, hooks the model outputs and registers the
        accuracy metric as a state module.
        """
        # Register the accuracy and loss indicators as scalars
        tracker.set_scalar("accuracy.*", True)
        tracker.set_scalar("loss.*", True)
        # Add a hook to log module outputs
        hook_model_outputs(self.mode, self.model, 'model')
        # Register accuracy as a state module.
        # State modules were meant for carrying RNN state between training and
        # validation; here they keep the accuracy statistics of the two modes separate.
        self.state_modules = [self.accuracy]

    def step(self, batch: any, batch_idx: BatchIndex):
        """
        ### Training or validation step

        * `batch` is a `(data, target)` pair; both items are moved to `self.device`
        * `batch_idx` tells whether this is the last batch (`batch_idx.is_last`)

        Computes and tracks loss and accuracy; in training mode it also
        back-propagates, clips gradients and takes an optimizer step.
        """

        # Move data to the device
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Advance the global step by `data.shape[1]` when in training mode.
        # With batches built by `CollateFunc` (shape `[seq_len, batch_size]`)
        # this is the number of samples in the batch.
        if self.mode.is_train:
            tracker.add_global_step(data.shape[1])

        # Log activations only on the last batch
        with self.mode.update(is_log_activations=batch_idx.is_last):
            # The model returns a tuple; only the first item (the output) is used.
            # The remaining items are reserved for RNN states, which are not implemented.
            output, *_ = self.model(data)

        # Calculate and log loss
        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        # Calculate and log accuracy
        self.accuracy(output, target)
        self.accuracy.track()

        # Train the model
        if self.mode.is_train:
            # Calculate gradients
            loss.backward()
            # Clip gradients to `grad_norm_clip`
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
            # Take optimizer step
            self.optimizer.step()
            # Log the model parameters and gradients on the last batch;
            # this must happen before the gradients are cleared below
            if batch_idx.is_last:
                tracker.add('model', self.model)
            # Clear the gradients
            self.optimizer.zero_grad()

        # Save the tracked metrics
        tracker.save()


@option(NLPClassificationConfigs.optimizer)
def _optimizer(c: NLPClassificationConfigs):
    """
    ### Default optimizer configurations

    Creates an `OptimizerConfigs` object bound to the model parameters,
    selecting `'Adam'` and passing along the model embedding size.
    """
    opt_conf = OptimizerConfigs()
    # Parameters to optimize
    opt_conf.parameters = c.model.parameters()
    # Optimizer choice
    opt_conf.optimizer = 'Adam'
    # Embedding size, forwarded from the trainer configurations
    opt_conf.d_model = c.d_model
    return opt_conf


@option(NLPClassificationConfigs.tokenizer)
def basic_english():
    """
    ### Basic English tokenizer

    The default tokenizer of `NLPClassificationConfigs` is `character`.
    Select this one instead by setting,
    ```
        'tokenizer': 'basic_english',
    ```
    in the configurations dictionary when starting the experiment.
    """
    from torchtext.data import get_tokenizer

    tokenize = get_tokenizer('basic_english')
    return tokenize


def character_tokenizer(x: str):
    """
    ### Character level tokenizer

    Splits `x` into a list with one entry per character.
    """
    return [ch for ch in x]


@option(NLPClassificationConfigs.tokenizer)
def character():
    """
    ### Character level tokenizer option

    Returns the `character_tokenizer` function itself (not its result),
    so it can be used as the tokenizer callable.
    """
    tokenizer_fn = character_tokenizer
    return tokenizer_fn


@option(NLPClassificationConfigs.n_tokens)
def _n_tokens(c: NLPClassificationConfigs):
    """
    ### Number of token ids

    The vocabulary size plus two extra ids. The `ag_news` option uses
    `len(vocab)` as the padding token and `len(vocab) + 1` as the
    classifier token.
    """
    vocab_size = len(c.vocab)
    return vocab_size + 2


class CollateFunc:
    """
    ## Collate samples into a batch

    Callable that turns a list of `(label, text)` samples into a pair of tensors:
    token ids of shape `[seq_len, batch_size]` and labels of shape `[batch_size]`.
    """

    def __init__(self, tokenizer, vocab: Vocab, seq_len: int, padding_token: int, classifier_token: int):
        """
        * `tokenizer` is the callable that splits a text into tokens
        * `vocab` maps a token to its integer id (indexed as `vocab[token]`)
        * `seq_len` is the number of positions in every sequence
        * `padding_token` is the id that fills positions past the end of a text
        * `classifier_token` is the `[CLS]` id written to the last position of every sequence
        """
        self.tokenizer = tokenizer
        self.vocab = vocab
        self.seq_len = seq_len
        self.padding_token = padding_token
        self.classifier_token = classifier_token

    def __call__(self, batch):
        """
        * `batch` is the list of `(label, text)` samples collected by the `DataLoader`

        Returns `(data, labels)`. Labels are shifted down by one, so a label
        of `1` in the sample becomes class index `0`.
        """
        n_samples = len(batch)

        # Start with every position padded; each sample occupies one column
        data = torch.full((self.seq_len, n_samples), self.padding_token, dtype=torch.long)
        labels = torch.zeros(n_samples, dtype=torch.long)

        for column, sample in enumerate(batch):
            raw_label, text = sample
            # Convert the label to a zero-based class index
            labels[column] = int(raw_label) - 1
            # Look up token ids and keep at most `seq_len` of them
            token_ids = [self.vocab[token] for token in self.tokenizer(text)][:self.seq_len]
            # Write the ids at the top of this sample's column
            data[:len(token_ids), column] = data.new_tensor(token_ids)

        # The last position of every sequence is always `[CLS]`,
        # overwriting whatever token (or padding) was there
        data[-1, :] = self.classifier_token

        return data, labels


@option([NLPClassificationConfigs.n_classes,
         NLPClassificationConfigs.vocab,
         NLPClassificationConfigs.train_loader,
         NLPClassificationConfigs.valid_loader])
def ag_news(c: NLPClassificationConfigs):
    """
    ### AG News dataset

    Loads the AG News dataset and provides the values for
    `n_classes`, `vocab`, `train_loader`, and `valid_loader` (in that order).
    The vocabulary is built from the tokens of both the training and the
    validation split.
    """

    # Fetch the `train` and `test` splits; the latter is used for validation
    data_root = str(lab.get_data_path() / 'ag_news')
    train, valid = torchtext.datasets.AG_NEWS(root=data_root, split=('train', 'test'))

    # Wrap both splits as map-style datasets
    with monit.section('Load data'):
        from labml_nn.utils import MapStyleDataset

        train = MapStyleDataset(train)
        valid = MapStyleDataset(valid)

    tokenizer = c.tokenizer

    # Count tokens over the training split, then the validation split
    counter = Counter()
    for dataset in (train, valid):
        for _, line in dataset:
            counter.update(tokenizer(line))
    vocab = Vocab(counter, min_freq=1)

    # Two ids past the vocabulary are reserved for padding and `[CLS]`
    padding_token = len(vocab)
    classifier_token = len(vocab) + 1

    # Both loaders shuffle and use their own collate function instance
    train_loader = DataLoader(train, batch_size=c.batch_size, shuffle=True,
                              collate_fn=CollateFunc(tokenizer, vocab, c.seq_len,
                                                     padding_token, classifier_token))
    valid_loader = DataLoader(valid, batch_size=c.batch_size, shuffle=True,
                              collate_fn=CollateFunc(tokenizer, vocab, c.seq_len,
                                                     padding_token, classifier_token))

    # `n_classes`, `vocab`, `train_loader`, `valid_loader`
    n_classes = 4
    return n_classes, vocab, train_loader, valid_loader


In [22]:
class TransformerClassifier(nn.Module):
    """
    ## Transformer based classifier

    Embeds the input, runs it through the encoder without a mask, and
    classifies from the encoder output at the last position along the
    first dimension.
    """

    def __init__(self, encoder: Encoder, src_embed: Module, generator: nn.Linear):
        """
        * `encoder` is called as `encoder(x, None)`
        * `src_embed` is applied to the raw input first
        * `generator` maps the last-position encoder output to the logits
        """
        super().__init__()
        self.src_embed = src_embed
        self.encoder = encoder
        self.generator = generator

    def forward(self, x: torch.Tensor):
        """
        Returns `(logits, None)`; the second item is a placeholder so the
        result is a tuple, as the trainer's `step` unpacks it.
        """
        embedded = self.src_embed(x)
        # No attention mask is passed to the encoder
        encoded = self.encoder(embedded, None)
        # `CollateFunc` puts the `[CLS]` token at index `-1` of the first dimension
        logits = self.generator(encoded[-1])
        return logits, None



In [23]:
class Configs(NLPClassificationConfigs):
  """
  Experiment configurations: the NLP classification trainer configurations
  extended with a transformer.
  """
  # Classification model (built by the `_model` option)
  model: TransformerClassifier
  # Transformer configurations (built by the `_transformer_configs` option)
  transformer: TransformerConfigs


In [24]:
@option(Configs.transformer)
def _transformer_configs(c: Configs):
    """
    ### Transformer configurations

    Creates a `TransformerConfigs` object whose source and target
    vocabulary sizes are both set to `n_tokens`.
    """
    transformer_conf = TransformerConfigs()
    vocab_size = c.n_tokens
    # Same size for the embeddings and for generating logits
    transformer_conf.n_src_vocab = vocab_size
    transformer_conf.n_tgt_vocab = vocab_size
    return transformer_conf


@option(TransformerConfigs.encoder_attn)
def fnet_mix():
    """
    ### FNet mixing layer option

    Creates the `FNetMix` module that is used in place of self-attention
    in the transformer encoder layer when `encoder_attn` is set to `'fnet_mix'`.
    """
    from labml_nn.transformers.fnet import FNetMix

    mixer = FNetMix()
    return mixer


@option(Configs.model)
def _model(c: Configs):
    """
    ### Classification model

    Assembles a `TransformerClassifier` from the transformer's encoder and
    source embedding plus a linear layer mapping `d_model` features to
    `n_classes` logits, and moves it to the configured device.
    """
    # Components are created in the same order as the constructor arguments
    encoder = c.transformer.encoder
    src_embed = c.transformer.src_embed
    classifier_head = nn.Linear(c.d_model, c.n_classes)

    classifier = TransformerClassifier(encoder, src_embed, classifier_head)
    return classifier.to(c.device)

In [25]:

def main():
    """
    Create the `fnet` experiment, apply the configuration overrides,
    register the model for saving/loading and run training.
    """
    # Create experiment
    experiment.create(name="fnet")
    # Create configs
    conf = Configs()

    # Values that override the defaults declared on the configuration classes
    overrides = {
        # Use word level tokenizer
        'tokenizer': 'basic_english',

        # Train for 32 epochs
        'epochs': 32,
        # Switch between training and validation 10 times per epoch
        'inner_iterations': 10,

        # Transformer configurations
        'transformer.d_model': 512,
        'transformer.ffn.d_ff': 2048,
        'transformer.n_heads': 8,
        'transformer.n_layers': 6,

        # Use FNet mixing instead of self-attention
        'transformer.encoder_attn': 'fnet_mix',

        # Use the Noam optimizer
        'optimizer.optimizer': 'Noam',
        'optimizer.learning_rate': 1.,
    }
    experiment.configs(conf, overrides)

    # Set models for saving and loading
    experiment.add_pytorch_models({'model': conf.model})

    # Start the experiment and run training inside it
    with experiment.start():
        conf.run()

In [26]:
# Run the experiment when this cell is executed as the main module
if __name__ == '__main__':
    main()

  calc_value = func(self)


TypeError: ignored

In [12]:
def get_available_gpus():
    """
    Return a list with the CUDA device properties of every device
    counted by `torch.cuda.device_count()`; empty when there are none.
    """
    gpu_count = torch.cuda.device_count()
    devices = []
    for index in range(gpu_count):
        devices.append(torch.cuda.get_device_properties(index))
    return devices
get_available_gpus()


[_CudaDeviceProperties(name='GeForce RTX 3060', major=8, minor=6, total_memory=12053MB, multi_processor_count=28)]