# User Defined Models With x-tagger Wrapper (toy example)

In [2]:
import nltk
from sklearn.model_selection import train_test_split

import torch
from xtagger import LSTMForTagging
from xtagger import xtagger_dataset_to_df, df_to_torchtext_data


# Fixed seed so the train/test split is identical on every
# "Restart Kernel -> Run All"; also seeds torch's global RNG.
SEED = 42
torch.manual_seed(SEED)

# CoNLL-2000 sentences as lists of (word, tag) pairs, using the universal tagset.
nltk_data = list(nltk.corpus.conll2000.tagged_sents(tagset='universal'))

# 80/20 split; random_state pins the shuffle so results are reproducible.
train_set, test_set = train_test_split(
    nltk_data,
    train_size=0.8,
    test_size=0.2,
    random_state=SEED,
)

df_train = xtagger_dataset_to_df(train_set)
df_test = xtagger_dataset_to_df(test_set)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_iterator, valid_iterator, test_iterator, TEXT, TAGS = df_to_torchtext_data(
    df_train,
    df_test,
    device,
    batch_size = 32
)

Number of training examples: 8758
Number of testing examples: 2190
Unique tokens in TEXT vocabulary: 17487
Unique tokens in TAGS vocabulary: 13


In [3]:
import torch
import torch.nn as nn

class CNNTagger(nn.Module):
    """Toy convolutional sequence tagger.

    Three parallel 1-D convolutions (kernel sizes 3, 5 and 7, each padded so
    the sequence length is preserved) are applied to the input, averaged, and
    projected position-wise to ``n_tags`` scores by two linear layers.

    Args:
        n_tags: size of the last output dimension (one score per tag).
        out_channels: number of output channels of each convolution.
    """

    def __init__(self, n_tags,  out_channels = 1):
        super(CNNTagger, self).__init__()
        self.n_tags = n_tags
        self.out_channels = out_channels

        # padding = (kernel_size - 1) / 2 keeps the output length equal to the
        # input length for each of the three convolutions.
        self.cnn1 = nn.Conv1d(
            in_channels = 1,
            out_channels = self.out_channels,
            kernel_size=3,
            padding=1
        )

        self.cnn2 = nn.Conv1d(
            in_channels = 1,
            out_channels = self.out_channels,
            kernel_size=5,
            padding=2
        )

        self.cnn3 = nn.Conv1d(
            in_channels = 1,
            out_channels = self.out_channels,
            kernel_size=7,
            padding=3
        )

        self.dropout = nn.Dropout()
        self.relu = nn.ReLU()
        # NOTE: tanh is not used in forward(); kept so the attribute remains available.
        self.tanh = nn.Tanh()
        self.fcn_1 = nn.Linear(self.out_channels, 5)
        self.fcn_out = nn.Linear(5, self.n_tags)


    def forward(self, x):
        """Compute per-position tag scores.

        Args:
            x: 2-D tensor of shape (N, L). It is cast to float and given a
               singleton channel dimension, so Conv1d treats dim 0 as the
               batch and dim 1 as the sequence.

        Returns:
            Float tensor of shape (N, L, n_tags).
        """
        # Follow the device the module's weights live on instead of forcing
        # CUDA, so the model also runs on CPU-only machines.
        device = self.cnn1.weight.device
        x = x[:, None, :].to(device).float()  # N x 1 x L
        out1 = self.relu(self.cnn1(x))
        out2 = self.relu(self.dropout(self.cnn2(x)))
        out3 = self.relu(self.dropout(self.cnn3(x)))
        out = (out1 + out2 + out3) / 3  # N x out_channels x L
        # Move channels last so the linear layers act on each position.
        out = self.fcn_1(out.permute(0, 2, 1))  # N x L x 5
        out = self.fcn_out(out)  # N x L x n_tags
        return out

In [4]:
from xtagger import PyTorchTagTrainer

# Hyperparameters of the toy model and the loss.
N_TAGS = 13        # presumably the TAGS vocabulary size printed by the data cell -- confirm
OUT_CHANNELS = 3   # channels produced by each convolution in CNNTagger
IGNORE_INDEX = 0   # targets with this value are skipped by the loss (presumably padding -- confirm)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = CNNTagger(N_TAGS, OUT_CHANNELS).to(device)
criterion = torch.nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)
optimizer = torch.optim.Adam(model.parameters())

# The test iterator is passed as the validation iterator; no separate
# test iterator is given to the trainer.
trainer_kwargs = dict(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    train_iterator=train_iterator,
    val_iterator=test_iterator,
    test_iterator=None,
    TAGS=TAGS,
    TEXT=TEXT,
)
trainer = PyTorchTagTrainer(**trainer_kwargs)

In [5]:
# Number passed to the trainer (presumably the epoch count -- the log below
# shows ten evaluation rounds). The returned value is rebound to `model`.
N_EPOCHS = 10
model = trainer.train(N_EPOCHS)

HBox(children=(FloatProgress(value=0.0, max=2740.0), HTML(value='')))

Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 19.637577491654746}, 'train': {'acc': 20.136540238727687}, 'eval_loss': 8.780225228572238, 'train_loss': 10.04117732674536}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 25.461134954697183}, 'train': {'acc': 25.9058733591705}, 'eval_loss': 2.3991619538569795, 'train_loss': 2.4615511015383866}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.41678588459704}, 'train': {'acc': 30.384799616797064}, 'eval_loss': 2.158846554548844, 'train_loss': 2.1545461773002237}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.44349070100143}, 'train': {'acc': 30.436570720779564}, 'eval_loss': 2.1233302786730337, 'train_loss': 2.1217943873718705}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.426323319027183}, 'train': {'acc': 30.19464967413235}, 'eval_loss': 2.114841509556425, 'train_loss': 2.118328948960687}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.514067715784453}, 'train': {'acc': 30.513017771520083}, 'eval_loss': 2.0931937003481216, 'train_loss': 2.098367074980353}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.111587982832617}, 'train': {'acc': 30.001596678907873}, 'eval_loss': 2.1070213559745015, 'train_loss': 2.113463208623176}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 29.65951359084406}, 'train': {'acc': 29.11955254283212}, 'eval_loss': 2.1065807377082715, 'train_loss': 2.109799807524159}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.309966618979495}, 'train': {'acc': 30.231421673222727}, 'eval_loss': 2.0885737927063652, 'train_loss': 2.093431923946325}
Evaluating...


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


{'eval': {'acc': 30.435860753457323}, 'train': {'acc': 30.471407351496765}, 'eval_loss': 2.078521628310715, 'train_loss': 2.079871195076156}

