In [4]:
from torchtext import data
from torchtext.data import Field, BucketIterator, TabularDataset
import torch 

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Configuration for the French -> English translation model. The same dict is
# passed to the data, model, training and translation helpers in `lib`.
# NOTE(review): 'no_cuda' is True while 'device' asks for CUDA, and
# 'create_valset' holds the string 'store_true' (looks like an argparse
# leftover) -- confirm how `lib` interprets these two keys.
params_fr = {
    'src_path': './data/french.txt',
    'trg_path': './data/english.txt',
    'src_data': None,   # filled in by read_data
    'trg_data': None,   # filled in by read_data
    'src_lang': 'fr',
    'trg_lang': 'en',
    # Fall back to the CPU so the notebook still runs on a machine without a GPU.
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'load_weights': None,   # was listed twice; a dict literal keeps only the last value
    'max_length': 100,
    'src_pad': None,
    'trg_pad': None,
    'optimizer': None,
    'no_cuda': True,
    'SGDR': None,
    'epoch': 10,
    'dropout': 0.1,
    'batchsize': 1500,
    'printevery': 100,
    'lr': 0.0001,
    'create_valset': 'store_true',
    'd_model': 512,
    'heads': 8,
    'n_layers': 6,
}


# Get data

In [6]:
from lib.data_processing import read_data
# `read_data` stores the French and English text in the params dict
# under 'src_data' and 'trg_data', each as a list of strings.
read_data(params_fr)

In [9]:
from lib.data_processing import create_fields 
import dill as pickle 

# To rebuild the fields from scratch instead of loading the saved ones:
#SRC_fr, TRG_fr = create_fields(params_fr)

# NOTE(review): unpickling runs arbitrary code stored in the file -- only load
# field files that this project wrote itself.
# Load the pickled source-side (French) field object.
with open('weights/SRC_fr.pkl', 'rb') as f:
    SRC_fr = pickle.load(f)
   
# Load the pickled target-side (English) field object.
with open('weights/TRG_fr.pkl', 'rb') as f:
    TRG_fr = pickle.load(f)

In [10]:
from lib.data_processing import create_dataset

# Build the training iterator from the loaded text and the two fields, and keep
# it in the params dict under 'train' so later cells can read it from there.
params_fr["train"] = create_dataset(params_fr, SRC_fr, TRG_fr)

creating dataset and iterator... 


In [11]:
params_fr["train"]

<lib.batch.MyIterator at 0x7f451d91ac50>

In [12]:
# Sanity check of the source vocabulary: token -> index via `stoi`, index -> token via `itos`.
SRC_fr.vocab.stoi['maison'], SRC_fr.vocab.itos[115]

(115, 'maison')

In [47]:
import dill as pickle 

# Persist both field objects so later sessions can reload them instead of
# rebuilding the vocabularies. `with` closes (and flushes) each file as soon as
# the dump finishes; the previous bare `open(...)` calls never closed them.
with open('weights/SRC_fr.pkl', 'wb') as f:
    pickle.dump(SRC_fr, f)

with open('weights/TRG_fr.pkl', 'wb') as f:
    pickle.dump(TRG_fr, f)

In [23]:
from lib.models import get_model

# Build the model from the shared configuration and the two vocabulary sizes.
# The configuration dict in this notebook is `params_fr`; the bare name
# `params` used here before is not defined by any cell.
model_fr = get_model(params_fr, len(SRC_fr.vocab), len(TRG_fr.vocab))

In [24]:
model_fr

Transformer(
  (encoder): Encoder(
    (embed): Embedder(
      (embed): Embedding(23465, 512)
    )
    (pe): PositionalEncoder(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (layers): ModuleList(
      (0): EncoderLayer(
        (norm_1): Norm()
        (norm_2): Norm()
        (attn): MultiHeadAttention(
          (q_linear): Linear(in_features=512, out_features=512, bias=True)
          (v_linear): Linear(in_features=512, out_features=512, bias=True)
          (k_linear): Linear(in_features=512, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (out): Linear(in_features=512, out_features=512, bias=True)
        )
        (ff): FeedForward(
          (linear_1): Linear(in_features=512, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear_2): Linear(in_features=2048, out_features=512, bias=True)
        )
        (dropout_1): Dropout(p=0.1, inplace=False)
        (dropout_2): Dropout(p=0.

# Train model 

In [27]:
# Add the training-only settings to the shared configuration dict.
# The dict is `params_fr` throughout this notebook; the bare name `params`
# used here before is not defined by any cell.
from lib.data_processing import get_len
from lib.optim import CosineWithRestarts

params_fr["checkpoint"] = 0
params_fr["d_model"] = 512
params_fr["heads"] = 8
params_fr["n_layers"] = 6
params_fr["epoch"] = 100  # overrides the 10 set in the configuration cell
params_fr["train_len"] = get_len(params_fr["train"])
params_fr["optimizer"] = torch.optim.Adam(
    model_fr.parameters(),
    lr=params_fr["lr"],
    betas=(0.9, 0.98),
    eps=1e-9,
)
params_fr["SGDR"] = True
# The scheduler wraps the optimizer created above; T_max is the value returned by get_len.
params_fr["sched"] = CosineWithRestarts(params_fr["optimizer"], T_max=params_fr["train_len"])

In [40]:
from lib.train import train_model

In [36]:
# Persist the whole model object (torch.save pickles the module itself, not
# just a state_dict), so loading it later needs the `lib` model classes importable.
torch.save(model_fr, 'model/model_fr')

In [38]:
# Read the file written by the previous cell back into a separate variable.
model_fr2 = torch.load('model/model_fr')

In [39]:
model_fr2

Transformer(
  (encoder): Encoder(
    (embed): Embedder(
      (embed): Embedding(23465, 512)
    )
    (pe): PositionalEncoder(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (layers): ModuleList(
      (0): EncoderLayer(
        (norm_1): Norm()
        (norm_2): Norm()
        (attn): MultiHeadAttention(
          (q_linear): Linear(in_features=512, out_features=512, bias=True)
          (v_linear): Linear(in_features=512, out_features=512, bias=True)
          (k_linear): Linear(in_features=512, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (out): Linear(in_features=512, out_features=512, bias=True)
        )
        (ff): FeedForward(
          (linear_1): Linear(in_features=512, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear_2): Linear(in_features=2048, out_features=512, bias=True)
        )
        (dropout_1): Dropout(p=0.1, inplace=False)
        (dropout_2): Dropout(p=0.

In [None]:
class ShutdownOnFinish:
    """Context manager that runs a shutdown command when its block ends.

    The command runs when the block finishes normally and also when it fails
    with an exception. A ``KeyboardInterrupt`` (or a subclass of it) skips the
    command, so stopping the cell by hand leaves the machine running.
    Exceptions raised inside the block are never suppressed.

    Args:
        command: Shell command handed to ``os.system`` on exit. Defaults to
            ``'sudo shutdown -h now'``.
    """

    def __init__(self, command='sudo shutdown -h now'):
        self.command = command

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        # issubclass (rather than an identity check) also covers subclasses
        # of KeyboardInterrupt.
        interrupted = exc_type is not None and issubclass(exc_type, KeyboardInterrupt)
        if not interrupted:
            import os
            os.system(self.command)
        # Returning False lets any in-flight exception propagate to the caller.
        return False
            
# Train, save the model, then let the context manager run its exit action.
# The configuration dict is `params_fr`; the bare name `params` used here
# before is not defined by any cell in this notebook.
with ShutdownOnFinish():
    train_model(model_fr, params_fr, "model_fr")
    # Saving inside the block means the file is written before __exit__ runs.
    torch.save(model_fr, 'model/model_fr')

training model...
6m: epoch 1 [####################]  100%  loss = 1.52727
epoch 1 complete, loss = 1.527
   10m: epoch 2 [#############       ]  68%  loss = 1.378

# Translate

In [2]:
# Load the saved model from disk for the translation cells below.
# NOTE(review): no map_location is passed, so tensors are restored to the
# device they were saved from -- confirm that device is available here.
model_fr = torch.load('model/model_fr')


In [15]:
import dill as pickle 

# Load the field objects under the names the translation cells actually use
# (`SRC_fr` / `TRG_fr`); they were previously bound to `SRC` / `TRG`, which no
# later cell reads.
# NOTE(review): unpickling runs arbitrary code stored in the file -- only load
# field files that this project wrote itself.
with open('weights/SRC_fr.pkl', 'rb') as f:
    SRC_fr = pickle.load(f)

with open('weights/TRG_fr.pkl', 'rb') as f:
    TRG_fr = pickle.load(f)

In [16]:
# NOTE(review): "k" is presumably the beam-search width read by translate_text -- confirm in lib.translate.
params_fr["k"] = 3

from lib.translate import translate_text

# Translate one French sentence using the loaded model and the two field objects.
translate_text("Il veut poser une question", params_fr, model_fr, SRC_fr, TRG_fr)

'he wants to ask a question .'

In [19]:
# NOTE(review): standard French would read "quelque chose après le boulot"; the input is
# left exactly as written because the recorded output was produced from this string.
translate_text("Tu veux faire quelques chose après boulot?", params_fr, model_fr, SRC_fr, TRG_fr)

'do you want to do something after work ?'