### load dataset > DataLoader

In [1]:
from load_data import data

In [2]:
# Unpack the three objects returned by the project's `data.make_loader()`;
# presumably train/validation/test loaders (later cells iterate `train` and
# read "input_ids" / "labels" from each batch).
train, val, test = data.make_loader()
# Bare last expression so the notebook displays the vocabulary size.
data.vocab_size

121855

In [3]:
# Pull a single batch to inspect tensor shapes. `next(iter(...))` replaces the
# loop-and-break pattern: it leaves no stray loop variable behind and raises
# immediately if the loader yields nothing, instead of failing later with a
# NameError on `sample`.
first_batch = next(iter(train))
sample = first_batch["input_ids"]
labels = first_batch["labels"]
print(sample.size())
print(labels.size())

torch.Size([32, 128])
torch.Size([32])


### training
+ save local `.onnx`
+ save cloud `S3`
+ model_db info

In [4]:
from networks.rnn_net import RNN
from trainer import trainer 
from evaluate import evaluate
import torch 
import warnings 
# Blanket-suppress every Python warning for the rest of the kernel session
# (presumably to keep the training logs readable).
# NOTE(review): this also hides deprecation/numerical warnings — consider
# narrowing it with a `category=` or `module=` filter.
warnings.filterwarnings("ignore")

In [5]:
# Model hyperparameters. The sequence length is taken from the second
# dimension of the batch inspected earlier; the vocabulary size comes from
# the data helper.
N_TOKEN = sample.shape[1]
EMBEDDING_DIM = 256
HIDDEN_DIM = 100
VOCAB_SIZE = data.vocab_size
TAG_SIZE = 2

rnn = RNN(N_TOKEN, EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, TAG_SIZE)

# Sanity check: run one forward pass without gradient tracking and show the
# shapes of the two returned tensors.
with torch.no_grad():
    y, h = rnn(sample, hidden_flg=True)
print(y.shape, h.shape, sep="\n")

torch.Size([32, 2])
torch.Size([32, 100])


In [6]:
# Loss function and optimizer for the classifier built above.
# NOTE(review): the recorded run reports ~0.50 validation accuracy with
# TAG_SIZE = 2, i.e. chance level — this learning rate may be too high for
# Adam; confirm.
LEARNING_RATE = 2e-2
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=LEARNING_RATE)

# Train and keep whatever model object `trainer` returns.
# NOTE(review): the meaning of the positional `10` and of the trailing string
# is defined by the project's `trainer` function — confirm against its
# signature (the recorded log shows "1/1" progress, so `10` may not be the
# epoch count).
best_rnn = trainer(
    train,
    val,
    rnn,
    criterion,
    optimizer,
    10,
    "running demo.ipynb",
)

INFO:trainer:device cpu
INFO:trainer:start training ....
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 469/469 [14:49<00:00,  1.90s/it]
INFO:trainer:duration in seconds 889.1406211853027


1/1  | train | loss: 0.0252


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:07<00:00, 20.14it/s]
INFO:trainer:duration in seconds 896.9439284801483


1/1 | val | loss: 0.0254
1/1 | val | accuracy: 0.5034
best validation loss: 127.1508
graph(%input_ids : Long(1, 128, strides=[128, 1], requires_grad=0, device=cpu),
      %embed_net.embeddings.weight : Float(121855, 256, strides=[256, 1], requires_grad=1, device=cpu),
      %hidden_net.layers.0.i2h.weight : Float(100, 356, strides=[356, 1], requires_grad=1, device=cpu),
      %hidden_net.layers.0.i2h.bias : Float(100, strides=[1], requires_grad=1, device=cpu),
      %hidden_net.layers.1.i2h.weight : Float(100, 356, strides=[356, 1], requires_grad=1, device=cpu),
      %hidden_net.layers.1.i2h.bias : Float(100, strides=[1], requires_grad=1, device=cpu),
      %hidden_net.layers.2.i2h.weight : Float(100, 356, strides=[356, 1], requires_grad=1, device=cpu),
      %hidden_net.layers.2.i2h.bias : Float(100, strides=[1], requires_grad=1, device=cpu),
      %hidden_net.layers.3.i2h.weight : Float(100, 356, strides=[356, 1], requires_grad=1, device=cpu),
      %hidden_net.layers.3.i2h.bias : F

INFO:trainer:sucessfully saving model format onnxruntime >> filename is ./onnx/rnn_imdb_301ac8.onnx
INFO:trainer:successfully saving model weights format pth >> filename is ./onnx/rnn_imdb_301ac8.pth 


### evaluate 

In [15]:
# Score the model returned by `trainer` on the `test` loader using the same
# criterion that was used for training.
# NOTE(review): this cell's execution count (15) is out of sequence with its
# neighbours (6 before, 8 after), so the recorded output may not come from a
# clean top-to-bottom run — re-execute with Restart Kernel -> Run All before
# relying on these numbers.
evaluate(test, best_rnn, criterion)

INFO:evaluate:device: cpu
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:09<00:00, 15.73it/s]

test loss: 0.0257
test accuracy: 0.4952





### saving preprocessing and prediction steps for inference time
* save local `.pkl`
* save cloud `S3`


In [8]:
from pred.make_pkl import make_dump_prep, load_dump_prep, upload_s3_bucket 
from pred.prediction import pred 
from pred.preprocessing import prep 

In [9]:
# Pickle the preprocessing object (default path chosen by `make_dump_prep`)
# and the prediction object (explicit path).
make_dump_prep(prep)
make_dump_prep(pred, "./pred/prediction/prediction_rnn.pkl")

# Reload both pickles to confirm they round-trip.
prep_ = load_dump_prep()
pred_ = load_dump_prep("./pred/prediction/prediction_rnn.pkl")

# Run the reloaded preprocessing step on a short example string.
input_ids = prep_.transform("sample text .")
print(f"prep: {input_ids}")

# Feed a random (1, 2) tensor as a stand-in for model output.
# The result is bound to `pred_label`, not `pred`: rebinding `pred` would
# shadow the object imported from `pred.prediction`, so re-running this cell
# would pickle a prediction result instead of the predictor itself.
pred_label, proba = pred_.transform(torch.rand(1, 2))
print(pred_label)
print(proba)

prep: tensor([[    2, 56357, 24899,     1,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,   

In [10]:
# Called with no argument: uploads whatever file `upload_s3_bucket` defaults
# to (presumably the preprocessing pickle written by `make_dump_prep(prep)` —
# confirm against pred/make_pkl).
upload_s3_bucket()
# Upload the prediction pickle written by the previous cell.
upload_s3_bucket("./pred/prediction/prediction_rnn.pkl")

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:pred.make_pkl:upload file to s3 ....
INFO:pred.make_pkl:complete upload task !!
INFO:pred.make_pkl:upload file to s3 ....
INFO:pred.make_pkl:complete upload task !!
