In [1]:
import argparse
import os
import torch
import random
import sys
from data.dataloader import itemDataset,ToTensor,collate_fn
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms, utils

from model.model_builder import build_model #still need some little modify
from model.util.optimizers import build_optim
from model.util.Logger import logger,init_logger  #finish
from model.util.saver import build_model_saver  #finish
from model.util import Report_manager

import torch.nn as nn
from model import Trainer
import opts

def _check_save_model_path(opt):
    save_model_path = os.path.abspath(opt.save_model)
    model_dirname = os.path.dirname(save_model_path)
    if(not os.path.exists(model_dirname)):
        os.makedirs(model_dirname)

def training_opt_postprocessing(opt):
    """Apply the seed and device settings carried by ``opt``.

    Args:
        opt: options object. ``gpuid`` and ``seed`` are always read;
            ``device_id`` is read only when ``gpuid`` is truthy.

    Returns:
        The very same ``opt`` object, unmodified.

    Side effects:
        * Logs a hint when CUDA is available but ``opt.gpuid`` is falsy.
        * With ``opt.seed > 0``: seeds torch and Python's ``random`` and
          switches cuDNN to deterministic mode.
        * With a truthy ``opt.gpuid``: makes ``opt.device_id`` the current
          CUDA device and, if seeding is on, seeds the CUDA generator too.
    """
    if torch.cuda.is_available():
        if not opt.gpuid:
            logger.info("you should use gpu to train the model.")

    wants_seed = opt.seed > 0
    if wants_seed:
        torch.manual_seed(opt.seed)
        # Python's own RNG is seeded as well; the original note attributes
        # this to torchtext's shuffled iterator and to keeping the dataset
        # read order identical across GPUs.
        random.seed(opt.seed)
        # Fixing the seed alone does not make every cuDNN kernel
        # reproducible, so determinism is requested explicitly.
        torch.backends.cudnn.deterministic = True

    if not opt.gpuid:
        return opt

    torch.cuda.set_device(opt.device_id)
    if wants_seed:
        # Same initialisation on the CUDA side (multi-GPU runs).
        torch.cuda.manual_seed(opt.seed)
    return opt

    



In [2]:
# Run configuration: every entry below becomes an attribute of the imported
# `opts` module (updating a module's namespace dict is what attribute
# assignment on the module does).
vars(opts).update(
    # embeddings
    src_word_vec_size=256,
    tar_word_vec_size=256,
    feat_merge='sum',
    feat_vec_size=256,
    # architecture
    encoder_type='transformer',
    decoder_type='transformer',
    replace=True,
    num_layer=3,
    enc_layer=6,
    dec_layer=3,
    model_dim=256,
    nin_dim_en=1024,
    nin_dim_de=512,
    dropout=0.1,
    # attention
    global_attention='general',
    self_attn_type="scaled_dot",
    num_head=8,
    # runtime, logging and checkpoint paths
    gpuid=0,
    seed=0,
    log_file="./logger",
    save_model="./sssss/output1",
    show=True,
    train_from="./ch_en/model_save/step_300000.pt",
)

In [3]:
# Bind the configured `opts` module to the name `opt` used by the cells below.
opt = opts

In [7]:
# Apply seed/device settings from the options, then initialise logging.
opt = training_opt_postprocessing(opt)
init_logger(opt)

if opt.train_from:
    logger.info('loading checkpoint from {0}'.format(opt.train_from))
    # map_location sends every stored tensor to the CPU while loading.
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    device = torch.device('cpu')
    checkpoint = torch.load(opt.train_from, map_location=device)

    # Model hyper-parameters come from the checkpoint when resuming.
    model_opt = checkpoint['opt']
else:
    checkpoint = None
    model_opt = opt

# data_token[side][token] -> 0-based line number of the token in the
# corresponding subword file.
data_token = dict()

for ttype in ['source', 'target']:
    data_token[ttype] = dict()
    # The encoding is pinned instead of relying on the locale default, which
    # is not UTF-8 on every platform and would mis-decode non-ASCII subwords.
    # Assumes the subword files are UTF-8 -- confirm if they were written
    # with a different encoding.
    with open('./ch_en/subword.{0}'.format(ttype), encoding='utf-8') as f_in:
        for j, word in enumerate(f_in):
            # [1:-1] drops the first and last character of the stripped line
            # (presumably surrounding quote characters -- TODO confirm
            # against the vocabulary file format).
            data_token[ttype][word.strip()[1:-1]] = j

logger.info("source size:{0}".format(len(data_token['source'])))
logger.info("target size:{0}".format(len(data_token['target'])))



[2018-10-29 02:40:32,188 INFO you should use gpu to train the model.]
[2018-10-29 02:40:32,190 INFO loading checkpoint from ./ch_en/model_save/step_300000.pt]
[2018-10-29 02:40:32,190 INFO loading checkpoint from ./ch_en/model_save/step_300000.pt]
[2018-10-29 02:40:32,574 INFO source size:32750]
[2018-10-29 02:40:32,574 INFO source size:32750]
[2018-10-29 02:40:32,576 INFO target size:32751]
[2018-10-29 02:40:32,576 INFO target size:32751]


In [8]:
# The options stored in the checkpoint carry a single `nin_dim`; copy it into
# the separate encoder / decoder fields (targets are assigned left to right).
model_opt.nin_dim_en = model_opt.nin_dim_de = model_opt.nin_dim


In [9]:
# Build the model from the checkpoint options, the notebook options, the
# vocabularies and the loaded checkpoint, then place it on the CPU.
logger.info("start build model")
model = build_model(
    model_opt,
    opt,
    data_token,
    checkpoint,
)
# Kept as the last expression so the cell displays the returned module.
model.to(device=torch.device('cpu'))

[2018-10-29 02:40:36,649 INFO start build model]
[2018-10-29 02:40:36,649 INFO start build model]
[2018-10-29 02:40:36,652 INFO Building model...]
[2018-10-29 02:40:36,652 INFO Building model...]
[2018-10-29 02:40:36,975 INFO finish build encoder]
[2018-10-29 02:40:36,975 INFO finish build encoder]
[2018-10-29 02:40:37,363 INFO finish build decoder]
[2018-10-29 02:40:37,363 INFO finish build decoder]
[2018-10-29 02:40:37,378 INFO loading model weight from checkpoint]
[2018-10-29 02:40:37,378 INFO loading model weight from checkpoint]


the size will be 36313583 13158400 23155183


RuntimeError: Error(s) in loading state_dict for Transformer:
	Missing key(s) in state_dict: "encoder.embedding.tag_emb.weight". 
	size mismatch for encoder.embedding.word_emb.weight: copying a param of torch.Size([32750, 256]) from checkpoint, where the shape is torch.Size([32760, 256]) in current model.

In [None]:
# Display the encoder's word-embedding module.
model.encoder.embedding.word_emb

In [None]:
# Detached copy of the word-embedding matrix, so the slicing done in the
# following cells works on a tensor separate from the live parameter.
weight = model.encoder.embedding.word_emb.weight.detach().clone()

In [None]:
# Rows 2..11 (ten rows) of the copied matrix; used further down as the
# weights of the separate `tag_emb` embedding.
weight_tag = weight[2:12]

In [None]:
# Display the extracted tag rows.
weight_tag

In [None]:
# Everything except rows 2..11: the first two rows followed by rows 12 onward,
# stacked along the row axis.
weight_word = torch.cat([weight[:2], weight[12:]], dim=0)

In [7]:
# Display the word rows that remain after removing the tag rows.
weight_word

NameError: name 'weight_word' is not defined

In [8]:
# Replace the word embedding with one sized from `weight_word` itself, so the
# module's num_embeddings / embedding_dim can never disagree with the weights
# installed on the next line (previously hard-coded as 32750 x 256).
model.encoder.embedding.word_emb = nn.Embedding(
    weight_word.size(0), weight_word.size(1), padding_idx=0
)
model.encoder.embedding.word_emb.weight = nn.Parameter(weight_word)

NameError: name 'model' is not defined

In [9]:
# Add the tag embedding, sized from `weight_tag` itself rather than the
# previously hard-coded 10 x 256, then install the extracted rows as weights.
model.encoder.embedding.tag_emb = nn.Embedding(
    weight_tag.size(0), weight_tag.size(1)
)
model.encoder.embedding.tag_emb.weight = nn.Parameter(weight_tag)

NameError: name 'model' is not defined

In [10]:
# Display the weights of the newly attached tag embedding.
model.encoder.embedding.tag_emb.weight

NameError: name 'model' is not defined

In [17]:
# Display the whole encoder embedding module to check its sub-modules.
model.encoder.embedding

Embedding(
  (word_emb): Embedding(32750, 256, padding_idx=0)
  (pos_emb): Embedding(128, 256)
  (drop): Dropout(p=0.1)
  (tag_emb): Embedding(10, 256)
)

In [18]:
# Snapshot the parameters of the modified model.
model_state_dict = model.state_dict()

#check for dataset_setting
# New checkpoint: the fresh state dict under "model", followed by the
# 'opt', 'optim' and 'reporter' entries carried over from the loaded one.
checkpoint = dict(
    [("model", model_state_dict)]
    + [(kept, checkpoint[kept]) for kept in ("opt", "optim", "reporter")]
)

{'model': OrderedDict([('encoder.embedding.word_emb.weight',
               tensor([[ 0.0038, -0.0061, -0.0112,  ...,  0.0116, -0.0034,  0.0118],
                       [ 0.0390, -0.0318, -0.0893,  ...,  0.2093,  0.0677,  0.0430],
                       [-0.0119, -0.0048,  0.0120,  ...,  0.0019,  0.0041, -0.0121],
                       ...,
                       [ 0.0135,  0.0132,  0.0113,  ...,  0.0027, -0.0096,  0.0108],
                       [ 0.0067, -0.0037,  0.0030,  ...,  0.0097, -0.0113, -0.0037],
                       [ 0.0065,  0.0116,  0.0008,  ...,  0.0042, -0.0024,  0.0068]])),
              ('encoder.embedding.pos_emb.weight',
               tensor([[ 0.0257,  0.0832, -0.0978,  ..., -0.0482, -0.0363,  0.0252],
                       [-0.0906, -0.1156, -0.0448,  ...,  0.0517, -0.1088, -0.1176],
                       [ 0.0101, -0.1060, -0.1104,  ..., -0.0181, -0.0125,  0.0812],
                       ...,
                       [-0.1171, -0.0520,  0.0064,  ...,  0.0794

In [19]:
# Write the repackaged checkpoint next to the notebook.
torch.save(checkpoint,'./total_pretrain.pt')

In [20]:
# Reload the checkpoint to check that it round-trips. The path now matches
# the one given to torch.save ('./total_pretrain.pt'); the previous
# './total/total_pretrain.pt' named a file this notebook never writes.
check = torch.load('./total_pretrain.pt')