## GloVe 模型训练

In [62]:
# Environment setup: change into the sgd_nlp working directory and add its
# ./python subdirectory to the import path (relative paths used later in this
# notebook are resolved against this directory).
%cd /playground/sgd_deep_learning/sgd_nlp/
import sys 
sys.path.append('./python')

/playground/sgd_deep_learning/sgd_nlp


In [63]:
import os
from torch.optim.lr_scheduler import ExponentialLR
import torch
import os
import time
import pickle

from sgd_nlp.embedding import Glove, CorpusFactoryGlove

## 语料处理

In [64]:
def load_corpus(config):
    """Return the corpus factory, reusing a pickled copy when one is available.

    Args:
        config: settings object exposing
            ``load_corpus_obj`` (bool) - whether a cached pickle may be reused,
            ``corpus_obj_path`` (str or None) - location of the cached pickle,
            ``corpus_dir_path`` - directory handed to ``CorpusFactoryGlove``,
            ``win_width`` and ``pair_symmetric`` - forwarded to the factory.

    Returns:
        The unpickled object when caching is enabled and the file exists;
        otherwise a newly constructed ``CorpusFactoryGlove``. A newly built
        factory is also pickled to ``corpus_obj_path`` when a path is set.
    """
    cache_path = config.corpus_obj_path

    # A missing/None path simply disables the cache instead of raising.
    if config.load_corpus_obj and cache_path and os.path.isfile(cache_path):
        # SECURITY NOTE: pickle.load can execute arbitrary code. Only point
        # corpus_obj_path at files produced by this notebook / trusted sources.
        with open(cache_path, 'rb') as fin:
            corpus_factory = pickle.load(fin)
        # Report success only after the object has really been loaded.
        print("!!! load corpus factory success !!!")
        return corpus_factory

    # corpus files path
    print('CURRENT PATH:\t', config.corpus_dir_path)

    # build a new object from the original corpus files
    corpus_factory = CorpusFactoryGlove(config.corpus_dir_path,
                                        win_width=config.win_width,
                                        pair_symmetric=config.pair_symmetric)
    corpus_factory.vocab.log_info()  # show log

    if cache_path:
        # Create the save directory on first run so the freshly built factory
        # is not lost to a FileNotFoundError when writing the cache.
        save_dir = os.path.dirname(cache_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        with open(cache_path, 'wb') as fout:
            pickle.dump(corpus_factory, fout)

    return corpus_factory

## Training Loop

In [65]:
def train(corpus_factory, model, optimizer, scheduler, config):
    """Run the mini-batch training loop and checkpoint the best weights.

    Each step draws one batch from ``corpus_factory``, minimises the mean of
    the model output, and saves ``model.state_dict()`` whenever the current
    batch loss is lower than every batch loss seen so far in this call.

    Args:
        corpus_factory: provides ``word_pairs_num()`` and
            ``training_batch(batch_size, device=...)``.
        model: callable module whose output is averaged into the objective.
        optimizer: optimizer already bound to ``model``'s parameters.
        scheduler: accepted for interface compatibility; it is NOT stepped by
            this loop (learning-rate decay is currently disabled).
        config: settings object exposing ``corpus_run_loop``, ``batch_size``,
            ``device``, ``log_step`` and ``model_weights_obj_path``.

    Returns:
        None. Progress is printed and the best weights are written to
        ``config.model_weights_obj_path``.
    """
    # Number of optimisation steps (not dataset epochs):
    # corpus_run_loop passes over word_pairs_num() samples, batch_size per step.
    total_pairs = corpus_factory.word_pairs_num()
    num_steps = int(config.corpus_run_loop * total_pairs / config.batch_size)
    global_min_loss = 1e10

    for i in range(num_steps):
        t1 = time.time()
        optimizer.zero_grad()

        # forward: call the module itself (not .forward) so registered hooks run
        batch_data = corpus_factory.training_batch(config.batch_size, device=config.device)
        y = model(batch_data)

        # objective function (loss function): minimise the mean output
        j = torch.mean(y)

        # backward and update weights
        j.backward()
        optimizer.step()

        # Convert to a plain float once: the running minimum then does not keep
        # a graph-attached tensor alive, and reading the value waits for any
        # queued device work so the timing below covers the whole step.
        loss_value = j.item()

        # output info
        tmp_t = time.time() - t1
        if i % config.log_step == 0:
            print('epoch:{}/{}, loss:{}, csot_time: {}'.format(i, num_steps, loss_value, tmp_t))

        # save best model (judged on the current batch loss only)
        if loss_value < global_min_loss:
            global_min_loss = loss_value
            torch.save(model.state_dict(), config.model_weights_obj_path)
            print('new bset loss: {}'.format(loss_value))

### 训练参数设置

In [66]:
class config:
    """Static settings for corpus loading, the GloVe model and the training loop.

    Used as a plain namespace: attributes are read directly from the class
    (``config.lr``), it is never instantiated in this notebook.
    """

    # Raw text corpus location
    data_home = r'./data'
    sub_dir = r'friends/season10'
    corpus_dir_path = os.path.join(data_home, sub_dir)

    # Where pickled / serialized objects are stored
    SAVE_HOME = r'./apps/embedding/save/'
    model_name = r'glove'

    load_corpus_obj = True  # review before training: reuse the cached corpus object if present
    corpus_obj_path = os.path.join(SAVE_HOME, model_name, r'corpus_obj.cf')  # preprocessed corpus pickle

    load_model_weight_obj = True  # review before training: resume from saved weights if present
    model_weights_obj_path = os.path.join(SAVE_HOME, model_name, r'glove_weights.path')  # saved model weights

    # Corpus preprocessing parameters
    win_width = 10  # context window size
    pair_symmetric = True

    # Model parameters
    # Fall back to CPU so the notebook still runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    emb_dim = 300
    sparse_emb = False

    # Training parameters
    lr = 1e-2  # initial learning rate
    corpus_run_loop = 10  # number of passes over the corpus
    batch_size = 1024  # samples per batch
    scheduler_step = 100
    log_step = 500

In [67]:
def app():
    """Build the corpus factory, model and optimizer from ``config`` and train.

    Steps: load (or build) the corpus factory, create a ``Glove`` model on
    ``config.device``, optionally restore previously saved weights, then run
    ``train`` with an Adagrad optimizer. An ``ExponentialLR`` scheduler is
    created and passed along to ``train``.
    """
    corpus_factory = load_corpus(config=config)

    model = Glove(emb_dim=config.emb_dim,
                  token_num=corpus_factory.token_num(),
                  sparse_emb=config.sparse_emb).to(config.device)

    # Resume from saved weights when enabled and available.
    if config.load_model_weight_obj and os.path.isfile(config.model_weights_obj_path):
        # map_location remaps the stored tensors onto the configured device, so
        # a checkpoint written on another device (e.g. a GPU) can still be read.
        state_dict = torch.load(config.model_weights_obj_path,
                                map_location=config.device)
        model.load_state_dict(state_dict)
        print("!!! Load model weights success !!!")

    optimizer = torch.optim.Adagrad(params=model.parameters(), lr=config.lr)
    scheduler = ExponentialLR(optimizer, gamma=0.9)

    train(corpus_factory=corpus_factory,
          model=model,
          optimizer=optimizer,
          scheduler=scheduler,
          config=config)

# Run the whole pipeline: load the corpus, build the model and start training.
app()

!!! load corpus factory success !!!
!!! Load model weights success !!!
epoch:0/14708, loss:0.45056708188466177, csot_time: 0.01168513298034668
new bset loss: 0.45056708188466177
new bset loss: 0.34416452005027226
new bset loss: 0.32712092037998153
new bset loss: 0.3226029156847734
new bset loss: 0.32169612111348805
new bset loss: 0.3047192806519861
new bset loss: 0.29708835836208775
epoch:500/14708, loss:0.48392802933632617, csot_time: 0.004575014114379883
epoch:1000/14708, loss:0.3644226818461068, csot_time: 0.0046062469482421875
new bset loss: 0.28362923298549564
epoch:1500/14708, loss:0.45654477333453786, csot_time: 0.004652500152587891
new bset loss: 0.24977882111505798
new bset loss: 0.24550845171336855
epoch:2000/14708, loss:0.32347228072811174, csot_time: 0.005177974700927734
new bset loss: 0.23148263134084396
epoch:2500/14708, loss:0.31313353024114043, csot_time: 0.004429340362548828
epoch:3000/14708, loss:0.4088120956819198, csot_time: 0.005589723587036133
epoch:3500/14708, lo