In [1]:
import torch
import torch.nn as nn
from torch import optim

import pandas as pd
import numpy as np
from tqdm import tqdm
import importlib
import pickle

import models
import DataLoader
import utils
import main

# data load

In [2]:
importlib.reload(DataLoader)
# features = ['chid', 'masts', 'educd', 'trdtp', 'poscd', 'gender_code',
#                'age', 'label1', 'label2', 'label3', 'tx1', 'tx2', 'tx3']
path = './dataset/user_features4/'
train_idxs = torch.arange(0, 500000)
data_loader = DataLoader.dataLoader(path, train_idxs, batch_size=10000, train=True)
# data_loader = DataLoader.DataSet(path, train_idxs)

# model define

In [3]:
# features
masts = [0., 1., 2., 3.]
educd = [0., 1., 2., 3., 4., 5., 6.]
trdtp = [0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
       26., 27., 28., 29.]
poscd = [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 99.]
gender_code = [0., 1.]
age = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]
labels = [0, 2, 6, 10, 12, 13, 15, 18, 19, 21, 22, 25, 26, 36, 37, 39, 48]

In [4]:
importlib.reload(models)
epochs = 300
lr = 0.001
var1 = 0.9
var2 = 0.999
savePath ='./models/checkpoints.pt'
# weight = torch.tensor([0.0020, 0.0242, 0.0765, 0.0206, 0.0453, 0.1016, 0.0199, 0.0882, 0.0463,
#           0.1176, 0.1164, 0.0860, 0.1119, 0.0216, 0.0109, 0.0836, 0.0273])
weight = None

featureType = {'Qn': 16, 'Qn_dim':23,
               'Ql':[len(masts), len(educd), len(trdtp), len(poscd), len(gender_code), len(age), len(labels), len(labels), len(labels)],
               'Ql_dim':23
              }
modelType = {'hidden_dim':100, 'layer_num':5, 'output_dim': 17, 'topN': 3, 'weight': weight, 'bias':True}

logger = utils.create_logger('./models/', 'logger.txt')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu' )
LSTM = models.LSTMModel(featureType, modelType, device=device).to(device)
optimizer = optim.Adam(LSTM.parameters(), betas=[var1, var2], lr=lr)
device

device(type='cuda')

In [5]:
try:
    models.train(LSTM, optimizer, data_loader, device=device, epochs=epochs, savePath=savePath,
                 featureType=featureType, modelType=modelType, logger=logger)
except Exception as e:
    logger.warning("something raised an exception: {}", exc_info=True)

2022-01-03 16:47:34,534 INFO epoch: 1/300, trainLoss: 8.223831014633179, valLoss: 8.28961009979248
2022-01-03 16:48:23,331 INFO epoch: 2/300, trainLoss: 7.594730043411255, valLoss: 7.867662124633789
2022-01-03 16:49:11,975 INFO epoch: 3/300, trainLoss: 7.534705028533936, valLoss: 7.820180158615113
2022-01-03 16:50:01,615 INFO epoch: 4/300, trainLoss: 7.494595947265625, valLoss: 7.779039058685303
2022-01-03 16:50:50,276 INFO epoch: 5/300, trainLoss: 7.471688241958618, valLoss: 7.756323719024659
2022-01-03 16:51:39,567 INFO epoch: 6/300, trainLoss: 7.460184698104858, valLoss: 7.743845653533936
2022-01-03 16:52:29,349 INFO epoch: 7/300, trainLoss: 7.4548015308380124, valLoss: 7.7383527755737305
2022-01-03 16:53:18,841 INFO epoch: 8/300, trainLoss: 7.452145328521729, valLoss: 7.735686798095703
2022-01-03 16:54:08,258 INFO epoch: 9/300, trainLoss: 7.4499032688140865, valLoss: 7.73334677696228
2022-01-03 16:54:57,869 INFO epoch: 10/300, trainLoss: 7.44885443687439, valLoss: 7.732296438217163

KeyboardInterrupt: 

In [5]:
# load model for train
importlib.reload(models)
modelPath = './models/checkpoints_hoo.pt'
savePath ='./models/checkpoints.pt'

epochs = 500
lr = 0.001
var1 = 0.9
var2 = 0.999
checkpoint = torch.load(modelPath, map_location=torch.device('cpu'))

re_epochs = checkpoint['epochs']
featureType = checkpoint['featureType']
modelType = checkpoint['modelType']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu' )

logger = utils.create_logger('./models/', 'logger.txt')
LSTM = models.LSTMModel(featureType, modelType, device=device).to(device)
optimizer = optim.Adam(LSTM.parameters(), betas=[var1, var2], lr=lr)
LSTM.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
device

device(type='cuda')

In [None]:
try:
    models.train(LSTM, optimizer, data_loader, device=device, epochs=epochs, savePath=savePath,
                 featureType=featureType, modelType=modelType, logger=logger, re_epochs=re_epochs)
except Exception as e:
    logger.warning("something raised an exception: {}", exc_info=True)

2021-12-27 13:19:30,087 INFO epoch: 301/500, trainLoss: 7.451547145843506, valLoss: 7.254924297332764
2021-12-27 13:20:11,083 INFO epoch: 302/500, trainLoss: 7.45157642364502, valLoss: 7.254914093017578
2021-12-27 13:20:51,886 INFO epoch: 303/500, trainLoss: 7.451534414291382, valLoss: 7.25483512878418


In [None]:
# inputs 測試
for x, label in data_loader:
    inputs = x[:, :, 1:-3]
    outputs = LSTM(inputs)
    loss = LSTM.loss_fun(outputs, label)
    loss.backward()
    break

In [14]:
LSTM.QlV.shape

torch.Size([116, 3])

In [6]:
outputs.shape, label.shape

(torch.Size([10000, 23, 17]), torch.Size([10000, 23, 3]))

# test

In [2]:
# features
masts = [0., 1., 2., 3.]
educd = [0., 1., 2., 3., 4., 5., 6.]
trdtp = [0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
       26., 27., 28., 29.]
poscd = [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 99.]
gender_code = [0., 1.]
age = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]
labels = [0, 2, 6, 10, 12, 13, 15, 18, 19, 21, 22, 25, 26, 36, 37, 39, 48]

In [3]:
# test loader
importlib.reload(DataLoader)
path = './dataset/user_features2/'
idxs = torch.arange(0, 500000)
data_loader = DataLoader.dataLoader(path, idxs, batch_size=10000, train=False, shuffle=False)
# data_loader = DataLoader.DataSet(path, train_idxs)

In [29]:
importlib.reload(models)
savePath ='./models/checkpoints.pt'
checkpoint = torch.load('./models/checkpoints.pt', map_location=torch.device('cpu'))

featureType = checkpoint['featureType']
modelType = checkpoint['modelType']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu' )

LSTM = models.LSTMModel(featureType, modelType, device=device).to(device)
LSTM.load_state_dict(checkpoint['model'])
device

device(type='cpu')

In [None]:
result = []
idx = []
for inputs in data_loader:
    with torch.no_grad():
        idx.append(inputs[:, 0:1, 0])
        
        inputs = inputs[:, :, 1:-3]
        outputs = LSTM(inputs)[:, -1, :]
        result.append(outputs)

result = torch.cat(result, dim=0).numpy()
idx = torch.cat(idx, dim=0).numpy()

In [55]:
d = {'result': result, 'idx': idx}
with open('./result/result1.pickle', 'wb') as f:
    pickle.dump(d, f)

In [38]:
f = c[:, 1:].argsort(dim=1, descending=True) + 1

In [39]:
f[:, :3].unique()

tensor([ 1,  3,  6, 13, 14, 16])

In [45]:
c.argsort(dim=1, descending=True)[:, :4].unique()

tensor([ 0,  1,  3,  6, 13, 14, 16])

# val

In [6]:
# val
result = []
l = []
for inputs, label in data_loader:
    with torch.no_grad():
        inputs = inputs[:, :, 1:]
        outputs = LSTM(inputs)[:, -1, :]
    
        result.append(outputs)
        l.append(label[:, -1, :])
    break

In [28]:
ff = result[0][:, 1:].argsort(dim=1, descending=True)[:, :3] + 1
ff

tensor([[16, 14, 13],
        [14, 16, 13],
        [14, 16, 13],
        ...,
        [13, 14, 16],
        [14, 13, 16],
        [14, 13, 16]])

In [29]:
c1 = ff[:, 0]
c1

tensor([16, 14, 14,  ..., 13, 14, 14])

In [30]:
c2 = l[0][:, 0].reshape(-1).type(torch.float)
c2

tensor([16., 14.,  1.,  ..., 13.,  0.,  0.])

In [31]:
((c1 == c2) + 0.).sum()

tensor(3255.)

In [22]:
c1.shape

torch.Size([10000])

# try

In [43]:
torch.empty(3, dtype=torch.long).random_(5)

tensor([0, 3, 4])

In [13]:
for i, (x, label) in enumerate(data_loader):
    break
x.shape, label.shape

(torch.Size([100, 23, 10]), torch.Size([100, 23, 3]))

In [14]:
x = x[:, :, 1:]
outputs = LSTM(x)

In [19]:
outputs.argsort(dim=1, descending=True).reshape(-1, 17)

tensor([[ 3, 13,  0,  ..., 18,  3, 18],
        [ 2, 21,  1,  ..., 17,  2, 17],
        [ 4, 14,  2,  ..., 20,  4, 16],
        ...,
        [21,  2, 13,  ...,  2, 21,  2],
        [22,  1, 17,  ...,  1, 22,  1],
        [ 0,  0, 16,  ...,  0,  0,  0]])

In [10]:
torch.empty(3, dtype=torch.long).random_(5)

tensor([4, 0, 3])

# try

In [9]:
x = torch.arange(12).reshape(3, 4)
x

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [31]:
x.chunk(3, dim=1)

(tensor([[0, 1],
         [4, 5],
         [8, 9]]),
 tensor([[ 2,  3],
         [ 6,  7],
         [10, 11]]))

In [32]:
torch.tensor(5 * 3)

tensor(15)

In [35]:
torch.tanh(torch.tensor([5]))

tensor([0.9999])