In [1]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

from tqdm import tqdm

In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
train_data = torch.load('/project/annotation/train.pt')
test_data = torch.load('/project/annotation/test.pt')
valid_data = torch.load('/project/annotation/valid.pt')

In [4]:
# dataloaders
batch_size = 16
device='cuda'

# make sure to SHUFFLE for your training data
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [5]:
import torch.nn as nn

class SentimentLSTM(nn.Module):

    def __init__(self, seq_size, output_size, hidden_dim, n_layers):
        
        super(SentimentLSTM, self).__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.lstm = nn.LSTM(seq_size, hidden_dim, n_layers, batch_first=True)
        
        # linear and sigmoid layer
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        batch_size = x.size(0)
        
        lstm_out, hidden = self.lstm(x)
        ## lstm_out.shape [batch, seq_len, hidden_dim]
        ## hidden [batch, hidden_dim]

        # fully connected layer        
        out = self.fc(lstm_out[:, -1, :])
        ## out.shape: [n_layer * n_direction, batch, output_size]

        # sigmoid function
        sig_out = self.sig(out)
        
        # return last sigmoid output and hidden state
        return sig_out, hidden

In [6]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        # nn.init.uniform_(param.data, -0.1, 0.1)
        

In [7]:
hidden_dim = 512
n_layers = 4
lr = 0.01
output_size = 1

model = SentimentLSTM(32, output_size, hidden_dim, n_layers)
model.to('cuda')
model.apply(init_weights)


SentimentLSTM(
  (lstm): LSTM(32, 512, num_layers=4, batch_first=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (sig): Sigmoid()
)

In [8]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.BCELoss().cuda()

In [None]:
train_losses = []
valid_losses = []
num_epochs = 10
device = 'cuda'
m = nn.Softmax(dim = 1)

for epoch in tqdm(range(num_epochs+1)): 
    # train

    losses = []
    for i, (inputs, target) in enumerate(train_loader):
        inputs, target = inputs.cuda(), target.cuda()

        model.train()
        outputs, hidden = model(inputs.to(device))
        optimizer.zero_grad()


        # print(f'target shape: {target.shape}')
        # print(f'outputs shape: {outputs.shape}')
        # print(outputs[0])
        # print(target[0])
        loss = criterion(outputs.squeeze(), target.float())
        loss.backward()

        # scheduler.step()
        # torch.nn.utils.clip_grad_norm_(lstm.parameters(), 10)

        optimizer.step()
        losses.append(loss.item())

    train_losses.append(np.mean(np.array(losses)))

    losses = []
    for i, (inputs, target) in enumerate(valid_loader):
        inputs, target = inputs.cuda(), target.cuda()

        model.eval()
        valid, hidden = model(inputs.to(device))

        #inverse
        # valid = torch.from_numpy(scaler.inverse_transform(valid.cpu().detach().numpy()))


        vall_loss = criterion(valid.squeeze(), target.float())
        # scheduler.step(vall_loss)
        losses.append(vall_loss.item())

    valid_losses.append(np.mean(np.array(losses)))
    
    
    if epoch % 2 == 0:
        # print(criterion1(outputs, y_train.to(device),quantile))

        print("Epoch: %d, loss: %1.5f valid loss:  %1.5f lr: %1.5f " %(epoch, train_losses[-1],valid_losses[-1],
                                                                        optimizer.param_groups[0]["lr"]))

    torch.save(model.state_dict(), './model_weight.pth')
    # model.load_state_dict(torch.load(SAVEPATH+'model_weight.pth'))

    # early_stopping는 validation loss가 감소하였는지 확인이 필요하며,
    # 만약 감소하였을경우 현제 모델을 checkpoint로 만든다.
    
    """
    early_stopping(round(valid_losses[-1],5), model)

    if early_stopping.early_stop:
        print("Epoch: %d, loss: %1.5f valid loss:  %1.5f lr: %1.5f " %(epoch, train_losses[-1],valid_losses[-1],
                                                                      optimizer.param_groups[0]["lr"]))
        break
    """

  9%|▉         | 1/11 [02:28<24:47, 148.76s/it]

Epoch: 0, loss: 0.10245 valid loss:  0.09295 lr: 0.01000 


 27%|██▋       | 3/11 [07:27<19:55, 149.41s/it]

Epoch: 2, loss: 0.10238 valid loss:  0.10376 lr: 0.01000 


 45%|████▌     | 5/11 [12:27<14:57, 149.65s/it]

Epoch: 4, loss: 0.10137 valid loss:  0.10024 lr: 0.01000 


 64%|██████▎   | 7/11 [17:27<09:58, 149.74s/it]

Epoch: 6, loss: 0.10252 valid loss:  0.09274 lr: 0.01000 


In [10]:
model.eval()
input, target = test_data[0]

print(input)
print(target)

tensor([[9.3900e+02, 9.3500e+02, 2.0000e+00,  ..., 1.1120e+03, 6.5300e+02,
         2.0000e+00],
        [9.3900e+02, 9.3500e+02, 2.0000e+00,  ..., 1.1120e+03, 6.5300e+02,
         2.0000e+00],
        [9.3900e+02, 9.3500e+02, 2.0000e+00,  ..., 1.1120e+03, 6.5300e+02,
         2.0000e+00],
        ...,
        [9.3900e+02, 9.3500e+02, 2.0000e+00,  ..., 1.0520e+03, 6.6400e+02,
         2.0000e+00],
        [9.3900e+02, 9.3500e+02, 2.0000e+00,  ..., 1.0520e+03, 6.6400e+02,
         1.0000e+00],
        [9.3900e+02, 9.3500e+02, 2.0000e+00,  ..., 1.0620e+03, 6.7000e+02,
         1.0000e+00]])
tensor(0, dtype=torch.int32)


In [11]:
losses = []
val1 = None
tor1 = None
for i, (input, target) in enumerate(test_loader):
    
    input, target = input.cuda(), target.cuda()

    model.eval()
    valid, hidden = model(input.to(device))
    

    #inverse
    # valid = torch.from_numpy(scaler.inverse_transform(valid.cpu().detach().numpy()))

    vall_loss = criterion(valid.squeeze(), target.float())
    val1 = valid
    tor1 = target.type(torch.long).to(device)
    # scheduler.step(vall_loss)
    losses.append(vall_loss.item())
    

#valid_losses.append(np.mean(np.array(losses)))

In [15]:
print(val1)

tensor([[1.3292e-13],
        [1.3292e-13],
        [1.3292e-13],
        [1.3292e-13],
        [1.3292e-13],
        [1.3292e-13],
        [1.3292e-13],
        [1.3292e-13],
        [1.3292e-13]], device='cuda:0', grad_fn=<SigmoidBackward0>)


In [16]:
print(tor1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')


In [None]:
vall_loss = criterion(val1.squeeze(), tor1.float())

In [None]:
print(vall_loss)

In [13]:
print(len(losses))

1264


In [14]:
print(len(test_data))

20217
