In [1]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

from tqdm import tqdm

In [2]:
train_data = torch.load('/project/annotation/train.pt')
test_data = torch.load('/project/annotation/test.pt')
valid_data = torch.load('/project/annotation/valid.pt')

In [3]:
# dataloaders
batch_size = 16

# make sure to SHUFFLE for your training data
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [4]:
dataiter = iter(train_loader)
sample_x, sample_y = dataiter.next()

print('Sample input size: ', sample_x.size()) # batch_size, seq_length
print('Sample input: \n', sample_x)
print()
print('Sample label size: ', sample_y.size()) # batch_size
print('Sample label: \n', sample_y)

Sample input size:  torch.Size([16, 30, 48])
Sample input: 
 tensor([[[8.8700e+02, 8.9100e+02, 2.0000e+00,  ..., 1.0980e+03,
          5.8400e+02, 2.0000e+00],
         [1.0460e+03, 8.3600e+02, 2.0000e+00,  ..., 1.1770e+03,
          5.1300e+02, 2.0000e+00],
         [1.0580e+03, 8.4700e+02, 2.0000e+00,  ..., 1.1930e+03,
          5.1600e+02, 2.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00]],

        [[9.4100e+02, 8.7200e+02, 2.0000e+00,  ..., 1.0640e+03,
          5.3600e+02, 2.0000e+00],
         [9.4100e+02, 8.7200e+02, 2.0000e+00,  ..., 1.0640e+03,
          5.3600e+02, 2.0000e+00],
         [9.4100e+02, 8.7200e+02, 2.0000e+00,  ..., 1.0640e+03,
          5.3600e+02, 2.0000e+00],
         ...,
         [0.0000e+

In [10]:
import torch.nn as nn

class SentimentLSTM(nn.Module):

    def __init__(self, seq_size, output_size, hidden_dim, n_layers):
        
        super(SentimentLSTM, self).__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.lstm = nn.LSTM(seq_size, hidden_dim, n_layers, batch_first=True)
        
        # linear and sigmoid layer
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        batch_size = x.size(0)
        
        lstm_out, hidden = self.lstm(x)
        ## lstm_out.shape [batch, seq_len, hidden_dim]
        ## hidden [batch, hidden_dim]

        # fully connected layer        
        out = self.fc(lstm_out[:, -1, :])
        ## out.shape: [n_layer * n_direction, batch, output_size]

        # sigmoid function
        sig_out = self.sig(out)
        
        # return last sigmoid output and hidden state
        return sig_out, hidden

In [11]:
hidden_dim = 512
n_layers = 4
lr = 0.01
output_size = 1

model = SentimentLSTM(48, output_size, hidden_dim, n_layers)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [12]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        # nn.init.uniform_(param.data, -0.1, 0.1)

In [13]:
model.to('cuda')
model.apply(init_weights)

SentimentLSTM(
  (lstm): LSTM(48, 512, num_layers=4, batch_first=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (sig): Sigmoid()
)

In [None]:
train_losses = []
valid_losses = []
num_epochs = 20
device = 'cuda'
m = nn.Softmax(dim = 1)

for epoch in tqdm(range(num_epochs+1)): 
    # train

    losses = []
    for i, (input, target) in enumerate(train_loader):

        model.train()
        outputs, hidden = model(input.to(device))
        optimizer.zero_grad()


        # print(f'target shape: {target.shape}')
        # print(f'outputs shape: {outputs.shape}')
        # print(outputs[0])
        # print(target[0])
        loss = criterion(outputs, target.type(torch.long).to(device))
        loss.backward()

        # scheduler.step()
        # torch.nn.utils.clip_grad_norm_(lstm.parameters(), 10)

        optimizer.step()
        losses.append(loss.item())

    train_losses.append(np.mean(np.array(losses)))

    losses = []
    for i, (input, target) in enumerate(train_loader):

        model.eval()
        valid, hidden = model(input.to(device))

        #inverse
        # valid = torch.from_numpy(scaler.inverse_transform(valid.cpu().detach().numpy()))

        vall_loss = criterion(valid, target.type(torch.long).to(device))
        # scheduler.step(vall_loss)
        losses.append(vall_loss.item())

    valid_losses.append(np.mean(np.array(losses)))
    
    
    if epoch % 2 == 0:
        # print(criterion1(outputs, y_train.to(device),quantile))

        print("Epoch: %d, loss: %1.5f valid loss:  %1.5f lr: %1.5f " %(epoch, train_losses[-1],valid_losses[-1],
                                                                        optimizer.param_groups[0]["lr"]))

    torch.save(model.state_dict(), './model_weight.pth')
    # model.load_state_dict(torch.load(SAVEPATH+'model_weight.pth'))

    # early_stopping는 validation loss가 감소하였는지 확인이 필요하며,
    # 만약 감소하였을경우 현제 모델을 checkpoint로 만든다.
    
    """
    early_stopping(round(valid_losses[-1],5), model)

    if early_stopping.early_stop:
        print("Epoch: %d, loss: %1.5f valid loss:  %1.5f lr: %1.5f " %(epoch, train_losses[-1],valid_losses[-1],
                                                                      optimizer.param_groups[0]["lr"]))
        break
    """

  0%|          | 0/21 [00:00<?, ?it/s]

Epoch: 0, loss: 0.00000 valid loss:  0.00000 lr: 0.01000 


 52%|█████▏    | 11/21 [13:11<12:01, 72.10s/it]

Epoch: 10, loss: 0.00000 valid loss:  0.00000 lr: 0.01000 


 81%|████████  | 17/21 [20:24<04:48, 72.16s/it]

In [68]:
a, b= train_data[4]

In [70]:
print(b)

tensor(0, dtype=torch.int32)
