In [None]:
# https://www.kaggle.com/pinocookie/pytorch-dataset-and-dataloader/data
# https://discuss.pytorch.org/t/runtimeerror-multi-target-not-supported-newbie/10216/4

# Build the Dataset. We are going to generate a simple data set and then we will read it.
# Build the DataLoader.
# Build the model.
# Define the loss function and the optimizer.
# Train the model.
# Generate predictions.
# Plot the results. 

In [1]:
import logging
import numpy as np
import collections, gc

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable
from sklearn import preprocessing, metrics
from torch.utils.data import DataLoader, Dataset


# from scripts.utils.logger import Logger
from scripts.utils.data_reading import lower_dim, rr_reader
from scripts.utils.processing import *

logging.basicConfig(level=logging.INFO )

In [2]:
# N, T_leaves & PI_parents have to be present globally! (list of all the labels)
# one_hot_labels because I will keep accessing it for each document <1082>
p2c_table, c2p_table, _, _, PI_parents, T_leaves, N = lookup_table("swiki/data/cat_hier.txt", subset = False)

65333it [00:00, 249997.23it/s]


In [3]:
n = 16 # wn vector size  --> ~log_{2}(num_classes)

In [4]:
order_mapping = generate_order_mapping(N)
wn_tensors = generate_wn(N, n)
binary_yin = generate_binary_yin(N)

100%|████████████████████████████████████████████████████████████████████████| 50312/50312 [00:00<00:00, 161865.70it/s]


In [5]:
num_gpus = torch.cuda.device_count()

In [6]:
num_gpus

0

In [7]:
device = torch.device("cpu" if (torch.cuda.is_available() and num_gpus > 0) else "cpu")

In [8]:
device

device(type='cpu')

In [9]:
wn_tensors = wn_tensors.to(device)
binary_yin = binary_yin.to(device)

In [10]:
torch.cuda.manual_seed(123)

In [11]:
def too_hot_mapping(label_tuple):

    # order_mapping, wn_tensors & binary_yin HAVE TO BE A GLOBAL OBJECT
    
    doc_labels = list(map(int, list(label_tuple)))
    w_n = []
    w_pi = []
    y_in = []
    try:
        for label in doc_labels:
            int_rep = order_mapping[label]
            w_n.append(wn_tensors[int_rep-1])
            if label in T_leaves:
                y_in.append(binary_yin[int_rep-1])
                if label in c2p_table:
                    pi_n = order_mapping[c2p_table[label][0]]
                    w_pi.append(wn_tensors[pi_n-1])

    except:
        print("wait whaat?")
    
    w_n = list2tensor(w_n)
    w_pi = list2tensor(w_pi)
    y_in = list2tensor(y_in)
    
    return w_n, w_pi, y_in

In [12]:
class DatasetSWIKI(Dataset):
    
    def __init__(self, file_path, reduce = True, n_components = 128):
        self.reduce = reduce
        self.n_components = n_components
        self.data, self.labels = lower_dim(file_path, reduce, n_components)
        
    def __len__(self):
        return self.data.shape[0]
    
    def __getitem__(self, index):
        
        if self.reduce:
            document = torch.from_numpy(self.data[index]).to(device)
        else:
            document = torch.from_numpy(self.data[index].todense()).to(device)
        
        label = self.labels[index]
        
        w_n, w_pi, y_in = too_hot_mapping(label)        
        
        return document, label, w_n, w_pi, y_in

In [13]:
n_components = 128

In [14]:
train_data = DatasetSWIKI("swiki/data/train_remapped_small.txt", reduce=True, n_components = n_components)
# valid_data = DatasetSWIKI("swiki/data/valid_remapped.txt", reduce=True, n_components = n_components)

INFO:root:Elapsed time: 2.0min 54.63sec


In [15]:
len(train_data)

500

In [16]:
batch_size = 1

In [17]:
train_loader = DataLoader(train_data, batch_size=1, shuffle = False)
# valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle = False)

In [18]:
train_iter = iter(train_loader)

doc, labbbs, w_n, w_pi, y_in = train_iter.next()

print('docs shape on batch size = {}'.format(doc.shape))
print('w_n shape on batch size = {}'.format(w_n.shape))
print('w_pi shape on batch size = {}'.format(w_pi.shape))
print('y_in shape on batch size = {}'.format(y_in.shape))

docs shape on batch size = torch.Size([1, 128])
w_n shape on batch size = torch.Size([1, 3, 16])
w_pi shape on batch size = torch.Size([1, 3, 16])
y_in shape on batch size = torch.Size([1, 3, 16])


In [19]:
labbbs

[tensor([13402.], dtype=torch.float64),
 tensor([33692.], dtype=torch.float64),
 tensor([393382.], dtype=torch.float64)]

In [22]:
train_data.__getitem__(0)

(tensor([ 9.8365e+00, -5.0457e+00, -1.7610e+00, -1.7764e-01, -2.7741e+00,
          2.3263e+00, -2.0546e-01,  1.7506e+00,  1.1513e+00,  3.7410e-01,
         -2.5467e+00, -1.2244e+00,  4.9557e-01,  6.2389e-01,  1.1025e+00,
          4.3328e-01, -2.2094e-02, -9.5338e-01, -1.0785e+00, -1.6221e-01,
         -1.1985e+00,  1.3402e+00,  2.4101e-01, -1.3047e+00,  8.0909e-01,
          1.4383e+00, -9.6044e-01,  8.9424e-01,  1.6953e+00, -5.8823e-01,
         -7.6265e-01,  2.2405e-01,  1.3576e+00,  1.9262e-01, -4.2536e-01,
         -1.1699e+00, -1.5318e-01,  4.7740e-01,  8.5998e-01, -1.2309e+00,
          4.8969e-01,  3.4071e-01, -1.8085e-01,  6.8598e-01,  5.7038e-01,
          3.5133e-01, -1.0711e+00,  1.4789e+00,  2.3927e-01, -2.5339e-01,
         -1.8474e-01, -1.2786e+00,  1.0679e+00,  7.7296e-02, -3.3784e-01,
          2.4934e+00,  3.2623e-01, -1.9476e+00,  2.6238e-01, -1.7930e+00,
         -1.8926e+00, -3.6320e-01,  7.2859e-02,  1.4309e+00,  7.9354e-01,
         -4.4250e-01,  1.7334e+00, -6.

In [None]:
208516400/10940
# 2085164/547

In [24]:
# Hyper Parameters 
input_size = train_data.data.shape[1] #2085164 -> 128

num_classes = n #50312 --> n (16)
num_epochs = 10
learning_rate = 0.001

In [25]:
# Model
class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.linear1 = nn.Linear(input_size, batch_size, False)
        
    def forward(self, x, y):
        x1 = self.linear1(x)
        x1 = F.relu(x1)
        return x1.mm(y)

In [26]:
model = LogisticRegression().to(device)

In [27]:
model

LogisticRegression(
  (linear1): Linear(in_features=128, out_features=1, bias=False)
)

In [28]:
criterion = nn.SoftMarginLoss(reduction='mean') 
L2Loss = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [29]:
total_step = len(train_loader)

In [30]:
torch.cuda.empty_cache()

In [None]:
# torch.cuda.memory_cached()-torch.cuda.memory_allocated()

In [31]:
# Training the Model
losses = []

for epoch in range(num_epochs):
    train_iter = iter(train_loader)
    for i, (document, _, labels, pis, y_ins) in enumerate(train_iter):
        
        document = Variable(document).float().to(device) # batch size 100
        labels = Variable(labels).float().to(device)
        print("document.shape", document.shape)
        print("labels.shape", labels.shape)

        if type(optimizer) != torch.optim.LBFGS:
            
            optimizer.zero_grad()

            w_xi = model(document, labels)
            print("w_xi.shape", w_xi.shape)
            loss1 = criterion(w_xi, y_in)
            print("y_in.shape", y_in.shape)
            print("loss1.shape", loss1)
            loss2 = L2Loss(labels, pi)
            print("loss2.shape", loss2)
            loss_full = torch.sqrt(loss2) + loss1
            print("loss_full.shape", loss_full)
            if (i+1) % 5 == 0: 
                print ('Epoch [{}/{}], step:[{}/{}], loss: {:.6f}'.format(epoch+1, num_epochs, i+1, total_step, loss_full.item()))
                torch.cuda.empty_cache()
 
            losses.append(loss_full.item())
            loss_full.backward()

            optimizer.step()

        else:
            
            def closure():
                optimizer.zero_grad()

                w_xi = model(document, labels)
                loss1 = criterion(w_xi, y_in)
                loss2 = L2Loss(labels, pi)
                loss_full = torch.sqrt(loss2) + loss1
                if (i+1) % 5 == 0: 
                    print ('Epoch [{}/{}], step:[{}/{}], loss: {:.6f}'.format(epoch+1, num_epochs, i+1, total_step, loss_full.item()))
                    torch.cuda.empty_cache()

                losses.append(loss_full.item())
                loss_full.backward()
                return loss_full

            optimizer.step(closure)
            
        break
    break

document.shape torch.Size([1, 128])
labels.shape torch.Size([1, 3, 16])


RuntimeError: matrices expected, got 2D, 3D tensors at c:\users\vssadministrator\appdata\local\temp\pip-req-build-ya97m4qs\aten\src\th\generic/THTensorMath.cpp:935

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(losses);

In [None]:
labels.t().mm(document)

In [None]:
losses[-469]

In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    for documents, _, labels in valid_data:
        docs = Variable(torch.from_numpy(documents)).float()
        outputs = model(docs)
        print(torch.sum(torch.where(outputs>0.0001, torch.tensor(1), torch.tensor(0)), dim=0))
        print(torch.sum(torch.where(labels>0, torch.tensor(1), torch.tensor(0)), dim=0))

        umm, predicted = torch.max(outputs.data, 1)
        print(umm.shape)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

In [None]:
# Save the model checkpoint
torch.save(model.state_dict(), 'train_valid_model.ckpt')

In [None]:
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
writer = SummaryWriter()

In [None]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')

fig = plt.figure()

c1 = plt.Circle((0.2, 0.5), 0.2, color='r')
c2 = plt.Circle((0.8, 0.5), 0.2, color='r')

ax = plt.gca()
ax.add_patch(c1)
ax.add_patch(c2)
plt.axis('scaled')


# from tensorboardX import SummaryWriter
# writer = SummaryWriter()
writer.add_figure('matplotlib', fig)
writer.close()

In [None]:
# #         Forward + Backward + Optimize
#         def closure():
#             optimizer.zero_grad()
#             outputs = model(document)
#             loss = criterion(outputs, torch.max(labels, 1)[0])
# #             print('loss:', loss.item())
#             loss.backward()
#             return loss
#         optimizer.step(closure)


In [None]:
import umap # fit should get a sparse matrix
%time trans = umap.UMAP(n_neighbors=5, random_state=42, n_components=32, verbose=True).fit(train_data.data)
trans.embedding_

In [None]:
167593,441685 160318:1 227881:1 255720:1 265934:1 432905:2 515946:1 538188:1 586136:1 610561:1 692683:1 
                                        735075:1 828325:1 874107:1 898766:1 1087064:1 1354716:1 1432746:1 
                                        1454292:1 1463839:1 1626714:1 1715083:1 1839104:1 1864180:1 2023750:1 
