In [1]:
import utils
import predict
import time as tm
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.datasets import make_classification
import scipy
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.utils.data as data_utils
import torch.nn.functional as F

In [3]:
# from w2v import *
# from embedding_layer import embedding_layer
from sklearn import preprocessing
from sklearn.decomposition import PCA
import scipy.io as sio
from scipy import sparse
import argparse
from visdom import Visdom
from sklearn.externals import joblib 
# from futils import *
# from loss import loss



In [4]:
# load Data
dictSize = 225
(X, y) = utils.loadData( "train", dictSize = dictSize )
X = scipy.sparse.csr_matrix.toarray(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [6]:
# XML_CNN github xmlCNN.py
class xmlCNN(nn.Module):
    def __init__(self, params, embedding_weights):
        super(xmlCNN, self).__init__()
        self.params = params
        self.embedding_layer = embedding_layer(params, embedding_weights)
        self.classifier = cnn_encoder(params)
        
    def forward(self, batch_x, batch_y):
        # ----------- Encode (X, Y) --------------------------------------------
        e_emb = self.embedding_layer.forward(batch_x)
        Y = self.classifier.forward(e_emb)
        loss = self.params.loss_fn(Y, batch_y)
        
        if(loss<0):
            print(cross_entropy)
            print(Y[0:100])
            print(batch_y[0:100])
            sys.exit()

        return loss.view(-1,1), Y

In [7]:
# XML_CNN github embedding_layer.py
class embedding_layer(torch.nn.Module):

    def __init__(self, params, embedding_weights):
        super(embedding_layer, self).__init__()
        self.l = nn.Embedding(params.vocab_size, params.embedding_dim)
        if params.model_variation == 'pretrain':
            self.l.weight.data.copy_(torch.from_numpy(embedding_weights))
            self.l.weight.requires_grad=False

    def forward(self, inputs):
        o = self.l(inputs)
        return o

In [8]:
# XML_CNN github cnn_encoder.py
def out_size(l_in, kernel_size, padding=0, dilation=1, stride=1):
    a = l_in + 2*padding - dilation*(kernel_size - 1) - 1
    b = int(a/stride)
    return b + 1

class cnn_encoder(torch.nn.Module):
    
    def __init__(self, params):
        super(cnn_encoder, self).__init__()
        self.params = params
        self.conv_layers = nn.ModuleList()
        self.pool_layers = nn.ModuleList()
        fin_l_out_size = 0
        
        if(params.dropouts):
            self.drp = nn.Dropout(p=.25)
            self.drp5 = nn.Dropout(p=.5)

        for fsz in params.filter_sizes:
            l_out_size = out_size(params.sequence_length, fsz, stride=2)
            pool_size = l_out_size // params.pooling_units
            l_conv = nn.Conv1d(params.embedding_dim, params.num_filters, fsz, stride=2)
            torch.nn.init.xavier_uniform_(l_conv.weight)
            if params.pooling_type == 'average':
                l_pool = nn.AvgPool1d(pool_size, stride=None, count_include_pad=True)
                pool_out_size = (int((l_out_size - pool_size)/pool_size) + 1)*params.num_filters
            elif params.pooling_type == 'max':
                l_pool = nn.MaxPool1d(2, stride=1)
                pool_out_size = (int(l_out_size*params.num_filters - 2) + 1)
            fin_l_out_size += pool_out_size

            self.conv_layers.append(l_conv)
            self.pool_layers.append(l_pool)

        self.fin_layer = nn.Linear(fin_l_out_size, params.hidden_dims)
        self.out_layer = nn.Linear(params.hidden_dims, params.y_dim)
        torch.nn.init.xavier_uniform_(self.fin_layer.weight)
        torch.nn.init.xavier_uniform_(self.out_layer.weight)

    def forward(self, inputs):
        #o0 = self.drp(self.bn_1(inputs)).permute(0,2,1)
        o0 = inputs.permute(0,2,1)# self.bn_1(inputs.permute(0,2,1))
        if(self.params.dropouts):
            o0 = self.drp(o0) 
        conv_out = []

        for i in range(len(self.params.filter_sizes)):
            o = self.conv_layers[i](o0)
            o = o.view(o.shape[0], 1, o.shape[1]*o.shape[2])
            o = self.pool_layers[i](o)
            o = nn.functional.relu(o)
            o = o.view(o.shape[0],-1)
            conv_out.append(o)
            del o
        if len(self.params.filter_sizes)>1:
            o = torch.cat(conv_out,1)
        else:
            o = conv_out[0]

        o = self.fin_layer(o)
        o = nn.functional.relu(o)
        if(self.params.dropouts):
            o = self.drp5(o) 
        o = self.out_layer(o)
        o = torch.nn.functional.sigmoid(o)
        return o

In [15]:
# XML_CNN github classifier.py
class classifier(nn.Module):
    def __init__(self, params):
        super(classifier, self).__init__()
        self.params = params
        if(self.params.dropouts):
            self.drp = nn.Dropout(.5)
        self.l1 = nn.Linear(params.h_dim, params.H_dim)
        self.l2 = nn.Linear(params.H_dim, params.y_dim)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        torch.nn.init.xavier_uniform_(self.l1.weight)

    def forward(self, H):
        H = self.l1(H)
        H = self.relu(H)
        H = self.l2(H)
        H = self.sigmoid(H)
        return H

In [47]:
import sys
import argparse
# ------------------------ Params -------------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Process some integers.')

parser.add_argument('--zd', dest='Z_dim', type=int, default=100, help='Latent layer dimension')
parser.add_argument('--mb', dest='mb_size', type=int, default=20, help='Size of minibatch, changing might result in latent layer variance overflow')
# parser.add_argument('--hd', dest='h_dim', type=int, default=600, help='hidden layer dimension')
parser.add_argument('--lr', dest='lr', type=int, default=1e-3, help='Learning Rate')
parser.add_argument('--p', dest='plot_flg', type=int, default=0, help='1 to plot, 0 to not plot')
parser.add_argument('--e', dest='num_epochs', type=int, default=100, help='step for displaying loss')

parser.add_argument('--d', dest='disp_flg', type=int, default=0, help='display graphs')
parser.add_argument('--sve', dest='save', type=int, default=1, help='save models or not')
parser.add_argument('--ss', dest='save_step', type=int, default=10, help='gap between model saves')
parser.add_argument('--mn', dest='model_name', type=str, default='', help='model name')
parser.add_argument('--tr', dest='training', type=int, default=1, help='model name')
parser.add_argument('--lm', dest='load_model', type=str, default="", help='model name')
parser.add_argument('--ds', dest='data_set', type=str, default="rcv", help='dataset name')

parser.add_argument('--pp', dest='pp_flg', type=int, default=0, help='1 is for min-max pp, 2 is for gaussian pp, 0 for none')
parser.add_argument('--loss', dest='loss_type', type=str, default="BCELoss", help='Loss')

parser.add_argument('--hidden_dims', type=int, default=512, help='hidden layer dimension')
parser.add_argument('--sequence_length',help='max sequence length of a document', type=int,default=500)
parser.add_argument('--embedding_dim', help='dimension of word embedding representation', type=int, default=300)
parser.add_argument('--model_variation', help='model variation: CNN-rand or CNN-pretrain', type=str, default='CNN-rand')
parser.add_argument('--pretrain_type', help='pretrain model: GoogleNews or glove', type=str, default='glove')
parser.add_argument('--vocab_size', help='size of vocabulary keeping the most frequent words', type=int, default=30000)
parser.add_argument('--drop_prob', help='Dropout probability', type=int, default=.3)
parser.add_argument('--load_data', help='Load Data or not', type=int, default=0)
parser.add_argument('--mg', dest='multi_gpu', type=int, default=0, help='1 for 2 gpus and 0 for normal')
parser.add_argument('--filter_sizes', help='number of filter sizes (could be a list of integer)', type=int, default=[2, 4, 8], nargs='+')
parser.add_argument('--num_filters', help='number of filters (i.e. kernels) in CNN model', type=int, default=32)
parser.add_argument('--pooling_units', help='number of pooling units in 1D pooling layer', type=int, default=32)
parser.add_argument('--pooling_type', help='max or average', type=str, default='max')
parser.add_argument('--model_type', help='glove or GoogleNews', type=str, default='glove')
parser.add_argument('--num_features', help='50, 100, 200, 300', type=int, default=300)
parser.add_argument('--dropouts', help='0 for not using, 1 for using', type=int, default=0)
parser.add_argument('--clip', help='gradient clipping', type=float, default=1000)
parser.add_argument('--dataset_gpu', help='load dataset in full to gpu', type=int, default=1)
parser.add_argument('--dp', dest='dataparallel', help='to train on multiple GPUs or not', type=int, default=0)

# dummy argument to avoid error since using Ipython and argparse already been parsed
parser.add_argument('-f') 

params = parser.parse_args()

In [48]:
# define Params
print (params)
params.loss_fn = torch.nn.BCELoss(size_average=False)
embedding_weights = None
if torch.cuda.is_available():
    params.dtype = torch.cuda.FloatTensor
else:
    params.dtype = torch.FloatTensor

Namespace(Z_dim=100, clip=1000, data_set='rcv', dataparallel=0, dataset_gpu=1, disp_flg=0, drop_prob=0.3, dropouts=0, embedding_dim=300, f='/home/aditya/.local/share/jupyter/runtime/kernel-5b081fa4-859d-4d1b-b457-5d824cb6cc47.json', filter_sizes=[2, 4, 8], hidden_dims=512, load_data=0, load_model='', loss_type='BCELoss', lr=0.001, mb_size=20, model_name='', model_type='glove', model_variation='CNN-rand', multi_gpu=0, num_epochs=100, num_features=300, num_filters=32, plot_flg=0, pooling_type='max', pooling_units=32, pp_flg=0, pretrain_type='glove', save=1, save_step=10, sequence_length=500, training=1, vocab_size=30000)


In [49]:
# params from saveload data
# y_train.shape[1]
params.X_dim = X_train.shape[1]
params.y_dim = 1
params.N = X_train.shape[0]
params.vocab_size = 50
params.classes = 1

In [50]:
if(len(params.model_name)==0):
    params.model_name = "Gen_data_CNN_Z_dim-{}_mb_size-{}_hidden_dims-{}_preproc-{}_loss-{}_sequence_length-{}_embedding_dim-{}_params.vocab_size={}".format(params.Z_dim, params.mb_size, params.hidden_dims, params.pp_flg, params.loss_type, params.sequence_length, params.embedding_dim, params.vocab_size)

print('Saving Model to: ' + params.model_name)

Saving Model to: Gen_data_CNN_Z_dim-100_mb_size-20_hidden_dims-512_preproc-0_loss-BCELoss_sequence_length-500_embedding_dim-300_params.vocab_size=50


In [51]:
from sklearn.metrics import log_loss

# def pass(a, b, model, x_tr, Y, params):
#     # e_emb = model.embedding_layer.forward(x_tr[i:i+params.mb_size].view(params.mb_size, x_te.shape[1]))
#     # Y[i:i+params.mb_size,:] = model.classifier(e_emb).data
#     e_emb = model.embedding_layer.forward(x_tr[a:b].view(params.mb_size, x_tr.shape[1]))
#     Y[a:b,:] = model.classifier(e_emb).data

#     return Y

def test_class(x_te, y_te, params, model=None, x_tr=None, y_tr=None, embedding_weights=None, verbose=True, save=True ):

    
    if(model==None):
        if(embedding_weights is None):
            print("Error: Embedding weights needed!")
            exit()
        else:
            model = xmlCNN(params, embedding_weights)
            # state_dict = torch.load(params.load_model + "/model_best", map_location=lambda storage, loc: storage)
            # new_state_dict = OrderedDict()
            # for k, v in state_dict.items():
            #     name = k[7:]
            #     new_state_dict[name] = v
            # model.load_state_dict(new_state_dict)
            # del new_state_dict
            model = load_model(model, params.load_model)
            
    if(torch.cuda.is_available()):
        params.dtype_f = torch.cuda.FloatTensor
        params.dtype_i = torch.cuda.LongTensor
        model = model.cuda()
    else:
        params.dtype_f = torch.FloatTensor
        params.dtype_i = torch.LongTensor

    if(x_tr is not None and y_tr is not None):
        x_tr, _ = load_batch_cnn(x_tr, y_tr, params, batch=False)
        Y = np.zeros(y_tr.shape)
        rem = x_tr.shape[0]%params.mb_size 
        for i in range(0, x_tr.shape[0] - rem, params.mb_size ):
            e_emb = model.embedding_layer.forward(x_tr[i:i+params.mb_size].view(params.mb_size, x_te.shape[1]))
            Y[i:i+params.mb_size,:] = model.classifier(e_emb).data
    if(rem):
        e_emb = model.embedding_layer.forward(x_tr[-rem:].view(rem, x_te.shape[1]))
        Y[-rem:, :] = model.classifier(e_emb).data
        
    loss = log_loss(y_tr, Y)
    prec = precision_k(y_tr.todense(), Y, 5)
    print('Test Loss; Precision Scores [1->5] {} {} {} {} {} Cross Entropy {};'.format(prec[0], prec[1], prec[2], prec[3], prec[4],loss))
    
    
    x_te, _ = load_batch_cnn(x_te, y_te, params, batch=False)
    Y2 = np.zeros(y_te.shape)
    rem = x_te.shape[0]%params.mb_size
    for i in range(0,x_te.shape[0] - rem,params.mb_size):
        e_emb = model.embedding_layer.forward(x_te[i:i+params.mb_size].view(params.mb_size, x_te.shape[1]))
        Y2[i:i+params.mb_size,:] = model.classifier(e_emb).data

    if(rem):
        e_emb = model.embedding_layer.forward(x_te[-rem:].view(rem, x_te.shape[1]))
        Y2[-rem:,:] = model.classifier(e_emb).data

    loss = log_loss(y_te, Y2) # Reverse of pytorch
    #print("A")
    prec = precision_k(y_te.todense(), Y2, 5) # Reverse of pytorch
    print('Test Loss; Precision Scores [1->5] {} {} {} {} {} Cross Entropy {};'.format(prec[0], prec[1], prec[2], prec[3], prec[4],loss))
    
    if(save):
        Y_probabs2 = sparse.csr_matrix(Y2)
        sio.savemat('/'.join(params.load_model.split('/')[-1]) + '/score_matrix.mat' , {'score_matrix': Y_probabs2})

    return prec[0], loss

In [56]:
def load_batch_cnn(x_tr, y_tr, params, batch=True, batch_size=0, decoder_word_input=None, decoder_target=None, testing=0):

    indexes = 0 # for scope
    if(batch):
        if(batch_size):
            params.go_row = np.ones((batch_size,1))*params.vocabulary[params.go_token]
            params.end_row = np.ones((batch_size,1))*params.vocabulary[params.end_token]
            indexes = np.array(np.random.randint(x_tr.shape[0], size=batch_size))
            x_tr, y_tr = x_tr[indexes,:], y_tr[indexes,:]
        else:
            params.go_row = np.ones((params.mb_size,1))*params.vocabulary[params.go_token]
            params.end_row = np.ones((params.mb_size,1))*params.vocabulary[params.end_token]
            indexes = np.array(np.random.randint(x_tr.shape[0], size=params.mb_size))
            x_tr, y_tr = x_tr[indexes,:], y_tr[indexes,:]
    else:
        params.go_row = np.ones((x_tr.shape[0],1))*params.vocabulary[params.go_token]
        params.end_row = np.ones((x_tr.shape[0],1))*params.vocabulary[params.end_token]

    x_tr = x_tr.todense()
    y_tr = y_tr.todense()

    x_tr = Variable(torch.from_numpy(x_tr.astype('int')).type(params.dtype_i))
    if(testing==0):
        y_tr = Variable(torch.from_numpy(y_tr.astype('float')).type(params.dtype_f))

    return x_tr, y_tr

In [57]:
def train(x_tr, y_tr, x_te, y_te, embedding_weights, params):
		
	viz = Visdom()
	loss_best = float('Inf')
	bestTotalLoss = float('Inf')
	best_test_acc = 0
	max_grad = 0

	num_mb = np.ceil(params.N/params.mb_size)
	
	model = xmlCNN(params, embedding_weights)
	if(torch.cuda.is_available()):
		print("--------------- Using GPU! ---------")
		model.params.dtype_f = torch.cuda.FloatTensor
		model.params.dtype_i = torch.cuda.LongTensor
		
		model = model.cuda()
	else:
		model.params.dtype_f = torch.FloatTensor
		model.params.dtype_i = torch.LongTensor
		print("=============== Using CPU =========")

	optimizer = optim.Adam(filter(lambda p: p.requires_grad,model.parameters()), lr=params.lr)
	print(model);print("%"*100)
	
	if params.dataparallel:
		model = nn.DataParallel(model)
	
	if(len(params.load_model)):
		params.model_name = params.load_model
		print(params.load_model)
		model, optimizer, init = load_model(model, params.load_model, optimizer=optimizer)
	else:
		init = 0
	iteration = 0
	# =============================== TRAINING ====================================
	for epoch in range(init, params.num_epochs):
		totalLoss = 0.0

		for i in range(int(num_mb)):
			# ------------------ Load Batch Data ---------------------------------------------------------
			batch_x, batch_y = load_batch_cnn(x_tr, y_tr, params)
			# -----------------------------------------------------------------------------------
			loss, output = model.forward(batch_x, batch_y)
			loss = loss.mean().squeeze()
			# --------------------------------------------------------------------

			totalLoss += loss.data
			
			if i % int(num_mb/12) == 0:
				print('Iter-{}; Loss: {:.4}; best_loss: {:.4}; max_grad: {}:'.format(i, loss.data, loss_best, max_grad))
				if not os.path.exists('../saved_models/' + params.model_name ):
					os.makedirs('../saved_models/' + params.model_name)
				save_model(model, optimizer, epoch, params.model_name + "/model_best_batch")
				if(loss<loss_best):
					loss_best = loss.data

			# ------------------------ Propogate loss -----------------------------------
			loss.backward()
			loss = loss.data
			torch.nn.utils.clip_grad_norm(model.parameters(), params.clip)
			sm = 0
			sm2=0
			max_grad = 0
			for p in model.parameters():
				if(p.grad is not None):
					max_grad = max(torch.max(p.grad).data[0], max_grad)
					sm += p.grad.view(-1).shape[0]
					sm2 = p.grad.mean().squeeze()*p.grad.view(-1).shape[0]
			avg_grad = (sm2/sm).data[0]
			# optimizer.step()
			if(torch.__version__ == '0.4.0'):
					torch.nn.utils.clip_grad_norm_(model.parameters(), params.clip)
			else:
					torch.nn.utils.clip_grad_norm(model.parameters(), params.clip)
			for p in model.parameters():
					if(p.grad is not None):
							p.data.add_(-params.lr, p.grad.data)

			optimizer.zero_grad()

			# ----------------------------------------------------------------------------
			if(params.disp_flg):
				if(iteration==0):
					loss_old = loss
				else:
					viz.line(X=np.linspace(iteration-1,iteration,50), Y=np.linspace(loss_old, loss,50), update='append', win=win)
					loss_old = loss
				if(iteration % 100 == 0 ):
					win = viz.line(X=np.arange(iteration, iteration + .1), Y=np.arange(0, .1))
			iteration +=1

			if(epoch==0):
				break

		if(totalLoss<bestTotalLoss):

			bestTotalLoss = totalLoss
			if not os.path.exists('../saved_models/' + params.model_name ):
				os.makedirs('../saved_models/' + params.model_name)
			save_model(model, optimizer, epoch, params.model_name + "/model_best_epoch")

		print('End-of-Epoch: {} Loss: {:.4}; best_loss: {:.4};'.format(epoch, totalLoss, bestTotalLoss))
	
		# import pdb
		# pdb.set_trace()
		test_prec_acc, test_ce_loss = test_class(x_te, y_te, params, model=model, verbose=False, save=False)
		model.train()
		
		if(test_prec_acc > best_test_acc):
			best_test_loss = test_ce_loss
			best_test_acc = test_prec_acc
			print("This acc is better than the previous recored test acc:- {} ; while CELoss:- {}".format(best_test_acc, best_test_loss))
			if not os.path.exists('../saved_models/' + params.model_name ):
				os.makedirs('../saved_models/' + params.model_name)
			save_model(model, optimizer, epoch, params.model_name + "/model_best_test")

		if epoch % params.save_step == 0:
			save_model(model, optimizer, epoch, params.model_name + "/model_" + str(epoch))


In [58]:
train(X_train, y_train, X_test, y_test, embedding_weights, params)

Setting up a new session...
Traceback (most recent call last):
  File "/home/aditya/Vpy35/lib/python3.5/site-packages/urllib3/connection.py", line 157, in _new_conn
    (self._dns_host, self.port), self.timeout, **extra_kw
  File "/home/aditya/Vpy35/lib/python3.5/site-packages/urllib3/util/connection.py", line 84, in create_connection
    raise err
  File "/home/aditya/Vpy35/lib/python3.5/site-packages/urllib3/util/connection.py", line 74, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/aditya/Vpy35/lib/python3.5/site-packages/urllib3/connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "/home/aditya/Vpy35/lib/python3.5/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/lib/python3.5/http/client.py", line 1122, in request
 

Exception in user code:
------------------------------------------------------------
xmlCNN(
  (embedding_layer): embedding_layer(
    (l): Embedding(50, 300)
  )
  (classifier): cnn_encoder(
    (conv_layers): ModuleList(
      (0): Conv1d(300, 32, kernel_size=(2,), stride=(2,))
      (1): Conv1d(300, 32, kernel_size=(4,), stride=(2,))
      (2): Conv1d(300, 32, kernel_size=(8,), stride=(2,))
    )
    (pool_layers): ModuleList(
      (0): MaxPool1d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
      (1): MaxPool1d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
      (2): MaxPool1d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
    )
    (fin_layer): Linear(in_features=23869, out_features=512, bias=True)
    (out_layer): Linear(in_features=512, out_features=1, bias=True)
  )
)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


AttributeError: 'Namespace' object has no attribute 'vocabulary'