In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline


import numpy as np
import theano
import theano.tensor as T
import string
import sys
from datetime import datetime, timedelta
import importlib
import time
import cPickle 
import cPickle as pickle
#import src.utils_contact as utils
#from utils_contact import confusionMatrix, TopAccuracyByRange
import matplotlib.pyplot as plt
import random
import os
import lasagne
import collections

# Seed every RNG this notebook draws from so that reruns are repeatable:
# numpy's global generator and the stdlib `random` module (the training loop
# calls random.shuffle when `shuffle` is enabled).
np.random.seed(1234)
random.seed(1234)

# Very large recursion limit.
# NOTE(review): presumably needed so the deep layer graph can be traversed /
# pickled without hitting RecursionError -- confirm; a limit this high lets
# runaway recursion crash the interpreter instead of raising.
sys.setrecursionlimit(15000000)

Using gpu device 3: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5005)


In [2]:
##### Load Data ############
print "loading data ...",
start_time = time.time()
feats = cPickle.load( open('/mnt/home/siqi/NewContact/TrainFeats/feats_train.pkl') )
weight = cPickle.load( open('/mnt/home/siqi/NewContact/TrainFeats/weights_train.pkl') )
contact = cPickle.load( open('/mnt/home/siqi/NewContact/TrainFeats/contact_train.pkl') )

x_train = feats['train_feat']; x_valid = feats['valid_feat']
w_train = weight['train_weight']; w_valid = weight['valid_weight']
y_train = contact['train_contact']; y_valid = contact['valid_contact']

feats = None; weight = None; contact = None
print "completed ..., it takes", time.time() - start_time, 's'

loading data ... completed ..., it takes 10.0629279613 s


In [3]:
##### Summary ############

print 'we have', len(x_train), 'epochs, each epoch has', len( x_train[0] ), 'elements, the meaning of each elements'
print '\t0. 1D feats', x_train[-1][0].shape
print '\t1. 2D feats', x_train[-1][1].shape
print '\t2. 1D mask', x_train[-1][2].shape
print '\t3. 2D mask', x_train[-1][3].shape
print '\t4. Other 1D feat...', x_train[-1][4].shape

print '\nfor each weight, we have  (0) 3 label weight   and   (1) 12 label weight'

print '\nfor label, we have (0) 3 label  (1) 12 label (2) C_b distance'

we have 4758 epochs, each epoch has 5 elements, the meaning of each elements
	0. 1D feats (28, 27, 26)
	1. 2D feats (28, 27, 27, 5)
	2. 1D mask (28, 27)
	3. 2D mask (28, 27, 27)
	4. Other 1D feat... (28, 27, 78)

for each weight, we have  (0) 3 label weight   and   (1) 12 label weight

for label, we have (0) 3 label  (1) 12 label (2) C_b distance


In [89]:
##### Hyper Parameters ############

TOL = 1e-5
num_epochs = 20
classes = 12
lambda_reg = 0.0001
cut_norm = 10
optimizer = 'adam'
init_lr = 0.01
reduce_lr = True
output_dir = '/mnt/home/siqi/x_models/debug'
config_name = 'config_dense'
shuffle = False

timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
experiment_id = "%s-%s" % (config_name, timestamp)
metadata_path = os.path.join( output_dir,  "retrain_%s" % experiment_id)

response = 1 if classes == 12 else 0
print 'output path is', metadata_path, ', response is', response

output path is /mnt/home/siqi/x_models/debug/retrain_config_dense-20170214-110508 , response is 1


In [5]:
import config_dense as CD
import config_hyper as CH
import contact_util as CU

# Build the model in two stages and print a parameter-count summary.
# NOTE(review): CH.OneDResNet presumably returns the three input layers plus the
# output layer of the 1D stage (their .input_var attributes are used when the
# Theano functions are compiled) -- confirm in config_hyper.
l_in_1, l_in_2, l_in_3, l_1dout = CH.OneDResNet()

dense_net_options = dict(
    classes=classes,
    depth=19,
    first_output=4,
    growth_rate=12,
    num_blocks=2,
    dropout=0.0,
    filter_size=3,
    n_hidden1=40,
    n_hidden2=40,
)
l_out = CD.DenseNet(l_1dout, l_in_1, **dense_net_options)

CU.SummaryNet(l_out, num_para=True, print_shape=False)

# of parameters is 433625


In [6]:
# Symbolic inputs of the loss: integer targets, an integer mask and a
# floating-point weight, each a rank-3 tensor.
sym_y = T.itensor3('target_contact_map')
sym_mask = T.itensor3('mask')
sym_weight = T.tensor3('contact_weight')

# Timer started here; the optimizer cell reports the elapsed time.
grad_start_time = time.time()

# Two views of the same network: stochastic (training) and deterministic
# (evaluation, with batch-norm running averages explicitly switched off).
out_train = lasagne.layers.get_output(l_out, deterministic=False)
out_eval = lasagne.layers.get_output(l_out, deterministic=True, batch_norm_use_averages=False)

# Collapse the 4D network output to (N, classes) rows for the cross-entropy.
pred_train = out_train[:, :, :, :].reshape((-1, classes))
pred_valid = out_eval[:, :, :, :].reshape((-1, classes))

# L2 penalty: sum of squares over every regularizable parameter.
params = lasagne.layers.get_all_params(l_out, regularizable=True)
reg_term = sum(T.sum(param ** 2) for param in params)


def _weighted_masked_crossentropy(pred):
    """Return (per-position cross-entropy, its masked weighted mean) for `pred`.

    `pred` is clipped to [TOL, 1-TOL] before the cross-entropy against the
    flattened targets. The mean is sum(cc * mask * weight) / sum(weight * mask).
    """
    per_position = lasagne.objectives.categorical_crossentropy(
        T.clip(pred, TOL, 1-TOL), sym_y.flatten())
    weighted_total = T.sum(per_position * sym_mask.flatten() * sym_weight.flatten())
    normaliser = T.sum(sym_weight.flatten() * sym_mask.flatten())
    return per_position, weighted_total / normaliser


######### calculate cost function for train #######
cost_train_cc, cost_train_weight_cc = _weighted_masked_crossentropy(pred_train)
cost_train = cost_train_weight_cc + lambda_reg * reg_term

######### calculate cost function for inference #######
# No regularisation term here: the inference cost is the masked loss only.
cost_valid_cc, cost_valid_weight_cc = _weighted_masked_crossentropy(pred_valid)
cost_inference = cost_valid_weight_cc

In [7]:
sh_lr = theano.shared(lasagne.utils.floatX(init_lr))
all_params = lasagne.layers.get_all_params(l_out, trainable=True)

all_grads = [T.grad(cost_train, p, consider_constant= [sym_weight]) for p in all_params] 
print "Creating cost function and computing grads..."
updates, norm_calc = lasagne.updates.total_norm_constraint(all_grads, max_norm=cut_norm, return_norm=True)

if optimizer == 'adam':
    print 'Using Adam Optimizer, with init step size', sh_lr.get_value(),
    updates = lasagne.updates.adam(updates, all_params, learning_rate=sh_lr)
elif optimizer == 'sgd':
    print 'Using SGD Momentom Optimizer, with step size', sh_lr.get_value(),
    updates = lasagne.updates.nesterov_momentum(updates, all_params, sh_lr, 0.9)
print ', done, time cosuming', time.time() - grad_start_time, 's'

Creating cost function and computing grads...
Using Adam Optimizer, with init step size 0.0010000000475 , done, time cosuming 63.6361210346 s


In [8]:
print 'compiling train and eval...'
t_compile = time.time()
train = theano.function([l_in_1.input_var, l_in_2.input_var, l_in_3.input_var, sym_y, sym_mask, sym_weight], \
                        [cost_train, cost_train_cc, cost_train_weight_cc, lambda_reg*reg_term, out_train], \
                        updates=updates, allow_input_downcast=True)

eval_valid  = theano.function([l_in_1.input_var, l_in_2.input_var, l_in_3.input_var, sym_y, sym_mask, sym_weight], \
                        [cost_inference, cost_valid_cc, cost_valid_weight_cc, lambda_reg*reg_term, out_eval], \
                        allow_input_downcast=True)

eval_train  = theano.function([l_in_1.input_var, l_in_2.input_var, l_in_3.input_var, sym_y, sym_mask, sym_weight], \
                        [cost_train, cost_train_cc, cost_train_weight_cc, lambda_reg*reg_term, out_train], \
                        allow_input_downcast=True)
print "compile time %fs" %(time.time()-t_compile)

compiling train and eval...
compile time 156.092435s


In [9]:
# Cap the training features, labels and weights at MAX_LEN = 400.
# NOTE(review): the return value is discarded, so CU.TruncateTrainData
# presumably modifies x_train / y_train / w_train in place -- confirm in
# contact_util. Only the training split is passed; validation data is untouched.
print 'Truncate data to MAX_LEN = 400'
CU.TruncateTrainData(x_train, y_train, w_train, MAX_LEN = 400)

Truncate data to MAX_LEN = 400


In [90]:
start_epoch = 0; num_epochs =10; log_file = open( output_dir + '/log.' + experiment_id, 'w', 0)

loss_train_mean = []; acc_train_mean = []
loss_valid_mean = []; acc_valid_mean = []
loss_valid_mean2 = []; acc_valid_mean2 = []


for epoch in range(start_epoch, num_epochs):
    start_time = time.time()
    if shuffle:
        combined = list(zip(x_train, w_train, y_train))
        random.shuffle(combined)
        x_train[:], w_train[:],  y_train[:] = zip(*combined)
        
    ################   Train  ##########################
    loss_train_epoch = []; weight_train_epoch = [];  acc_train_epoch = []
    print 'epoch', epoch, 'with lr =', np.round( sh_lr.get_value(), 6)
    sys.stdout.flush
    
    for i in range(4000, 4004):
        sys.stdout.write('\r%d/%d for train'%(i+1, len(x_train) ))
        sys.stdout.flush()
        
        l, w, a = CU.RunFuncs(x_train[i], y_train[i], w_train[i], train)
        loss_train_epoch.append(l); weight_train_epoch.append(w); acc_train_epoch.append(a)       
    loss_train_mean.append( np.average(loss_train_epoch, axis=0, weights= weight_train_epoch) )
    acc_train_mean.append( np.mean( acc_train_epoch, 0) )
    print '\t', ' '.join( [str(np.round(t,5)) for t in loss_train_mean[-1][[0,2,3]]] ), acc_train_mean[-1].mean(0)[4]
    
    ################   VALID  ##########################
    loss_valid_epoch = []; weight_valid_epoch = [];  acc_valid_epoch = []
    for i in range(100, 103):
        sys.stdout.write('\r%d/%d for valid'%(i+1, len(x_valid) ))
        sys.stdout.flush()
        l, w, a = CU.RunFuncs(x_valid[i], y_valid[i], w_valid[i], eval_valid)
        loss_valid_epoch.append(l); weight_valid_epoch.append(w); acc_valid_epoch.append(a)       
    loss_valid_mean.append( np.average(loss_valid_epoch, axis=0, weights= weight_valid_epoch) )
    acc_valid_mean.append( np.mean( acc_valid_epoch, 0) )
    print '\t', ' '.join( [str(np.round(t,5)) for t in loss_valid_mean[-1][[0,2,3]]] ), acc_valid_mean[-1].mean(0)[4]

    if reduce_lr and ( epoch+1 == (num_epochs-start_epoch) * 0.5 or \
                      epoch+1 == (num_epochs-start_epoch) * 0.8 ):
        new_lr = sh_lr.get_value() * 0.1
        sh_lr.set_value(lasagne.utils.floatX(new_lr))
        
    with open((metadata_path + "-%d" % (epoch) + ".pkl"), 'w') as f:
        cPickle.dump({'config_name': config_name, 'param_values': lasagne.layers.get_all_param_values(l_out)}, f, \
                 protocol=pickle.HIGHEST_PROTOCOL)
       
    print >> log_file, epoch+1, 
    print >> log_file, ' '.join( [str(np.round(t,5)) for t in loss_train_mean[-1][[0,2,3]]] ), 
    print >> log_file, ' '.join( [str(np.round(t,5)) for t in loss_valid_mean[-1][[0,2,3]]]), 
    print >> log_file, ' '.join(map(str, np.round(acc_train_mean[-1].mean(0),4))),
    print >> log_file, ' '.join(map(str, np.round(acc_valid_mean[-1].mean(0),4))),
    print >> log_file, time.time() - start_time
log_file.close()

epoch 0 with lr = 0.001
4004/4758 for train	8.62694 2.00917 6.61777 0.015625
103/400 for valid	1.95672 1.95672 6.60978 0.0
epoch 1 with lr = 0.001
4004/4758 for train	8.61298 2.00792 6.60506 0.015625
103/400 for valid	1.95598 1.95598 6.59676 0.0
epoch 2 with lr = 0.001
4004/4758 for train	8.59857 2.00666 6.59191 0.015625
103/400 for valid	1.95528 1.95528 6.58341 0.0
epoch 3 with lr = 0.001
4004/4758 for train	8.58398 2.00552 6.57846 0.015625
103/400 for valid	1.95469 1.95469 6.56982 0.0
epoch 4 with lr = 0.001
4004/4758 for train	8.56937 2.00456 6.5648 0.015625
103/400 for valid	1.95422 1.95422 6.55606 0.0
epoch 5 with lr = 0.0001
4004/4758 for train	8.55944 2.00389 6.55555 0.015625
103/400 for valid	1.95418 1.95418 6.55467 0.0
epoch 6 with lr = 0.0001
4004/4758 for train	8.55798 2.00382 6.55416 0.015625
103/400 for valid	1.95414 1.95414 6.55326 0.0
epoch 7 with lr = 0.0001
4004/4758 for train	8.55651 2.00376 6.55275 0.015625
103/400 for valid	1.9541 1.9541 6.55185 0.0
epoch 8 with lr 

In [88]:
# Scratch check: str() of a bool gives its name, here 'False'.
# NOTE(review): `b` is not referenced by any other cell visible here; this looks
# like leftover experimentation and is a candidate for removal.
b = False
str(b)

'False'