In [6]:
# Notebook setup: configure logging, import dependencies, grab the root logger.
# logging.basicConfig is called before Compute_Relevance is imported; keep this
# order when editing the cell.
import logging
import sys
# Send log records to stdout (so they appear in the cell output) at DEBUG level,
# formatted as "<timestamp> <logger name>-<level>: <message>".
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s %(name)s-%(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')

import numpy as np
from scipy.spatial.distance import squareform
# 'modules/' is a relative path, so it is resolved against the current working
# directory. It has to be appended before the import on the next line;
# presumably Compute_Relevance lives in that directory -- TODO confirm.
sys.path.append('modules/')
import Compute_Relevance
# NOTE(review): pyplot is not referenced in the cells visible here.
import matplotlib.pyplot as plt

# An empty name yields the root logger.
logger = logging.getLogger("")

In [7]:
# ---------------------------------------------------------------------------
# INPUT DATA and INPUT PARAMETERS
# ---------------------------------------------------------------------------

# Single root shared by every input/output location below. It used to be
# repeated verbatim in two path literals; edit only this line to relocate the data.
base_dir = '/media/mkasimova/Data2/Anton.PRODUCTION_RUN/COMBINED/analysis/CLUSTERING.CHOSEN.CV/NEURAL.NETWORK/marina-relevance-prop/'
# Directory holding the input array; the results file is written here as well.
# Keep the trailing '/': file names are appended by plain string concatenation.
home_dir = base_dir+'VSD_SC/'
# Load clusters indices and the array with collective variables
clustering = np.loadtxt(base_dir+'BIAS.CVs/cluster_indices_mean.0.sigma.0.67.2.txt')
data = np.load(home_dir+'frame_i_j_contacts_dt1.npy')
# Set number of iterations
n_iter = 10
# Set number of halves to use for relevance estimation
n_halves = 2
# Set parameters of the neural network (sizes of the hidden layers)
hidden_layer_sizes = (100,)
# Set points to keep for further analysis (some points should be discarded if
# clustering is not clean). Each entry is a [start, end] pair handed to
# Compute_Relevance.keep_datapoints, which defines how the bounds are interpreted.
# NOTE(review): if the pairs are used as plain slices, the -1 end marker drops
# the final data point -- confirm against keep_datapoints.
points_to_keep = [
    [0, 1450],
    [1750, 3650],
    [4150, 5900],
    [6800, -1],
]

In [None]:
# ---------------------------------------------------------------------------
# RUN the CODE
# ---------------------------------------------------------------------------

# Keep only the chosen points of the input data and of the cluster indices
data_keep, clustering_keep = Compute_Relevance.keep_datapoints(
    data, clustering, points_to_keep)

# Vectorize the retained data; the result is the input of relevance propagation
data_vect = Compute_Relevance.vectorize(data_keep)

# Run NN and relevance propagation.
# Outputs, as described by the original author:
#   relevance[number of clusters, number of features, av/std]
#   error[n_iter*n_halves]
relevance, error = Compute_Relevance.perform_relevance_propagation(
    data_vect,
    clustering_keep,
    hidden_layer_sizes,
    n_iter,
    n_halves)

2018-10-10 14:10:26 Compute Relevance-INFO: Performing relevance propagation for the dataset with 18915 features and 9003 samples
2018-10-10 14:10:26 Compute Relevance-INFO: Number of clusters is 2
2018-10-10 14:10:26 Compute Relevance-INFO: Running iteration 1 ...
2018-10-10 14:10:27 Compute Relevance-INFO: 			Scaling the input dataset ...
2018-10-10 14:10:32 Compute Relevance-INFO: 			Training the neural network ...
2018-10-10 14:10:41 Compute Relevance-INFO: 			Checking for overfit ...
2018-10-10 14:10:43 Compute Relevance-INFO: 			Overfit error is 0.0444345700955
2018-10-10 14:10:43 Compute Relevance-INFO: 			Error is less than 5%, therefore computing relevance ...
2018-10-10 14:10:47 Compute Relevance-INFO: 			Rescaling relevance according to min and max in each frame ...
2018-10-10 14:10:47 Compute Relevance-INFO: 			... and averaging it over each cluster
2018-10-10 14:10:47 Compute Relevance-INFO: 			Scaling the input dataset ...
2018-10-10 14:10:53 Compute Relevance-INFO: 			Tr

In [5]:
# ---------------------------------------------------------------------------
# WRITE RESULTS
# ---------------------------------------------------------------------------

# For every cluster (first axis) and each of the two entries on the last axis,
# squareform expands the condensed feature vector into a square matrix, whose
# rows are then summed. Rows of the stacked array are ordered cluster-major:
# (cluster 0, entry 0), (cluster 0, entry 1), (cluster 1, entry 0), ...
# NOTE(review): presumably the square matrix is residue-by-residue, so each row
# sum is a per-residue total -- confirm against Compute_Relevance.vectorize.
relevance_per_residue = np.asarray([
    squareform(relevance[cluster_idx, :, stat_idx]).sum(axis=1)
    for cluster_idx in range(relevance.shape[0])
    for stat_idx in range(2)
])

# Transposed on write: each column of the file is one (cluster, entry) pair.
np.savetxt(home_dir+'results.2.dat', relevance_per_residue.T)