# Init: imports and logging setup

In [1]:
from __future__ import absolute_import, division, print_function

import logging
import sys
# Root-logger settings: send records of level DEBUG and above to stdout,
# prefixed with a timestamp, the logger name and the level name.
_logging_settings = dict(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s %(name)s-%(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logging.basicConfig(**_logging_settings)
import os
import numpy as np
from modules import utils, feature_extraction as fe, postprocessing
import matplotlib.pyplot as plt

# Notebook-level logger; its name "beta2" fills the %(name)s field of the
# log format configured above.
logger = logging.getLogger("beta2")

## Load data 

In [2]:
# Directory holding the clustering inputs. Its value (including the trailing
# slash) is left untouched because the same string is later passed to
# postprocessing.average_and_persist.
# NOTE(review): that helper may build paths by concatenation - confirm before
# changing the format of this string.
working_dir = os.path.expanduser("~/projects/gpcr/mega/Result_Data/beta2-dror/clustering/")

# Build file paths with os.path.join so loading does not depend on working_dir
# ending with a path separator.
raw_data = np.load(os.path.join(working_dir, "frame_distances_CA_inv.npy"))
cluster_indices = np.loadtxt(os.path.join(working_dir, 'cluster_indices_.txt'))

# NOTE(review): scale_data is not read by any cell visible in this notebook.
scale_data = True

data = utils.vectorize(raw_data)  # Our training data to classifiers
labels = utils.transform_to_matrix(cluster_indices)  # Our training labels to classifiers

# Mapping from residue Id to the index of that residue in the data
# (for proteins with missing residues); None here.
index_to_residue_mapping = None

# np.unique counts distinct cluster ids for any array shape returned by
# np.loadtxt, including a 0-d array from a single-value file.
logger.info("Loaded data of shape %s and %s clusters", data.shape, np.unique(cluster_indices).size)

2018-10-26 17:25:11 beta2-INFO: Loaded data of shape (857, 40186) and 3 clusters


## Define the different methods to use

In [3]:
# Settings forwarded as keyword arguments to every extractor constructor.
n_iterations = 2
n_splits = 2
_extractor_settings = dict(n_splits=n_splits, n_iterations=n_iterations)

# Only the KL extractor is active. The commented entries are alternative
# extractors that can be enabled by uncommenting them.
feature_extractors = [
    # fe.ElmFeatureExtractor(data, labels, n_splits=n_splits, n_iterations=n_iterations),
    fe.KLFeatureExtractor(data, labels, **_extractor_settings),
    # fe.PCA_feature_extract(data, labels, n_splits=n_splits, n_iterations=n_iterations),
    # fe.RF_feature_extract(data, labels, n_splits=n_splits, n_iterations=n_iterations),
    # fe.MlpFeatureExtractor(data, labels, n_splits=2, n_iterations=n_iterations),
]
logger.info("Done. using %s feature extractors", len(feature_extractors))

2018-10-26 17:25:11 beta2-INFO: Done. using 1 feature extractors


# Run the relevance analysis

In [4]:
# Build one (extractor, relevance_avg, relevance_std) record per extractor.
results = []
for extractor in feature_extractors:
    logger.info("Computing relevance for extractors %s", extractor.name)
    # extract_features() returns three values; the third (errors) is unpacked
    # but not stored in results.
    relevance_avg, relevance_std, errors = extractor.extract_features()
    record = (extractor, relevance_avg, relevance_std)
    results.append(record)
logger.info("Done")

2018-10-26 17:25:11 beta2-INFO: Computing relevance for extractors KL
2018-10-26 17:25:11 Extracting feature-DEBUG: Iteration 1 of 4
2018-10-26 17:25:15 Extracting feature-DEBUG: Error below 5% - computing feature importance.
2018-10-26 17:25:15 Extracting feature-DEBUG: Iteration 2 of 4
2018-10-26 17:25:20 Extracting feature-DEBUG: Error below 5% - computing feature importance.
2018-10-26 17:25:21 Extracting feature-DEBUG: Iteration 3 of 4
2018-10-26 17:25:25 Extracting feature-DEBUG: Error below 5% - computing feature importance.
2018-10-26 17:25:28 Extracting feature-DEBUG: Iteration 4 of 4
2018-10-26 17:25:31 Extracting feature-DEBUG: Error below 5% - computing feature importance.
2018-10-26 17:25:34 beta2-INFO: Done


# Remap and persist results 

In [None]:
# Pass every stored record to postprocessing.average_and_persist together with
# the cluster assignment and the working directory.
# NOTE(review): the saved output of this cell contains "IOPub message rate
# exceeded" notices, so this step emitted a very large amount of output -
# consider checking what average_and_persist prints when visualize=True.
for extractor, relevance_avg, relevance_std in results:
    postprocessing.average_and_persist(
        extractor,
        relevance_avg,
        relevance_std,
        cluster_indices,
        working_dir,
        visualize=True,
    )

plt.show()
logger.info("Done")































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.




























































IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.




























































IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.












































































































