In [1]:
import sys
from itertools import repeat
import data, data.gene_datasets
import sklearn, sklearn.model_selection, sklearn.metrics, sklearn.linear_model, sklearn.neural_network, sklearn.tree
import numpy as np
import matplotlib, matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import gene_inference, gene_inference.utils
from gene_inference.models import lr, mlp, decision_tree
from gene_inference.infer_genes import infer_gene, infer_all_genes, sample_neighbors


  from ._conv import register_converters as _register_converters


In [2]:
# Load the TCGA tissue expression dataset from the local data directory.
tcgatissue = data.gene_datasets.TCGATissue(data_dir='./genomics/TCGA/')


class Object(object):
    """Bare attribute container used as a stand-in for an options object."""


# Options handed to the dataset's set_graph(); attributes left as None are
# simply not specified by this notebook.
opt = Object()
opt.graph = "pathway"
opt.dataset = tcgatissue
opt.seed = 0
opt.nb_class = None
opt.nb_examples = None
opt.nb_nodes = None

tcgatissue.set_graph(opt)

# Build a networkx graph from the dataset's adjacency matrix, then rename the
# integer node ids (0..n-1) to the matching column labels of tcgatissue.df.
mapping = dict(enumerate(tcgatissue.df.columns))
g = nx.relabel_nodes(nx.from_numpy_matrix(tcgatissue.adj), mapping)

Converting one-hot labels to integers
'Graph' object has no attribute 'labels'


In [3]:
def _tag_with_sample_count(result, num_samples):
    """Return one ``infer_gene`` result as a DataFrame with a 'samples' column.

    ``infer_gene``'s return type is not visible from this notebook.
    ``DataFrame.append`` used to accept a frame, a named Series or a list of
    records, so the same inputs are normalised here.
    """
    if isinstance(result, pd.Series):
        result = result.to_frame().T
    elif not isinstance(result, pd.DataFrame):
        result = pd.DataFrame(result)
    # Stored as float: that is the dtype the column had when it was filled in
    # one cell at a time on a growing frame.
    return result.assign(samples=float(num_samples))


def _stack_results(frames):
    """Concatenate per-sample-size frames into one frame indexed 0..n-1."""
    if not frames:
        # pd.concat refuses an empty list; an empty sweep yields an empty frame.
        return pd.DataFrame([])
    stacked = pd.concat(frames, ignore_index=True)
    # Alphabetical columns, matching the layout the previous append-based
    # implementation produced.
    return stacked.sort_index(axis=1)


def predict_gene_from_samples(gene, method, max_samples, max_rows=1000):
    """Run ``infer_gene`` for ``gene`` over a sweep of sample counts.

    For every ``num_samples`` in ``range(10, max_samples, 10)`` the inference
    is run twice on the same leading ``max_rows`` rows of ``tcgatissue.df``:
    once with every column and the full graph ``g``, and once restricted to
    ``gene`` plus its direct neighbours in ``g`` (with the matching subgraph).

    Parameters
    ----------
    gene : node label in ``g`` / column label in ``tcgatissue.df``.
    method : model passed straight through to ``infer_gene``.
    max_samples : exclusive upper bound of the sample-count sweep.
    max_rows : number of leading rows of ``tcgatissue.df`` handed to
        ``infer_gene`` (default 1000).

    Returns
    -------
    (full_results, first_degree_results) : two DataFrames holding the rows
    returned by ``infer_gene`` for each sample count, each tagged with a
    'samples' column. Both are empty when the sweep is empty. Every row
    returned by a call is tagged (previously only the last appended row was,
    which is the same thing when ``infer_gene`` returns a single row).
    """
    first_degree = set(g.neighbors(gene))
    first_degree.add(gene)
    first_degree_graph = g.subgraph(first_degree)
    # pandas indexers need an ordered sequence, not a set.
    first_degree_columns = list(first_degree)

    full_frames = []
    first_degree_frames = []
    for num_samples in range(10, max_samples, 10):
        # Both runs slice rows positionally so they see exactly the same
        # samples. The earlier mix of df[:1000] and df.loc[:1000] differed
        # because label slicing with .loc includes its end point.
        full = infer_gene(method, tcgatissue.df.iloc[:max_rows], gene, g,
                          num_samples, penalty=True)
        full_frames.append(_tag_with_sample_count(full, num_samples))

        neighbourhood = tcgatissue.df.iloc[:max_rows].loc[:, first_degree_columns]
        local = infer_gene(method, neighbourhood, gene, first_degree_graph,
                           num_samples, penalty=True)
        first_degree_frames.append(_tag_with_sample_count(local, num_samples))

    return _stack_results(full_frames), _stack_results(first_degree_frames)


In [9]:
# Sweep sample counts 10..50 for gene RPL5 with each model family.
# The printed numbers are progress markers only. print(...) with a single
# argument prints the same text under Python 2 and also parses on Python 3.
full_results_1, first_degree_results_1 = predict_gene_from_samples("RPL5", lr, 60)
print("1")
full_results_2, first_degree_results_2 = predict_gene_from_samples("RPL5", mlp, 60)
print("2")
full_results_3, first_degree_results_3 = predict_gene_from_samples("RPL5", decision_tree, 60)
print("3")

1




2
3


In [10]:
# cgn

In [None]:
# AUC (with std error bars) of the full-graph runs for each model, plotted
# against the row position of each sample-count setting.
plt.figure()

line1 = plt.errorbar(full_results_1.index, full_results_1['auc'], xerr=0, yerr=full_results_1['std'])
line2 = plt.errorbar(full_results_2.index, full_results_2['auc'], xerr=0, yerr=full_results_2['std'])
line3 = plt.errorbar(full_results_3.index, full_results_3['auc'], xerr=0, yerr=full_results_3['std'])

width = 0.2  # not read in this cell; kept in case another cell relies on it
ticks_to_skip = 0
# One stride drives both tick positions and tick labels so the two sequences
# always have the same length (the label slice previously used an undefined
# name and a different stride from the positions).
tick_stride = ticks_to_skip + 1
tick_rows = full_results_1.iloc[::tick_stride, :]
plt.xticks(list(tick_rows.index), tick_rows['samples'], rotation=70)
plt.title("Gene Inference with varying numbers of Samples and Methods")
plt.ylabel("AUC")
plt.xlabel("Number of Samples")
plt.legend((line1[0], line2[0], line3[0]), ('LR', 'MLP', 'Decision Tree'), bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., fontsize=10)

--Return--
> <ipython-input-12-1c5332d66d66>(9)<module>()->None
-> import pdb; pdb.set_trace()
(Pdb) full_results_1.iloc[::5, :]
    auc  first_degree_auc  first_degree_diff  first_degree_std gene_name  \
0  0.52              0.62               -0.1              0.06      RPL5   

   samples  second_degree_auc  second_degree_diff  second_degree_std   std  
0     10.0               0.56               -0.04               0.05  0.03  
(Pdb) full_results_1.iloc[::1, :]
    auc  first_degree_auc  first_degree_diff  first_degree_std gene_name  \
0  0.52              0.62              -0.10              0.06      RPL5   
1  0.57              0.72              -0.15              0.02      RPL5   
2  0.60              0.74              -0.14              0.02      RPL5   
3  0.61              0.75              -0.14              0.02      RPL5   
4  0.63              0.75              -0.12              0.02      RPL5   

   samples  second_degree_auc  second_degree_diff  second_degree_std   st