In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import heapq
import pandas as pd


In [98]:
def sq_err(a, b):
    """Return the sum of squared differences between two equally-shaped arrays.

    Parameters
    ----------
    a, b : numpy.ndarray
        Arrays with identical ``shape``.

    Returns
    -------
    The squared element-wise differences summed along the first axis:
    a scalar for 1-D input, an array of per-column sums for 2-D input.

    Raises
    ------
    AssertionError
        If the two shapes differ.
    """
    assert a.shape == b.shape
    # np.sum reduces in compiled code, whereas the builtin sum() walks the
    # array one element at a time in Python. axis=0 keeps the builtin's
    # "reduce over the first axis only" semantics for multi-dimensional input.
    return np.sum(np.square(a - b), axis=0)

def mse_err(a, b):
    """Mean squared error: ``sq_err(a, b)`` divided by the length of ``a``'s first axis."""
    total = sq_err(a, b)
    n_samples = a.shape[0]
    # Multiplying by 1.0 first forces true (floating-point) division.
    return total * 1.0 / n_samples

def items_by_sqe(x, items):
    """Build a min-heap of ``(squared_error, position)`` pairs for ``items``.

    Each item is scored with ``sq_err(x, item)``; items whose elements sum to
    zero are left out. The returned list satisfies the ``heapq`` invariant, so
    ``heapq.heappop`` yields the closest items first.
    """
    heap = []
    for position, candidate in enumerate(items):
        if sum(candidate) != 0:
            heapq.heappush(heap, (sq_err(x, candidate), position))
    return heap

def read_genelist():
    """Load ``./data/genelist.csv`` into a DataFrame.

    The first CSV column becomes the index; the remaining column(s) are
    returned as data.
    """
    # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
    # read_csv(..., index_col=0, parse_dates=True) is its documented equivalent.
    return pd.read_csv('./data/genelist.csv', index_col=0, parse_dates=True)

def read_csv(item):
    """Load the steady-state train and test CSV files for one item.

    Parameters
    ----------
    item : str
        File-name prefix; the files read are
        ``./data/<item>_Train_SteadyState.csv`` and
        ``./data/<item>_Test_SteadyState.csv``.

    Returns
    -------
    (df_train, df_test) : tuple of pandas.DataFrame
        Both frames use the first CSV column as their index.
    """
    # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
    # read_csv(..., index_col=0, parse_dates=True) is its documented equivalent.
    df_train = pd.read_csv('./data/' + item + '_Train_SteadyState.csv',
                           index_col=0, parse_dates=True)
    df_test = pd.read_csv('./data/' + item + '_Test_SteadyState.csv',
                          index_col=0, parse_dates=True)
    return df_train, df_test

def save_top3(df_plot, y_pred, indices, gene, train_test='train'):
    """Plot selected columns of ``df_plot`` and save the figure as a PNG.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        Frame whose columns are selected by position.
    y_pred : array-like or None
        Optional extra series drawn on the same axes, labelled 'Predicted'.
    indices : array-like of int
        Column positions to plot; position 100 is always appended.
    gene : str
        Used in the output file name (``<gene>_top3.png``).
    train_test : str
        Sub-directory of ``./exp_results/figures/g1_g10_g15/`` to save into.
    """
    # Create the figure and axes ourselves and hand the axes to pandas.
    # Without ax=, DataFrame.plot() opens a second figure, so closing `fig`
    # would close an empty figure and leave the plotted one open.
    fig, ax = plt.subplots()
    try:
        df_plot.iloc[:, np.append(indices, 100)].plot(ax=ax)  # 100 is yt, the correct value
        if y_pred is not None:
            ax.plot(y_pred, label='Predicted')
            # Rebuild the legend so the line added after pandas drew it is listed.
            ax.legend()
        fig.savefig('./exp_results/figures/g1_g10_g15/' + train_test + '/' + gene + '_top3.png',
                    bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

# Split the train and test dataframes into feature matrices and target vectors.
# Returns X, y, Xt, yt.
def get_train_test(tr, te):
    """Return ``(X, y, Xt, yt)`` as numpy arrays.

    Columns at positions 0-99 of each frame form the 2-D ``X`` / ``Xt``
    arrays; the column at position 100 forms the 1-D ``y`` / ``yt`` arrays.
    """
    feature_cols = slice(0, 100)
    target_col = 100
    X, y = tr.iloc[:, feature_cols].values, tr.iloc[:, target_col].values
    Xt, yt = te.iloc[:, feature_cols].values, te.iloc[:, target_col].values
    return X, y, Xt, yt

# Rows of training data that show suppression of any input Gene
# def get_suppresed_row_indices(W):
#     vals, _, counts = np.unique(np.where(W[:][:] == 0)[0], return_counts=True, return_index=True)
#     return vals[counts == 2]

def get_suppresed_genes_and_rows(W, cur_gene, df_genelist):
    """Find rows of ``W`` with exactly two zero entries and name the zeroed genes.

    Parameters
    ----------
    W : numpy.ndarray
        2-D array; column ``j`` is taken to correspond to the gene at
        position ``j`` of ``df_genelist``.
    cur_gene : str
        Gene name to exclude from the returned names.
    df_genelist : pandas.DataFrame
        Gene list with the gene names in the column labelled ``'0'``.

    Returns
    -------
    (genes, rows) : tuple of numpy.ndarray
        ``rows`` holds the indices of the rows of ``W`` that contain exactly
        two zeros. ``genes`` is a flat array with the names of the zero-valued
        genes in those rows (row-major order), excluding ``cur_gene``.
    """
    # Row index of every zero entry; a row shows up once per zero it contains.
    zero_rows = np.where(W == 0)[0]
    vals, counts = np.unique(zero_rows, return_counts=True)

    # Rows with exactly two zeros (i.e. exactly one other gene suppression)
    rows = vals[counts == 2]

    # Column positions of the zeros inside those rows
    zero_cols = np.where(W[rows] == 0)[1]

    # Positions (not index labels) of every gene other than the current one.
    # Positions are what the zero columns and the .iloc lookup below use, so
    # the filter is correct whatever index the gene list carries.
    other_gene_positions = np.flatnonzero((df_genelist['0'] != cur_gene).values)

    # Vectorised membership test instead of a per-element `x in array` scan.
    suppressed = zero_cols[np.isin(zero_cols, other_gene_positions)]
    return np.reshape(df_genelist.iloc[suppressed].values, (-1,)), rows



In [99]:
# Load the gene list and preview its first rows; the preview shows an integer
# index and a single column named '0' holding the gene names.
df_genelist = read_genelist()
df_genelist.head() #df_genelist.head().values[0]

Unnamed: 0,0
0,G1
1,G10
2,G15
3,G23
4,G25


In [101]:
### G1
# Row 0 of the gene list is the gene analysed in this cell.
curr_gene = df_genelist.iloc[0]
# Load that gene's train/test CSVs and split them into X, y, Xt, yt.
df_train, df_test = read_csv(curr_gene.values[0])
X, y, Xt, yt = get_train_test(df_train, df_test)
# Last expression of the cell: displays the (gene names, row indices) tuple.
get_suppresed_genes_and_rows(X, curr_gene.values[0], df_genelist)
#get_suppresed_genes_and_rows(X, 'G1', df_genelist.values )

(array(['G2', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10', 'G11', 'G12',
        'G13', 'G14', 'G15', 'G16', 'G18', 'G19', 'G21', 'G22', 'G24',
        'G25', 'G26', 'G28', 'G29', 'G30', 'G33', 'G34', 'G35', 'G36',
        'G38', 'G40', 'G41', 'G42', 'G43', 'G44', 'G45', 'G46', 'G47',
        'G48', 'G49', 'G50', 'G51', 'G52', 'G55', 'G56', 'G57', 'G58',
        'G59', 'G61', 'G62', 'G63', 'G64', 'G65', 'G66', 'G67', 'G68',
        'G70', 'G71', 'G72', 'G73', 'G74', 'G75', 'G76', 'G78', 'G79',
        'G80', 'G81', 'G82', 'G83', 'G85', 'G86', 'G87', 'G88', 'G89',
        'G90', 'G91', 'G92', 'G93', 'G94', 'G95', 'G96', 'G97', 'G98', 'G99'], dtype=object),
 array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
        53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
        70, 71, 72, 73, 74, 75, 76

In [102]:
### G10
# Row 1 of the gene list is the gene analysed in this cell.
curr_gene = df_genelist.iloc[1]
# Load that gene's train/test CSVs and split them into X, y, Xt, yt.
df_train, df_test = read_csv(curr_gene.values[0])
X, y, Xt, yt = get_train_test(df_train, df_test)
# Last expression of the cell: displays the (gene names, row indices) tuple.
get_suppresed_genes_and_rows(X, curr_gene.values[0], df_genelist)


(array(['G1', 'G2', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G11', 'G12', 'G13',
        'G14', 'G15', 'G16', 'G18', 'G19', 'G21', 'G22', 'G24', 'G25',
        'G26', 'G28', 'G29', 'G30', 'G33', 'G34', 'G35', 'G36', 'G38',
        'G40', 'G41', 'G42', 'G43', 'G44', 'G45', 'G46', 'G47', 'G48',
        'G49', 'G50', 'G51', 'G52', 'G55', 'G56', 'G57', 'G58', 'G59',
        'G61', 'G62', 'G63', 'G64', 'G65', 'G66', 'G67', 'G68', 'G70',
        'G71', 'G72', 'G73', 'G74', 'G75', 'G76', 'G78', 'G79', 'G80',
        'G81', 'G82', 'G83', 'G85', 'G86', 'G87', 'G88', 'G89', 'G90',
        'G91', 'G92', 'G93', 'G94', 'G95', 'G96', 'G97', 'G98', 'G99'], dtype=object),
 array([ 1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
        53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
        70, 71, 72, 73, 74, 75, 76,

In [103]:
### G15
# Row 2 of the gene list is the gene analysed in this cell.
curr_gene = df_genelist.iloc[2]
# Load that gene's train/test CSVs and split them into X, y, Xt, yt.
df_train, df_test = read_csv(curr_gene.values[0])
X, y, Xt, yt = get_train_test(df_train, df_test)
# Last expression of the cell: displays the (gene names, row indices) tuple.
get_suppresed_genes_and_rows(X, curr_gene.values[0], df_genelist)


(array(['G1', 'G2', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10', 'G11', 'G12',
        'G13', 'G14', 'G16', 'G18', 'G19', 'G21', 'G22', 'G24', 'G25',
        'G26', 'G28', 'G29', 'G30', 'G33', 'G34', 'G35', 'G36', 'G38',
        'G40', 'G41', 'G42', 'G43', 'G44', 'G45', 'G46', 'G47', 'G48',
        'G49', 'G50', 'G51', 'G52', 'G55', 'G56', 'G57', 'G58', 'G59',
        'G61', 'G62', 'G63', 'G64', 'G65', 'G66', 'G67', 'G68', 'G70',
        'G71', 'G72', 'G73', 'G74', 'G75', 'G76', 'G78', 'G79', 'G80',
        'G81', 'G82', 'G83', 'G85', 'G86', 'G87', 'G88', 'G89', 'G90',
        'G91', 'G92', 'G93', 'G94', 'G95', 'G96', 'G97', 'G98', 'G99'], dtype=object),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
        53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
        70, 71, 72, 73, 74, 75, 76,