<br>
<font color="#500000" size=15 style="margin:2cm"> AGGREGATIBACTER FUSE</font>



In [1]:
# importing modules
import matplotlib.pyplot as plt
import numpy as np
from skfusion import fusion
from cross_validation import *
import pickle
%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 8.0)

#Matrices import

Load in all the matrices!

In [2]:
prot_rna = pickle.load(open('../matrices/pickles/cr_z-score_raw.pkl', 'rb'))
secretion_prot = pickle.load(open('../matrices/pickles/signalP.pkl', 'rb'))
rna_expression = pickle.load(open('../matrices/pickles/MA_exp.pkl', 'rb'))


# Data transformation

In [3]:
# remove data for genes with no catrapid prediction
selection = prot_rna.any(axis=1)
prot_rna = prot_rna[selection,:]
secretion_prot = secretion_prot[:, selection]

# Addd pseudo rows 
# TODO: remove when fixed!
secretion_prot = np.vstack((secretion_prot, secretion_prot, secretion_prot, secretion_prot, secretion_prot))

rna_expression = rna_expression.reshape(rna_expression.shape[0],1)
rna_expression = np.hstack((rna_expression, rna_expression, rna_expression, rna_expression, rna_expression))


# Display shapes
print('secretion_prot:', secretion_prot.shape)
print('rna_expression:', rna_expression.shape)
print('prot_rna:', prot_rna.shape)

secretion_prot: (5, 2001)
rna_expression: (7097, 5)
prot_rna: (2001, 7097)


# Data fusion setup

In [4]:
# Define names
secretion = fusion.ObjectType('SignalP', 1)
prot = fusion.ObjectType('AAprot', 100)
genes = fusion.ObjectType('HSgenes', 200)
# TODO: lower when possible
expr = fusion.ObjectType('HSexpression', 2)

# Define relations
relations = [fusion.Relation(secretion_prot, secretion, prot),
                 fusion.Relation(prot_rna, prot, genes, name='catrapid'),
                 fusion.Relation(rna_expression, genes, expr)]



# Validation Functions

In [5]:
# Full build
def row_cross_validate(relations, cv_fold=10, th=2.5, random_state=1):
    """
    Performs cross valiation on give first matrix of relation and returns
    lists ouf AUC score and average deviation.
    """
    auc_vals = []
    avr_dev = []
    orig_matrix = relations[0].data
    for cv_matrix, elements in matrix_cv_setup(orig_matrix, n_fold=cv_fold, alternative=0, by_row=True):
        relations[0].data = cv_matrix
        reconst_mat = dfmf_n_reconstruct(relations, random_state=random_state)

        auc_vals.append(auc_value(orig_matrix, reconst_mat, elements, treshold=th, by_row=True))
        avr_dev.append(avr_res(orig_matrix, reconst_mat, elements, by_row=True))
        # print(auc_vals[-1],"auc")
        # print(avr_dev[-1], "dev")
    print('\tAUC value is:', np.mean(auc_vals))
    print('\t\tall values:', auc_vals)
    print()
    print('\tAverage deviation is:', np.mean(avr_dev))
    print('\t\tall values:', avr_dev)
    relations[0].data = orig_matrix

def dfmf_n_reconstruct(relationships, random_state=1):
    """
    Given relationships, reconstructs the matrix
    """
    fusion_graph = fusion.FusionGraph(relations)

    fuser = fusion.Dfmf(random_state=random_state)
    fuser.fuse(fusion_graph)
    return fuser.complete(fusion_graph['catrapid'])


# Predictions

In [6]:
auc_th = 2.5

relations = [fusion.Relation(prot_rna, prot, genes, name='catrapid'),
             fusion.Relation(secretion_prot, secretion, prot),
             fusion.Relation(rna_expression, genes, expr)]

print('Interactions, Secretion and Expression:')
row_cross_validate(relations, cv_fold=10, th=auc_th)
print()

Interactions, Secretion and Expression:
	AUC value is: 0.499202459158
		all values: [0.49299728051102504, 0.48189394219957837, 0.5026528653429615, 0.48331549128340939, 0.51765856429126589, 0.52067484938108388, 0.48410225294666215, 0.48344028974129322, 0.5081775488679704, 0.51711150701366948]

	Average deviation is: 0.599164929645
		all values: [0.61175414906712233, 0.60344401015861981, 0.57859295774004771, 0.58926221731213235, 0.58969118040220403, 0.59938168951850657, 0.61833393220343502, 0.59908768864879414, 0.61050125802545729, 0.59160021337290636]



In [7]:
print('with Secretion:')
relations = [fusion.Relation(prot_rna, prot, genes, name='catrapid'),
             fusion.Relation(secretion_prot, secretion, prot)]

row_cross_validate(relations, cv_fold=10, th=auc_th)
print()

with Secretion:
	AUC value is: 0.500626621621
		all values: [0.48711068702215249, 0.48451894735789197, 0.49917505247709332, 0.48892691752571182, 0.52272869378796116, 0.52512780986236152, 0.48545454314240361, 0.48644859297172055, 0.51286883757190893, 0.51390613448646161]

	Average deviation is: 0.599165792024
		all values: [0.61175269202802163, 0.60344542403846146, 0.57859235028266454, 0.58926546202081931, 0.58969483536709166, 0.59938113956485661, 0.61833442817948447, 0.59909100640246504, 0.61050121919302514, 0.59159936316451489]



In [8]:
print('with Expression:')

relations = [fusion.Relation(prot_rna, prot, genes, name='catrapid'),
             fusion.Relation(rna_expression, genes, expr)]

relations.append(fusion.Relation(rna_expression, genes, expr))
row_cross_validate(relations, cv_fold=10, th=auc_th)
print()

with Expression:
	AUC value is: 0.525844271524
		all values: [0.52149715177646871, 0.50916373235237822, 0.52181368673301121, 0.52247399887591428, 0.52438579002896157, 0.54427607952656676, 0.52957767782128218, 0.52325020876245476, 0.5281254309888167, 0.53387895837034682]

	Average deviation is: 0.599167114218
		all values: [0.61176825420324255, 0.60343899639349763, 0.57859316322305987, 0.5892660378440967, 0.5896979748248411, 0.59938773418479019, 0.61833117830429241, 0.59908505160784975, 0.61050607465055118, 0.59159667694109153]



In [9]:
print('Only Interactions:')
relations = [fusion.Relation(prot_rna, prot, genes, name='catrapid')]
row_cross_validate(relations, cv_fold=10, th=auc_th)
print()

Only Interactions:
	AUC value is: 0.525296186235
		all values: [0.51028930152543694, 0.51982224392704568, 0.52037946092502996, 0.52181328480695899, 0.52427251442677225, 0.5438022245865517, 0.52784037029678543, 0.52405751787719645, 0.52959236951084399, 0.53109257446904057]

	Average deviation is: 0.599167113956
		all values: [0.61176825246553079, 0.60343899601621009, 0.57859316305901187, 0.58926603946871814, 0.58969797513351419, 0.59938773432253578, 0.61833117728607312, 0.59908505143796209, 0.61050607417677283, 0.59159667619714529]

