In [6]:
from relationGraph import Relation, RelationGraph, MatrixOfRelationGraph
from autoencoder import seedy, AutoEncoder
from main import test_build_relation_graph_with_symertic_data, test_convert_graph_to_2D_matrix, test_get_matix_for_autoencoder, test_autoencoder
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from base import load_source
from os.path import join

import utilityFunctions as uf
import multiprocessing
import numpy as np

In [7]:
# Object-type names shared by the relations below.
gene = 'Gene'
go_term = 'GO term'
exprc = 'Experimental condition'

# Gene x GO-term annotations, normalized with uf.normalization.
# NOTE(review): the triple "n" in the file name is kept as-is; presumably it
# matches the file on disk -- confirm before "fixing" it.
data, rn, cn = load_source(join('dicty', 'dicty.gene_annnotations.csv.gz'))
data = uf.normalization(data)
ann = Relation(data=data, x_name=gene, y_name=go_term, name='ann',
               x_metadata=rn, y_metadata=cn)
print(np.min(data))
print(np.max(data))
print()

# Gene x experimental-condition expression. The relation is built from the raw
# values; its matrix is then log-transformed and normalized in place.
data, rn, cn = load_source(join('dicty', 'dicty.gene_expression.csv.gz'))
expr = Relation(data=data, x_name=gene, y_name=exprc, name='expr',
                x_metadata=rn, y_metadata=cn)
# Clamp to machine epsilon before the log so entries <= 0 do not yield -inf/nan.
# The builtin `float` replaces the `np.float` alias, which was removed in
# NumPy 1.24; both denote float64, so eps is unchanged.
expr.matrix = np.log(np.maximum(expr.matrix, np.finfo(float).eps))
expr.matrix = uf.normalization(expr.matrix)
print(np.min(expr.matrix))
print(np.max(expr.matrix))
print()

# Gene x gene protein-protein interactions, normalized with uf.normalization.
data, rn, cn = load_source(join('dicty', 'dicty.ppi.csv.gz'))
data = uf.normalization(data)
ppi = Relation(data=data, x_name=gene, y_name=gene, name='ppi',
               x_metadata=rn, y_metadata=cn)
print(np.min(data))
print(np.max(data))

# Transposed copies are registered next to the originals; ppi is gene x gene
# and is added only once.
ann_t = ann.transpose()
expr_t = expr.transpose()

relationGraph = RelationGraph()
relationGraph.add_relations([ann, expr, ppi, ann_t, expr_t])
relationGraph.display_objects()
graph = relationGraph

0.0
1.0

0.0
1.0

0.0
1.0
-------------RelationGraph-------------
GO term	116
1	ann_T-(116, 1219)
1	ann-(1219, 116)
Experimental condition	282
1	expr_T-(282, 1219)
1	expr-(1219, 282)
Gene	1219
3	ann-(1219, 116), expr-(1219, 282), ppi-(1219, 1219)
2	ann_T-(116, 1219), expr_T-(282, 1219)



In [27]:
def sample_generator(matrix, num_of_samples=1, density=0.8, seed=None):
    """Generate randomly masked, flattened copies of a 2-D matrix.

    Each sample is the whole matrix (the network input is the entire matrix)
    in which every row keeps ``round(y_size * density)`` randomly chosen
    columns and has all other columns set to 0. The input is not modified.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D array of shape ``(x_size, y_size)``.
    num_of_samples : int, optional
        Number of samples to generate; values <= 0 yield an empty result.
    density : float, optional
        Fraction of columns kept in each row. ``1`` keeps every entry.
    seed : int or None, optional
        When given, a private ``numpy.random.RandomState(seed)`` drives the
        sampling so the result is reproducible. When ``None`` (default) the
        global NumPy random state is used.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_of_samples, x_size * y_size)``, one flattened
        (row-major) sample per row.
    """
    x_size, y_size = matrix.shape
    flat_size = x_size * y_size
    rng = np.random if seed is None else np.random.RandomState(seed)

    # NOTE: despite its label, the printed value is just a random draw, not a
    # seed that could reproduce the run. The text and the draw are kept so the
    # output and the random-number stream stay the same; pass `seed` for
    # reproducibility.
    print('Random seed: '  + str(rng.randint(y_size)))

    # The number of kept columns is the same for every row and sample.
    keep_count = int(round(y_size * density))

    samples = []
    while num_of_samples > 0:
        masked = np.copy(matrix)
        for row in masked:
            kept = rng.choice(y_size, keep_count, replace=False)
            drop = np.ones(y_size, dtype=bool)
            drop[kept] = False
            row[drop] = 0

        samples.append(masked.reshape(1, flat_size))
        num_of_samples -= 1

    # Concatenate once instead of re-copying the growing array every iteration;
    # the leading empty float block keeps the result shape and dtype promotion.
    return np.concatenate([np.empty((0, flat_size))] + samples, axis=0)


# Fuse the relation graph into one 2-D block matrix and show its layout.
mrg = MatrixOfRelationGraph(graph=graph)
mrg.convert_to_2D_matrix()
mrg.display_metadata_2D_matrix()

# Output directory for the sample packages, kept in one place so it is easy to
# relocate.
samples_dir = '/data/samples/dicty/'

# Settings for the (currently disabled) bulk generation step below. They are
# defined once here rather than on every loop iteration.
cpu = 6
density = .8
num_samples = 50000

# Full sweep, kept for reference. NOTE(review): the last entry `.1` duplicates
# an earlier value and was presumably meant to be `1.` -- confirm before use.
# list_of_density = [.05, .1, .15, .2, .25, .3, .35, .4, .45, .5, .55, .6, .65, .7, .75, .8, .85, .9, .95, .1]
list_of_density = [.05, .1, .15, .2, .25,]
for subset_density in list_of_density:
    # Presumably a sub-matrix holding this fraction of each object type;
    # verify against MatrixOfRelationGraph.density_data.
    data = mrg.density_data(subset_density)
    print('Input: ' + str(data.shape))

    # Save original data: 100 flattened copies, unmasked because density is 1.
    fn = join(samples_dir, str(data.shape[0]) + '_org_data.npz')
    data_package = sample_generator(data, 100, 1)
    print('Package: ' + str(data_package.shape))
    writer = uf.my_savez(fn)
    try:
        writer.savez(data_package)
    finally:
        # Close the writer even if saving fails.
        writer.close()

    # Read the package back as a check; the `with` block closes the archive
    # handle, which was previously left open on every iteration.
    with np.load(fn) as f:
        data_x = f[f.files[0]]
    print('Read: ' + str(data_x.shape))
    print()

    # Generated test data (disabled); uses cpu / density / num_samples above.
#     fn = '/data/samples/dicty/' + str(data.shape[0]) + '_data.npz'
#     uf.data_generator(data, num_samples, cpu, density, fn)

print('Finnish!!!')

-----------ann_T (116, 1219)-----------
(116, 1219)

-----------ann (1219, 116)-----------
x != y
(1335, 1335)

-----------expr_T (282, 1219)-----------
y == 0
(1617, 1335)

-----------expr (1219, 282)-----------
x == 0
(1617, 1617)

-----------ppi (1219, 1219)-----------
x == y
(1617, 1617)

-------------2D Matrix-------------
Objects: GO term: (0, (0, 115)), Experimental condition: (1, (116, 397)), Gene: (2, (398, 1616))
[0. 0. 1.]
[0. 0. 1.]
[1. 1. 1.]

GO term: 6
Experimental condition: 14
Gene: 61
Input: (81, 81)
Random seed: 66
Package: (100, 6561)
Read: (100, 6561)

GO term: 12
Experimental condition: 28
Gene: 122
Input: (162, 162)
Random seed: 54
Package: (100, 26244)
Read: (100, 26244)

GO term: 17
Experimental condition: 42
Gene: 183
Input: (242, 242)
Random seed: 220
Package: (100, 58564)
Read: (100, 58564)

GO term: 23
Experimental condition: 56
Gene: 244
Input: (323, 323)
Random seed: 287
Package: (100, 104329)
Read: (100, 104329)

GO term: 29
Experimental condition: 70
Ge