In [1]:
from relationGraph import Relation, RelationGraph, MatrixOfRelationGraph
from autoencoder import seedy, AutoEncoder, load_encoder, load_decoder
import utilityFunctions as uf
from main import test_build_relation_graph_with_symertic_data, test_convert_graph_to_2D_matrix, test_get_matix_for_autoencoder, test_autoencoder
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from base import load_source
from os.path import join
import numpy as np

In [2]:
# Object-type names used as the row/column labels of the relations below.
gene = 'Gene'
go_term = 'GO term'
exprc = 'Experimental condition'

# Gene x GO-term relation.
# NOTE(review): 'annnotations' (triple n) is kept verbatim -- presumably it matches the
# dataset file name on disk; confirm before "fixing" the spelling.
data, rn, cn = load_source(join('dicty', 'dicty.gene_annnotations.csv.gz'))
ann = Relation(data=data, x_name=gene, y_name=go_term, name='ann',
               x_metadata=rn, y_metadata=cn)

# Gene x experimental-condition relation.
data, rn, cn = load_source(join('dicty', 'dicty.gene_expression.csv.gz'))
expr = Relation(data=data, x_name=gene, y_name=exprc, name='expr',
                x_metadata=rn, y_metadata=cn)
# Log-transform the expression matrix. Values are clamped to machine epsilon first so
# that entries <= 0 do not turn into -inf/NaN. `np.float` (a plain alias of the builtin
# float) was removed in NumPy 1.24; `np.float64` yields the identical epsilon.
expr.matrix = np.log(np.maximum(expr.matrix, np.finfo(np.float64).eps))

# Gene x gene relation.
data, rn, cn = load_source(join('dicty', 'dicty.ppi.csv.gz'))
ppi = Relation(data=data, x_name=gene, y_name=gene, name='ppi',
               x_metadata=rn, y_metadata=cn)

# Transposed copies are registered as separate relations alongside the originals.
ann_t = ann.transpose()
expr_t = expr.transpose()

relationGraph = RelationGraph()
relationGraph.add_relations([ann, expr, ppi, ann_t, expr_t])
relationGraph.display_objects()
# Short alias used by the following cells.
graph = relationGraph

-------------RelationGraph-------------
Experimental condition	282
1	expr_T-(282, 1219)
1	expr-(1219, 282)
GO term	116
1	ann_T-(116, 1219)
1	ann-(1219, 116)
Gene	1219
3	ann-(1219, 116), expr-(1219, 282), ppi-(1219, 1219)
2	ann_T-(116, 1219), expr_T-(282, 1219)



In [3]:
# Turn the relation graph built above into a single 2-D matrix representation.
mrg = MatrixOfRelationGraph(graph=graph)
mrg.convert_to_2D_matrix()
# NOTE(review): judging by the recorded output (28 / 122 / 12 objects kept out of
# 282 / 1219 / 116, giving a 162 x 162 result), 0.1 looks like the fraction of each
# object type that is retained -- confirm against MatrixOfRelationGraph.density_data.
# This rebinds the notebook-level `data`, which previously held the last loaded source.
data = mrg.density_data(0.1)
print(data.shape)

expr_T (282, 1219)
expr (1219, 282)
ann_T (116, 1219)
ann (1219, 116)
ppi (1219, 1219)
Experimental condition: 28
Gene: 122
GO term: 12
(162, 162)


In [4]:
import multiprocessing
import time
from pathlib import Path

def mp_worker(arr):
    """Pool task: generate one batch of samples from a packed argument list.

    `arr` is indexed positionally: ``arr[0]`` is the source matrix, ``arr[1]`` the
    number of samples to generate and ``arr[2]`` the density. All three are
    forwarded unchanged to ``uf.sample_generator3`` and its result is returned.
    """
    matrix, sample_count, target_density = arr[0], arr[1], arr[2]
    return uf.sample_generator3(matrix, num_of_samples=sample_count, density=target_density)

def mp_handler(data, n_samples, density):
    """Generate samples from `data` in parallel and stack them into one 2-D array.

    The work is split into batches of 100 samples; each batch is produced by
    ``mp_worker`` in a pool of 8 processes.

    Parameters
    ----------
    data : 2-D array
        Source matrix; only ``data.shape[0]`` and ``data.shape[1]`` are read here,
        the matrix itself is handed to every worker task.
    n_samples : int or float
        Requested number of samples. The count is rounded UP to a whole number of
        100-sample batches, and at least one batch is always generated.
    density : number
        Forwarded unchanged to ``mp_worker``.

    Returns
    -------
    numpy.ndarray
        All batch results concatenated along axis 0, in task order.
    """
    batch_size = 100
    # Ceiling instead of round(): round() could round down (e.g. 250 -> 2 batches,
    # 120 -> 1 batch) and silently deliver fewer samples than requested.
    n_batches = max(1, int(np.ceil(n_samples / batch_size)))
    tasks = [[data, batch_size, density] for _ in range(n_batches)]

    # The leading empty block fixes the row width (and float dtype) of the result,
    # exactly like the accumulator it replaces.
    batches = [np.empty((0, data.shape[0] * data.shape[1]))]

    # The context manager guarantees the worker processes are torn down, even if a
    # task raises or the cell is interrupted.
    with multiprocessing.Pool(8) as pool:
        for result in pool.imap(mp_worker, tasks):
            print(result.shape)
            batches.append(result)

    # Concatenate once at the end rather than re-copying the accumulator per batch.
    return np.concatenate(batches, axis=0)

In [5]:
        
# Train the autoencoder on the single matrix in `data` and report its reconstruction error.

# Flatten the 2-D matrix into one row of length rows*cols, i.e. a single sample.
# NOTE: this rebinds the notebook-level `data`; re-running the cell is harmless
# because reshaping an already (1, N) array to (1, -1) is a no-op.
data = data.reshape(1, -1)

# The previous call `AutoEncoder(encoding_dim=20, data)` was a SyntaxError (positional
# argument after a keyword argument), so both arguments are now passed by keyword.
# NOTE(review): assumes the constructor parameter is named `data` -- confirm in
# autoencoder.py.
ae = AutoEncoder(encoding_dim=20, data=data)
ae.encoder_decoder()
# Alternative training entry point tried earlier: ae.fit(batch_size=250, epochs=100)
ae.fit_generator()
ae.save()

# Reload the persisted halves of the model and run the input through both.
encoder = load_encoder()
decoder = load_decoder()

test_data = np.asarray([data.flatten()])
print(test_data.shape)

# Distinct names for the model outputs (previously `x`/`y`, which were also used for
# the matrix dimensions earlier in the cell).
encoded = encoder.predict(test_data)
decoded = decoder.predict(encoded)

mse = mean_squared_error(test_data, decoded)
print('MSE: ' + str(mse))

[[3.84551989 5.89807652 5.17124507 ... 0.         0.         0.        ]
 [4.2006846  6.54779638 5.98108335 ... 0.         0.         0.        ]
 [3.73492777 5.74797159 5.10874156 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 2.0841803  1.01740223 0.81713316]
 [0.         0.         0.         ... 2.3218974  4.79163316 3.95670952]
 [0.         0.         0.         ... 2.66179682 3.04514129 3.05220712]]
(162, 162)
162
Epoch 1/100
100
100


ValueError: Error when checking input: expected input_3 to have shape (162,) but got array with shape (26244,)

In [33]:
import multiprocessing
import time

# NOTE(review): hard-coded absolute path; it is not read by any active code in this
# cell. Prefer a path relative to a configurable data directory if it is revived.
fn = '/mag/scripts/temp.txt'

def mp_worker(data):
    """Pool task: generate a fixed-size batch of samples from `data`.

    Forwards `data` to ``uf.sample_generator3`` with the hard-coded settings
    ``num_of_samples=100`` and ``density=0.7`` and returns whatever it produces.
    """
    return uf.sample_generator3(data, num_of_samples=100, density=0.7)

def mp_handler():
    """Run 100 ``mp_worker`` tasks over the notebook-level `data` in a 6-process pool.

    Every task receives the same `data` object. The shape of each result is printed
    as it arrives (in task order); nothing is returned.
    """
    tasks = [data for _ in range(100)]
    # Use the pool as a context manager so the worker processes are always torn down,
    # including when the cell is interrupted part-way through.
    with multiprocessing.Pool(6) as pool:
        for result in pool.imap(mp_worker, tasks):
            print(result.shape)

if __name__=='__main__':
    # time.clock() was removed in Python 3.8. perf_counter() measures elapsed
    # wall-clock time, which is the relevant figure here because the work runs in
    # child processes rather than in this one.
    start = time.perf_counter()
    mp_handler()
    print('Finish')
    print(time.perf_counter() - start)

100
100
100
100
100
100
(100, 233289)
(100, 233289)
(100, 233289)
(100, 233289)
(100, 233289)
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
100
(100, 233289)
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
100
100
(100, 233289)
(100, 233289)
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
(100, 233289)
100
100
(100, 233289)
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
100
(100, 233289)
(100, 233289)
100
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
100
(100, 233289)
100
(100, 233289)
(100, 233289)
100
(100, 233289)
100
(100, 233289)
100
(100, 

Process ForkPoolWorker-50318:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/usr/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._sem