In [6]:
from relationGraph import Relation, RelationGraph, MatrixOfRelationGraph
from autoencoder import seedy, AutoEncoder
import utilityFunctions as uf
from main import test_build_relation_graph_with_symertic_data, test_convert_graph_to_2D_matrix, test_get_matix_for_autoencoder, test_autoencoder
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from base import load_source
from os.path import join
import numpy as np

In [7]:
# Object-type names used to label the axes of every relation below.
gene = 'Gene'
go_term = 'GO term'
exprc = 'Experimental condition'

# Gene x GO-term annotations; the raw matrix is rescaled before being wrapped.
data, rn, cn = load_source(join('dicty', 'dicty.gene_annnotations.csv.gz'))
data = uf.normalization(data)
ann = Relation(data=data, x_name=gene, y_name=go_term, name='ann',
               x_metadata=rn, y_metadata=cn)
print(np.min(data))
print(np.max(data))
print()

# Gene x experimental-condition expression values.
data, rn, cn = load_source(join('dicty', 'dicty.gene_expression.csv.gz'))
expr = Relation(data=data, x_name=gene, y_name=exprc, name='expr',
                x_metadata=rn, y_metadata=cn)
# Clamp to machine epsilon before taking the log so that zero or negative
# entries cannot yield -inf/NaN. The `np.float` alias was removed in
# NumPy 1.24; the builtin `float` names the same float64 type.
expr.matrix = np.log(np.maximum(expr.matrix, np.finfo(float).eps))
expr.matrix = uf.normalization(expr.matrix)
print(np.min(expr.matrix))
print(np.max(expr.matrix))
print()

# Gene x gene protein-protein interactions.
data, rn, cn = load_source(join('dicty', 'dicty.ppi.csv.gz'))
data = uf.normalization(data)
ppi = Relation(data=data, x_name=gene, y_name=gene, name='ppi',
               x_metadata=rn, y_metadata=cn)
print(np.min(data))
print(np.max(data))

# Transposed copies of the two non-square relations are added alongside the
# originals; ppi is Gene x Gene and is added only once.
ann_t = ann.transpose()
expr_t = expr.transpose()

relationGraph = RelationGraph()
relationGraph.add_relations([ann, expr, ppi, ann_t, expr_t])
relationGraph.display_objects()
# Short alias used by the later cells.
graph = relationGraph

0.0
1.0

0.0
1.0

0.0
1.0
-------------RelationGraph-------------
Experimental condition	282
1	expr_T-(282, 1219)
1	expr-(1219, 282)
GO term	116
1	ann_T-(116, 1219)
1	ann-(1219, 116)
Gene	1219
3	ann-(1219, 116), expr-(1219, 282), ppi-(1219, 1219)
2	ann_T-(116, 1219), expr_T-(282, 1219)



In [20]:
def normalization(data, _min=0, _max=1):
    """Rescale ``data`` so that its largest value becomes 1 and its smallest 0.

    Negative inputs are first shifted up by ``abs(min)``. Entries that were
    exactly 0 before that shift are reset to 0 afterwards, so original zeros
    stay zeros instead of becoming the new maximum.

    Parameters
    ----------
    data : numpy.ndarray
        Values to rescale. The input array is not modified; a new array is
        returned whenever a shift or scaling is applied.
    _min, _max : number, optional
        Bounds of the requested range. They are only validated; the output
        range is always anchored at 0 with a maximum of 1.

    Returns
    -------
    numpy.ndarray
        The rescaled data. If the (shifted) data has a maximum of 0 it is
        returned without scaling, because there is nothing to stretch.

    Raises
    ------
    ValueError
        If ``_min >= _max``, or if the requested range does not contain 0.
    """
    if _min >= _max:
        raise ValueError('Attribute \'min\' must be lower than \'max\'.')
    if _min > 0 or _max < 0:
        raise ValueError('This operation is not supported!')

    min_val = np.min(data)
    if min_val < 0:
        offset = np.abs(min_val)
        data = data + offset
        # After the shift, a value equal to the offset was exactly 0 before it.
        data[np.where(data == offset)] = 0

    max_val = np.max(data)
    if max_val == 0:
        # Nothing to scale; dividing by the maximum would fill the result with NaN.
        return data
    if max_val > 1:
        data = data / max_val
    elif max_val < 1:
        factor = 1/max_val
        data = data * factor

    return data

# Compare the PPI matrix before and after the local normalization():
# non-zero counts first, then the value range of each.
print(expr.matrix.shape)
print(np.count_nonzero(ppi.matrix))
test = normalization(ppi.matrix)
print(np.count_nonzero(test))
print(np.min(ppi.matrix), np.max(ppi.matrix), sep='\n')
print()
print(np.min(test), np.max(test), sep='\n')


(1219, 282)
45959
Min: -0.0999
Max: 0.0
45933
-0.0999
0.0

0.0
1.0


In [9]:
# Convert the relation graph to its 2D matrix form and print the layout metadata.
mrg = MatrixOfRelationGraph(graph=graph)
mrg.convert_to_2D_matrix()
mrg.display_metadata_2D_matrix()

# NOTE(review): 0.2 presumably keeps the densest 20% of each object type;
# confirm against MatrixOfRelationGraph.density_data.
data = mrg.density_data(0.2)
print(data.shape)

# The sample file name is keyed by the row count of the reduced matrix.
# Alternative path (original data for prediction):
# fn = '/data/samples/' + str(data.shape[0]) + '_ord_data.npz'
fn = '/data/samples/dicty/{}_data.npz'.format(data.shape[0])
print(fn)
print(data.shape)

# Reloading a previously saved matrix instead of recomputing it:
# f = np.load('/data/samples/dicty/162_org_data.npz')
# data = f[f.files[0]]

print(data.shape)

-----------ann_T (116, 1219)-----------
(116, 1219)

-----------ann (1219, 116)-----------
x != y
(1335, 1335)

-----------expr (1219, 282)-----------
x == 0
(1335, 1617)

-----------ppi (1219, 1219)-----------
x == y
(1335, 1617)

-----------expr_T (282, 1219)-----------
y == 0
(1617, 1617)

-------------2D Matrix-------------
Objects: GO term: (0, (0, 115)), Gene: (1, (116, 1334)), Experimental condition: (2, (1335, 1616))
[0. 1. 0.]
[1. 1. 1.]
[0. 1. 0.]

GO term: 23
Gene: 244
Experimental condition: 56
(323, 323)
/data/samples/dicty/323_data.npz
(323, 323)
(323, 323)


In [13]:
# Output path for the generated samples, keyed by the matrix row count.
fn = '/data/samples/dicty/{}_data.npz'.format(data.shape[0])
print(data.shape)

# Sample generation and saving of the original matrix are currently disabled:
# uf.data_generator(data, 50000, 14, 0.8, fn)
# fn = '/data/samples/dicty/' + str(data.shape[0]) + '_org_data.npz'
# f = uf.my_savez(fn)
# f.savez(data)
# f.close()

print('Finnish!!!')

(323, 323)
Finnish!!!


In [10]:
fn = '/data/samples/162_org_data.npz'
# np.load on an .npz returns an NpzFile that holds the archive open until it
# is closed; the context manager releases it once the first array is read.
with np.load(fn) as f:
    data = f[f.files[0]]
print(data.shape)

# fn = '/data/samples/162_data.npz'

ae = AutoEncoder(encoding_dim=20, data=data)
ae.encoder_decoder()
# ae.fit(batch_size=250, epochs=100)
# fn = '/mag/483_data.npz'
# NOTE(review): fit_generator receives the same '_org_data' file that was
# loaded above; the commented-out '162_data.npz' path suggests the generated
# sample file may have been intended here. Confirm.
ae.fit_generator(fn, n_packs=300, epochs=200)
ae.save()

# Reload the persisted halves of the model so the evaluation below uses what
# was actually saved.
encoder = ae.load_encoder()
decoder = ae.load_decoder()

# The matrix used to build the autoencoder doubles as the evaluation input.
# test_data = np.asarray([data[0].flatten()])
# test_data = np.asarray([data.flatten()])
test_data = data
print(test_data.shape)

# Encode, decode, and measure the reconstruction error.
x = encoder.predict(test_data)
y = decoder.predict(x)

mse = mean_squared_error(test_data, y)
print('MSE: ' + str(mse))

(162, 162)
[[ 5.00661389  4.83041553  4.8573831  ... -0.0227      0.
   0.        ]
 [ 4.86537828  4.71475975  4.96717789 ... -0.0245      0.
   0.        ]
 [ 4.36110939  4.20911566  4.58180944 ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ...  1.50696212  4.75418484
   3.51393065]
 [ 0.          0.          0.         ... -0.55686956  5.19988833
   2.52003245]
 [ 0.          0.          0.         ...  0.79615493  4.66824827
   4.0636104 ]]
(162, 162)
162
/data/weights/12
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoc

In [None]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Masking
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
import autoencoder as ae
import os
# Restrict the process to a single GPU (device index 2).
os.environ["CUDA_VISIBLE_DEVICES"]="2"

# Size of the square matrix to train on; the trailing number is the value
# recorded next to each size in the original notes.
# num = 81   # most densely filled  0.05
# num = 162   # most densely filled  0.1
# num = 323   # most densely filled  0.2
# num = 483   # most densely filled  0.3
# num = 645   # most densely filled  0.4
# num = 807   # most densely filled  0.5
num = 1614  # all data

# Original matrix: used for the model's input size and the final evaluation.
# The context manager closes the NpzFile, which np.load otherwise leaves open.
fn = '/data/samples/dicty/' + str(num) + '_org_data.npz'
with np.load(fn) as f:
    data = f[f.files[0]]

# Generated samples: streamed to the model during training.
fn = '/data/samples/dicty/' + str(num) + '_data.npz'

# The whole matrix is flattened into a single sample of length x * y.
x,y = data.shape
data=data.reshape(1, x * y)
input_dim = data.shape[1]
epochs = 200
encoding_dim = 30
n_packs = 50

# Single-bottleneck autoencoder: input -> encoding_dim -> input.
model = Sequential()
model.add(Masking(mask_value=0, input_shape=(input_dim, )))
# model.add(Dense(int(input_dim / 2), activation='relu'))
# input_shape is only meaningful on the first layer of a Sequential model, so
# it is not repeated here.
model.add(Dense(encoding_dim, activation='relu'))
# model.add(Dense(int(input_dim / 2), activation='relu'))
model.add(Dense(input_dim))
model.compile(loss='mse', optimizer='sgd')

# Deeper symmetric variant, kept for reference:
# model = Sequential()
# model.add(Masking(mask_value=0, input_shape=(input_dim, )))
# model.add(Dense(int(input_dim / 2), activation='relu'))
# model.add(Dense(int(input_dim / 4), activation='relu'))
# model.add(Dense(encoding_dim, activation='relu'))
# model.add(Dense(int(input_dim / 4), activation='relu'))
# model.add(Dense(int(input_dim / 2), activation='relu'))
# model.add(Dense(input_dim))
# model.compile(loss='mse', optimizer='sgd')
model.summary()


log_dir = '/data/logs/'
# Stop once the training loss has not improved for three consecutive epochs.
callbacks = [
            TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=True, write_images=True),
            EarlyStopping(monitor='loss', min_delta=0, patience=3, verbose=0, mode='auto')
        ]

# NOTE(review): Model.fit_generator is deprecated in TensorFlow >= 2.1, where
# Model.fit accepts generators directly. Switch once the installed TensorFlow
# version is confirmed.
model.fit_generator(ae.data_generator(fn, n_packs), steps_per_epoch=n_packs, epochs=epochs, callbacks=callbacks)

model.save('/data/sequential/weights/' + str(num) + '_model.h5')

# Reconstruct the original (flattened) matrix and report the error.
decoded_imgs = model.predict(data)

mse = mean_squared_error(data, decoded_imgs)
print('MSE: ' + str(mse))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_2 (Masking)          (None, 2604996)           0         
_________________________________________________________________
dense_4 (Dense)              (None, 30)                78149910  
_________________________________________________________________
dense_5 (Dense)              (None, 2604996)           80754876  
Total params: 158,904,786
Trainable params: 158,904,786
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200

In [45]:
# Size of the square matrix whose saved model is evaluated.
# num = 162   # most densely filled  0.1
num = 323   # most densely filled  0.2
# num = 483   # most densely filled  0.3
# num = 645   # most densely filled  0.4
# num = 807   # most densely filled  0.5

# NOTE(review): the training cell reads its data from '/data/samples/dicty/',
# while this path has no 'dicty' component. Confirm both refer to the same matrix.
# The context manager closes the NpzFile; flatten() returns an independent
# copy, so test_data stays valid after the archive is closed.
with np.load('/data/samples/' + str(num) + '_org_data.npz') as f:
    test_data = np.asarray([f[f.files[0]].flatten()])
# test_data = np.asarray([data.flatten()])

# prediction with normal data
model = load_model('/data/sequential/weights/' + str(num) + '_model.h5')
y = model.predict(test_data)
mse = mean_squared_error(test_data, y)
print(test_data[0])
print(y[0])
print()
print(test_data.shape)
print(y.shape)
print()
print('MSE org data: ' + str(mse))


# prediction with shuffled data
# The shuffle is in place and unseeded, so the reported error varies between
# runs. From here on test_data holds the permuted values; min/max/mean below
# are unaffected by the permutation.
np.random.shuffle(test_data[0])
y = model.predict(test_data)
mse = mean_squared_error(test_data, y)
print('MSE shuffled data: ' + str(mse))
print()
# print('Mean predict data: ' + str(np.mean(y[0])))
# print(test_data[0])
# print(y[0])
print()
print('Min org data:' + str(np.min(test_data)))
print('Max org data:' + str(np.max(test_data)))
print('Mean org data: ' + str(np.mean(test_data)))
print()
# These statistics describe the prediction made from the shuffled input.
print('Min predict:' + str(np.min(y)))
print('Max predict:' + str(np.max(y)))
print('Mean predict: ' + str(np.mean(y)))
print()

[4.31138956 4.33326959 4.7106287  ... 4.80063006 2.11492933 4.1626105 ]
[4.2677693 4.4455185 4.820116  ... 4.86551   2.086997  4.0896974]

(1, 104329)
(1, 104329)

MSE org data: 0.012063925241763864
MSE shuffled data: 45.73822851526672


Min org data:-36.04365338911715
Max org data:10.068331257647785
Mean org data: 1.4092668375654196

Min predict:-2.080417
Max predict:0.77436584
Mean predict: 0.060567502

