In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.stats import pearsonr
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from models import create_mlp, create_cnn
from preprocess import load_n_pre
from geno_2_img import *

# load data
# Fraction of the samples held out as the test split. It is forwarded to
# load_n_pre below so that changing it here actually takes effect.
test_size = .1
# env_indxs restricts the run to four environments.
# NOTE(review): presumably Lactate, Lactose, Sorbitol and Xylose, the labels
# used by test_results -- confirm against the phenotype file.
X_train, X_test, y_train, y_test, env_train, env_test = load_n_pre(
    "../data/yeast/geno.txt",
    "../data/yeast/feno.txt",
    env_indxs = [17, 18, 29, 37],
    norm_mode = "og",
    test_size = test_size,
)

Multicore TSNE not found
Loading data...
Preprocessing...
1 of 46 environments processed (Cadmium_Chloride) | Normalization type : og.
2 of 46 environments processed (Caffeine) | Normalization type : og.
3 of 46 environments processed (Calcium_Chloride) | Normalization type : og.
4 of 46 environments processed (Cisplatin) | Normalization type : og.
5 of 46 environments processed (Cobalt_Chloride) | Normalization type : og.
6 of 46 environments processed (Congo_red) | Normalization type : og.
7 of 46 environments processed (Copper) | Normalization type : og.
8 of 46 environments processed (Cycloheximide) | Normalization type : og.
9 of 46 environments processed (Diamide) | Normalization type : og.
10 of 46 environments processed (E6_Berbamine) | Normalization type : og.
11 of 46 environments processed (Ethanol) | Normalization type : og.
12 of 46 environments processed (Formamide) | Normalization type : og.
13 of 46 environments processed (Galactose) | Normalization type : og.
14 of 46 

In [None]:
# Resolution of the image generated for every sample; index 0 is later used
# as the CNN's image height and index 1 as its width.
img_shape = (200, 200)

# Convert both splits to images with the "fermat" 2D method and "whole"
# normalization.
X_train_img, X_test_img = transform_train_test(
    X_train,
    X_test,
    img_shape = img_shape,
    norm = "whole",
    method = "fermat",
)

Transformation parameters
-------------------------
2D method:  fermat
Image shape:  (200, 200)
Interpolation mode:  mean
Imputation value:  -1
Normalization mode:  whole
calculating euclidean distances
calculating fermat approx distances
fitting TSNE
Max overlapping features:  1
0 of 39530 samples transformed. Time elapsed: 0 sec.
250 of 39530 samples transformed. Time elapsed: 18 sec.
500 of 39530 samples transformed. Time elapsed: 36 sec.
750 of 39530 samples transformed. Time elapsed: 55 sec.
1000 of 39530 samples transformed. Time elapsed: 73 sec.
1250 of 39530 samples transformed. Time elapsed: 91 sec.
1500 of 39530 samples transformed. Time elapsed: 109 sec.
1750 of 39530 samples transformed. Time elapsed: 127 sec.
2000 of 39530 samples transformed. Time elapsed: 145 sec.
2250 of 39530 samples transformed. Time elapsed: 164 sec.
2500 of 39530 samples transformed. Time elapsed: 182 sec.
2750 of 39530 samples transformed. Time elapsed: 200 sec.
3000 of 39530 samples transformed. T

In [None]:
# Export and display the first transformed training images as a visual check.
# The target directory is created up front so savefig cannot fail when the
# notebook runs in a fresh checkout.
os.makedirs("output", exist_ok = True)

# Never index past the end of the array if fewer than 10 samples exist.
for i in range(min(10, len(X_train_img))):
    fig, ax = plt.subplots(figsize = (10, 10))
    im = ax.imshow(X_train_img[i, :, :], cmap = "gray")
    fig.colorbar(im, ax = ax)
    fig.savefig(f"output/yeast_fermat_{i}", transparent=True, dpi=300)
    plt.show()
    # Release the figure explicitly so the loop does not pile up open figures.
    plt.close(fig)

In [None]:
# Reference R^2 values, 46 entries (the loader output reports 46 environments,
# so presumably one entry per environment in file order -- TODO confirm).
# NOTE(review): the origin of these numbers ("nati") is not documented in this
# notebook; record the source next to the data.
R2_nati = np.array([0.797, 0.250, 0.268, 0.338, 0.460, 0.504, 0.456, 0.529, 
           0.498, 0.412, 0.518, 0.350, 0.235, 0.399, 0.225, 0.336, 
           0.480, 0.568, 0.582, 0.711, 0.278, 0.519, 0.809, 0.255, 
           0.432, 0.614, 0.496, 0.383, 0.411, 0.424, 0.515, 0.634, 
           0.471, 0.636, 0.397, 0.552, 0.315, 0.516, 0.543, 0.195, 
           0.356, 0.556, 0.432, 0.711, 0.485, 0.495])
def plot_learning(hist):
    """Draw the training and validation loss curves of a fit history.

    Parameters
    ----------
    hist : object
        Anything exposing a ``history`` mapping with "loss" and "val_loss"
        entries, e.g. the value returned by ``model.fit``.

    The curves are drawn on a newly created matplotlib figure; nothing is
    returned.
    """
    plt.figure()
    # First curve is labelled "train", second one "val".
    for curve in ("loss", "val_loss"):
        plt.plot(hist.history[curve])
    plt.legend(["train", "val"])
    plt.title("Learning plot")
    plt.xlabel("epoch")
    plt.ylabel("mse")
    plt.grid(axis = "y")

def test_results(model, X_test, env_test, y_test, R2_nati = R2_nati, max_env = 46,
                 env_indxs = (17, 18, 29, 37),
                 env_names = ("Lactate", "Lactose", "Sorbitol", "Xylose")):
    """Compute per-environment test metrics for a fitted model.

    Parameters
    ----------
    model : object with a ``predict`` method
        Called once with ``{"geno": X_test, "env": env_test}``.
    X_test : array-like
        Model input passed under the key "geno".
    env_test : np.ndarray, 2-D
        Environment indicator matrix; a 1 in column k marks the samples of
        the k-th evaluated environment. Also passed to the model as "env".
    y_test : np.ndarray
        Targets, one per row of ``env_test``.
    R2_nati : array-like
        Reference R^2 values used for the "best_r2_nati" column.
    max_env : int
        Upper bound on the number of environment columns evaluated. It is
        clamped to the number of columns actually present in ``env_test``.
    env_indxs : sequence of int
        Positions in ``R2_nati`` matching the columns of ``env_test``. Used
        when its length equals the number of evaluated environments.
    env_names : sequence of str
        Row labels, used together with ``env_indxs``.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated environment with the columns "r2" (squared
        Pearson correlation, rounded to 3 decimals), "best_r2_nati" and
        "mse".

    Notes
    -----
    The original implementation hard-coded four reference values and four
    row labels, so any ``max_env`` other than 4 failed with a length
    mismatch when the DataFrame was built. When the number of evaluated
    environments differs from ``len(env_indxs)``, column k is assumed to
    correspond to ``R2_nati[k]`` and rows are labelled by position.
    """
    # NOTE(review): inputs are passed by the keys "geno"/"env"; how they map
    # onto the model's input layers is not visible here -- confirm.
    y_pred = model.predict({"geno" : X_test, "env" : env_test})

    n_env = min(max_env, env_test.shape[1])
    if n_env == len(env_indxs):
        ref_indxs = list(env_indxs)
        labels = list(env_names) if len(env_names) == n_env else ref_indxs
    else:
        ref_indxs = list(range(n_env))
        labels = ref_indxs

    r2_scores = []
    mse_scores = []
    for env in range(n_env):
        in_env = env_test[:, env] == 1
        y_pred_e = y_pred[in_env].reshape((-1,))
        y_test_e = y_test[in_env].reshape((-1,))
        if y_pred_e.size < 2:
            # pearsonr needs at least two samples; report a missing value
            # instead of aborting the whole evaluation.
            r2_scores.append(np.nan)
        else:
            r2_scores.append(np.round(pearsonr(y_pred_e, y_test_e)[0]**2, 3))
        if y_pred_e.size == 0:
            mse_scores.append(np.nan)
        else:
            mse_scores.append(np.mean((y_pred_e - y_test_e)**2))

    metrics = {"r2" : r2_scores,
               "best_r2_nati" : np.asarray(R2_nati)[ref_indxs],
               "mse" : mse_scores}
    return pd.DataFrame(metrics, index = labels)

def choose_env(X, envs, target_envs):
    """Keep only the samples that belong to the requested environments.

    Parameters
    ----------
    X : np.ndarray
        Sample data; the first axis indexes samples.
    envs : np.ndarray, 2-D
        Environment indicator matrix with one row per sample. A value of 1
        in column k marks membership in environment k.
    target_envs : sequence of int
        Column indices of ``envs`` to keep.

    Returns
    -------
    X_out : np.ndarray
        Rows of ``X`` grouped environment by environment, in the order given
        by ``target_envs``.
    env_out : np.ndarray
        Matching rows of ``envs``, restricted to the ``target_envs`` columns.

    With an empty ``target_envs`` both results are zero-row arrays (the
    original raised ``UnboundLocalError`` in that case).
    """
    env = envs[:, target_envs]

    # Collect the per-environment pieces and concatenate once at the end
    # instead of re-allocating the outputs on every iteration.
    X_parts = []
    env_parts = []
    for j in range(len(target_envs)):
        in_env = env[:, j] == 1
        X_parts.append(X[in_env])
        env_parts.append(env[in_env])

    if not X_parts:
        return X[:0], env[:0]
    return np.concatenate(X_parts), np.concatenate(env_parts)


In [None]:
# FIXME: np.save requires a target file and an array (np.save(file, arr)).
# Called without arguments, this cell raises TypeError and stops a
# Restart & Run All. Decide which array should be persisted here, or remove the cell.
np.save()

In [None]:
# Convolutional branch that consumes the generated images.
cnn = create_cnn(img_height = img_shape[0], img_width = img_shape[1], filters = [2, 4, 8, 16],
                 kernel_sizes = [2, 4, 8, 10], strides = [2, 2, 2, 2], final_sizes = [8, 4], dropout = .25)

# Second input: the environment vector of each sample (one value per column
# of env_train), joined with the CNN branch output.
Inp = layers.Input((env_train.shape[1], ))
conc = layers.concatenate([Inp, cnn.output])

# Small dense head on the joined features, ending in one linear unit.
x = layers.Dense(4, activation="relu")(conc)
x = layers.Dense(1, activation="linear")(x)

# Input order is [environment, image]; fit/predict calls must follow it.
model = keras.Model(inputs = [Inp, cnn.input], outputs = x)

# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` is a legacy optimizer argument; recent TensorFlow
# releases appear to accept it only on the legacy optimizers -- verify
# against the installed version.
opt = optimizers.Adam(learning_rate = 0.0005, decay = 1e-3/200, epsilon = .1)

model.compile(loss = "mse", optimizer = opt)

model.summary()

In [None]:
# Stop once the validation loss has gone 100 epochs without improving.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)

# NOTE(review): the test split doubles as validation data, so early stopping
# is tuned on the same samples that are reported as test results later.
h = model.fit(
    x = [env_train, X_train_img],
    y = y_train,
    validation_data = ([env_test, X_test_img], y_test),
    epochs = 10000,
    batch_size = 128,
    callbacks = [callback],
)

In [None]:
# Show the train/validation loss curves recorded in the fit history `h`.
plot_learning(h)

In [None]:
# Per-environment metrics on the test split; max_env = 4 matches the four
# environments selected when the data was loaded.
results = test_results(model, X_test_img, env_test, y_test, max_env = 4)
results

In [None]:
# Persist the trained model to a file in the current working directory.
model.save('fermat_yeast_model.h5')

In [None]:
# NOTE(review): captures the validation-loss curve of whatever fit produced
# `h`. The name suggests a "random" layout run, while this notebook's
# transform uses method = "fermat" -- this only holds the intended data if
# training was re-run with a different transform before executing this cell.
l_rand = h.history["val_loss"]

In [None]:
# NOTE(review): reads the same `h.history` as the `l_rand` cell, so on a
# straight top-to-bottom run both names refer to the same curve. It only
# differs if training was re-run with another transform in between.
l_tsne = h.history["val_loss"]

In [None]:
# Inspect which metrics were recorded in the fit history.
h.history.keys()

In [None]:
# Disabled cell: the plotting code below is wrapped in a string literal, so it
# is not executed. It would compare the validation-loss curves l_rand, l_tsne
# and l_kpca; l_kpca is only assigned in another cell that is also disabled.
'''
plt.figure(figsize = (12.5, 7.5))
for l in [l_rand, l_tsne, l_kpca]:
  plt.plot(l)
plt.grid(axis = "y")
plt.title("validation loss per epoch")
plt.xlabel("epoch")
plt.ylabel("val. mse")
plt.legend(["random", "tsne", "kpca"])
'''

In [None]:
# Disabled cell (string literal, not executed): would keep the current
# validation-loss curve under the name l_kpca.
'''
l_kpca = h.history["val_loss"]
'''

In [None]:
# NOTE(review): `Res` is not defined in any visible cell, and `Inp` comes
# from the earlier CNN cell; this cell fails on a fresh kernel unless `Res`
# is created first.
del model

# Head on top of the `Res` backbone output: flatten, normalise, regularise,
# then shrink through dense layers of 32, 16, 8 and 4 units.
x = keras.layers.Flatten()(Res.output)
x = keras.layers.BatchNormalization(axis = -1)(x)
x = keras.layers.Dropout(rate = .33)(x)
for units in (32, 16, 8, 4):
    x = keras.layers.Dense(units, activation = "relu")(x)

# Join the environment input with the image features before the final head.
conc = keras.layers.concatenate([Inp, x])
x = keras.layers.Dense(4, activation = "relu")(conc)
x = keras.layers.Dense(1, activation = "linear")(x)

model = keras.Model(inputs = [Inp, Res.input], outputs = x)

# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` is a legacy optimizer argument; verify it is accepted
# by the installed TensorFlow version.
opt = optimizers.Adam(learning_rate = 0.0005, decay = 1e-3/200, epsilon = .1)
model.compile(loss = "mse", optimizer = opt)

model.summary()

In [None]:
# NOTE(review): X_train_res / X_test_res are not defined in any visible cell;
# they must exist before this cell can run.
h = model.fit(
    x = [env_train, X_train_res],
    y = y_train,
    validation_data = ([env_test, X_test_res], y_test),
    epochs = 100,
    batch_size = 64,
)