In [None]:
#Weights and Biases eğitim verilerini kaydetmek için
!pip install --upgrade wandb
#wandb kütüphanesinin çağrılması
#ve API için giriş
import wandb
from wandb.keras import WandbCallback
!wandb login

In [None]:
#Gerekli kütüphanelerin çağrılması
import numpy as np
import pandas as pd
import os
import random
from sklearn import preprocessing
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [None]:
# Load the dataset of paired expression profiles
merge_df = pd.read_csv(filepath_or_buffer='../data/double_expression_dataframe.txt')

In [None]:
# To feed the dataset to the network as a tensor it is first turned into
# plain arrays (and reshaped in a later cell).
# Columns 2..175 are normalized as the gene block and columns 176..349 as the
# target block; each block is normalized independently.
gene_df_values, target_df_values = (
    preprocessing.normalize(merge_df.iloc[:, column_block].values)
    for column_block in (slice(2, 176), slice(176, 350))
)

In [None]:
# Pair every gene profile with the target profile of the same row. Stacking
# along a new second axis yields (n_samples, 2, n_features) directly, without
# building a Python list row by row.
X = np.stack([gene_df_values, target_df_values], axis=1)
pair_info = merge_df[["gene","target","pair"]]
y = pair_info["pair"].values
# Add the trailing channel axis used by the model input. The sample count is
# inferred (-1) rather than hard-coded, so the cell works for any number of
# rows in the dataset; the 2 x 174 layout is still enforced by the reshape.
X = X.reshape(-1, 2, 174, 1)

In [None]:
# Convert the labels to one-hot format with OneHotEncoder
from sklearn.preprocessing import OneHotEncoder
try:
    # scikit-learn >= 1.2 names the dense-output switch `sparse_output`
    onehotencoder = OneHotEncoder(sparse_output=False)
except TypeError:
    # older releases only know the original `sparse` keyword
    onehotencoder = OneHotEncoder(sparse=False)
# Read the raw labels from pair_info instead of from `y`: `y` is overwritten
# below, so encoding `y` itself would fail when this cell is run a second time.
y_reshape = pair_info["pair"].values.reshape(-1, 1)
y = onehotencoder.fit_transform(y_reshape)
# The data is split into a training and a test part, so the model can be
# judged on samples it has never seen.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

print('Shape of x_train: ', X_train.shape)
print('Shape of y_train: ', y_train.shape)
print('Shape of x_test: ', X_test.shape)
print('Shape of y_test: ', y_test.shape)

In [None]:
# Build the model architecture
def Model():
  """Assemble the (uncompiled) CNN used for TF-target pair classification.

  The network takes inputs of shape (2, 174, 1), applies two separable
  convolutions with dropout, one max-pooling step and batch normalization,
  then flattens into a small dense head that ends in 2 sigmoid units.

  Returns:
    A keras Model mapping the input tensor to the 2-unit output.
  """
  layers = keras.layers
  inputs = layers.Input(shape=(2, 174, 1))

  # First convolutional stage: 32 filters, then dropout and 2x2 pooling
  features = layers.SeparableConv2D(32, (3, 3), padding="same", activation="relu")(inputs)
  features = layers.Dropout(rate=0.1)(features)
  features = layers.MaxPooling2D((2, 2))(features)

  # Second convolutional stage: 16 filters, dropout and batch normalization
  features = layers.SeparableConv2D(16, (3, 3), padding="same", activation="relu")(features)
  features = layers.Dropout(rate=0.1)(features)
  features = layers.BatchNormalization()(features)

  # Dense head
  features = layers.Flatten()(features)
  features = layers.Dropout(rate=0.1)(features)
  features = layers.Dense(units=8, activation="relu")(features)
  outputs = layers.Dense(units=2, activation="sigmoid")(features)

  return keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Start the wandb run; hyperparameters and metadata are stored in its config
run = wandb.init(
    project='tf-target-prediction-2',
    config=dict(
        learning_rate=0.001,
        epochs=100,
        batch_size=216,
        loss_function="binary_crossentropy",
        architecture="CNN",
    ),
)
config = wandb.config  # the values above are read back from here below

# Create a fresh model (any previous Keras session state is cleared first)
tf.keras.backend.clear_session()
model = Model()
model.summary()

# Compile the model: RMSprop optimizer with the configured learning rate,
# the configured loss function, and accuracy as the tracked metric
optimizer = tf.keras.optimizers.RMSprop(learning_rate=config.learning_rate)
model.compile(optimizer=optimizer, loss=config.loss_function, metrics=['acc'])

In [None]:
# Train with model.fit. Epoch count and batch size are read from the wandb
# config, the held-out split is used as validation data, and WandbCallback is
# passed like any other Keras callback.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=config.batch_size,
    epochs=config.epochs,
    validation_data=(X_test, y_test),
    callbacks=[WandbCallback()],
)

In [None]:
# Evaluate on the held-out split and derive the error rate (in percent) once,
# so the printed and the logged value cannot diverge.
loss, accuracy = model.evaluate(X_test, y_test)
test_error_rate = round((1 - accuracy) * 100, 2)
print('Test Error Rate: ', test_error_rate)

# With wandb.log, metrics are passed in as key-value pairs.
wandb.log({'Test Error Rate': test_error_rate})

# Close the run. finish() is the supported call; join() is only a deprecated
# alias of it.
run.finish()

In [None]:
# Plot the training history: one figure for accuracy, one for loss, each
# showing the training curve followed by the validation curve.
plt.style.use("ggplot")
for train_key, val_key, plot_title, y_label in (
    ('acc', 'val_acc', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epochs')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
from itertools import combinations
from sklearn import preprocessing
# Expression table used at prediction time; it is looked up by its `gene`
# column and the remaining columns are used as the expression values.
gedf = pd.read_csv(filepath_or_buffer="../data/genecoexpression.txt")

def rSubset(arr, r=2):
    """Return every r-element combination of ``arr``.

    Args:
        arr: Sequence of items (gene names in this notebook).
        r: Size of each combination; defaults to pairs.

    Returns:
        A list of tuples, in the order produced by ``itertools.combinations``.

    Raises:
        ValueError: If ``arr`` holds fewer than ``r`` items, since no
            combination can be formed from it.
    """
    # Fail loudly instead of printing and returning None, which only moved
    # the failure into the caller as an unrelated-looking error.
    if len(arr) < r:
        raise ValueError("Gene list must contain at least {} genes.".format(r))
    # Honour `r`; it used to be accepted but ignored in favour of a fixed 2.
    return list(combinations(arr, r))
        

def find_expression(tf, target):
    """Look up and normalize the expression rows of a TF/target gene pair.

    Both names are searched in the ``gene`` column of the module-level
    ``gedf`` table. Note that the ``tf`` parameter hides the ``tf``
    (tensorflow) alias inside this function.

    Args:
        tf: Name of the transcription-factor gene.
        target: Name of the target gene.

    Returns:
        ``(pair_expression, tf, target)`` where ``pair_expression`` is the
        two normalized rows reshaped to (2, 174, 1), or ``None`` (after
        printing a message) when either gene is not present in ``gedf``.
    """
    known_genes = gedf.gene.values
    # The TF is checked first, so only its message is printed if both are missing.
    if tf not in known_genes:
        print("{} unkown expression level".format(tf))
        return None
    if target not in known_genes:
        print("{} unkown expression level".format(target))
        return None

    # Everything after the first column is treated as expression values.
    normalized_rows = [
        preprocessing.normalize(
            gedf[gedf["gene"] == "{}".format(gene_name)].iloc[:, 1:].values
        )
        for gene_name in (tf, target)
    ]
    pair_expression = np.concatenate(normalized_rows).reshape(2, 174, 1)
    return pair_expression, tf, target
            

def find_expression_from_list(arr):
    """Build model inputs for every pair of genes in ``arr``.

    All pairs from ``rSubset(arr)`` are upper-cased and passed to
    ``find_expression``. Pairs for which ``find_expression`` returns ``None``
    (it prints which gene is unknown) are skipped instead of aborting the
    whole batch.

    Args:
        arr: List of gene names.

    Returns:
        ``(expressions, pairs)``: an array with one ``find_expression`` result
        per usable pair, and the matching list of ``[tf, target]`` name
        arrays. Both are empty when no pair could be resolved.
    """
    pair_list = rSubset(arr)
    if not pair_list:
        # Nothing to pair up (rSubset may report this by returning None).
        return np.asarray([]), []
    pair_list = np.char.upper(pair_list)

    expression_array = []
    tf_target_pair = []
    for regulator, target in pair_list:
        found = find_expression(regulator, target)
        if found is None:
            # Unpacking None would raise TypeError; skip the pair and go on.
            continue
        pair_expression, regulator, target = found
        expression_array.append(pair_expression)
        tf_target_pair.append(np.array([regulator, target]))
    return np.asarray(expression_array), tf_target_pair

def make_prediction(genes):
  """Print the predicted label for every gene pair formed from ``genes``.

  Uses the notebook-level ``model`` for prediction and ``onehotencoder`` to
  map the network output back to the original labels.

  Args:
    genes: List of gene names; every pair of them is scored.
  """
  xnew, tf_target_pair = find_expression_from_list(arr=genes)
  if len(xnew) == 0:
    # No pair had known expression levels, so there is no batch to predict on.
    print("No gene pair with known expression levels to predict.")
    return
  pred = model.predict(xnew)
  pred = onehotencoder.inverse_transform(pred)
  for pair_names, label in zip(tf_target_pair, pred):
    print("TF-Target Pair=%s, Predicted=%s" % (pair_names, label))

In [None]:
# Score every pair that can be formed from these four genes
make_prediction(genes=["HNF1B", "DPP4", "ACE2", "SPP1"])