<a href="https://colab.research.google.com/github/sowmyamanojna/CS6024-Algorithmic-Approaches-to-Computational-Biology-Project/blob/master/dae_tf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

import os
import seaborn as sns
import io

In [None]:
%matplotlib inline
plt.style.use('seaborn-notebook')
sns.set(style="white", color_codes=True)
sns.set_context("paper", rc={"font.size":14,"axes.titlesize":15,"axes.labelsize":20,'xtick.labelsize':14, 'ytick.labelsize':14})

In [None]:
# Seed both random number generators the notebook draws from. NumPy's global
# seed does not reach TensorFlow, which produces the Gaussian noise
# (tf.random.normal) and the initial layer weights, so without the second
# call the training runs are not repeatable.
SEED = 123
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Load the normalized table and discard the identifier column.
pcos_df = pd.read_csv('common_normalized.csv').drop(columns=['sample_id'])

# Randomly hold out a tenth of the rows; everything else is used for training.
test_set_percent = 0.1
pcos_test_df = pcos_df.sample(frac=test_set_percent)
pcos_train_df = pcos_df.drop(index=pcos_test_df.index)

# Show the first two rows of each partition, then the partition sizes.
print(pcos_train_df.head(2), pcos_test_df.head(2), sep='\n')
print(pcos_train_df.shape, pcos_test_df.shape, sep='\n')

In [None]:
class Autoencoder(Model):
  """Autoencoder with one dense encoder layer and one dense decoder layer.

  Args:
    latent_dim: number of units in the ReLU bottleneck layer.
    original_dim: number of units in the sigmoid output layer.
  """

  def __init__(self, latent_dim, original_dim):
    super().__init__()
    self.latent_dim = latent_dim
    # Encoder: a single ReLU layer that produces the latent code.
    bottleneck = layers.Dense(latent_dim, activation='relu')
    self.encoder = tf.keras.Sequential([bottleneck])
    # Decoder: a single sigmoid layer, so every output lies in (0, 1).
    reconstruction = layers.Dense(original_dim, activation='sigmoid')
    self.decoder = tf.keras.Sequential([reconstruction])

  def call(self, x):
    """Return the decoder's output for the encoding of `x`."""
    return self.decoder(self.encoder(x))

In [None]:
# Hyperparameters used by the model and training cells that follow.
epochs = 10                           # number of passes handed to fit()
latent_dim = 100                      # units in the bottleneck layer
original_dim = len(pcos_df.columns)   # one unit per column of pcos_df

In [None]:
# Build the plain (non-denoising) autoencoder from the sizes chosen above.
autoencoder = Autoencoder(latent_dim=latent_dim, original_dim=original_dim)

In [None]:
# Train with Adam, using mean squared error as the reconstruction objective.
autoencoder.compile(loss=losses.MeanSquaredError(), optimizer='adam')

In [None]:
# The inputs double as the targets: the network is trained to reproduce each
# row. The held-out frame is passed only as validation data.
hist = autoencoder.fit(
    x=pcos_train_df,
    y=pcos_train_df,
    batch_size=4,
    epochs=epochs,
    shuffle=True,
    validation_data=(pcos_test_df, pcos_test_df),
)

In [None]:
# Plot the per-epoch metrics recorded by fit() and save the figure to disk.
# NOTE(review): the y-axis reads 'DAE Loss', yet `hist` here comes from the
# plain autoencoder's fit() call -- confirm the label is intended.
history_df = pd.DataFrame(hist.history)
ax = history_df.plot()
ax.set(xlabel='Epochs', ylabel='DAE Loss')
fig = ax.figure
fig.savefig("hist_plot_file.png")

In [None]:
# Reload the table and redo the 90/10 split for the denoising experiment.
# NOTE(review): this repeats the load/split from earlier in the notebook and
# draws a fresh random sample, so the held-out rows may differ from the first
# split -- confirm that is intended.
pcos_df = pd.read_csv('common_normalized.csv').drop(columns=['sample_id'])

test_set_percent = 0.1  # fraction of rows held out
pcos_test_df = pcos_df.sample(frac=test_set_percent)
pcos_train_df = pcos_df.drop(index=pcos_test_df.index)

# Show the first two rows of each partition.
print(pcos_train_df.head(2), pcos_test_df.head(2), sep='\n')

In [None]:
def _add_gaussian_noise(clean, noise_factor):
  """Return `clean` plus scaled standard-normal noise, clipped to [0, 1].

  Args:
    clean: array of clean samples.
    noise_factor: multiplier applied to the standard-normal noise.

  Returns:
    A tensor with the same shape as `clean`.
  """
  noisy = clean + noise_factor * tf.random.normal(shape=clean.shape)
  # Clip back to [0, 1]; assumes the normalized inputs already lie in that
  # range (the decoder's sigmoid output cannot leave it) -- TODO confirm.
  return tf.clip_by_value(noisy, clip_value_min=0., clip_value_max=1.)


# Keep the arrays two-dimensional, (n_samples, n_features). The previous
# reshape to (157, 1669, 1) / (18, 1669, 1) hard-coded the dataset size and
# added a trailing singleton axis. Dense layers act on the last axis, so the
# model then treated every scalar as its own one-feature sample and returned
# a (batch, 1669, 1669) tensor instead of reconstructing each row.
pcos_train = pcos_train_df.to_numpy()
pcos_test = pcos_test_df.to_numpy()
print(pcos_train.shape)
print(pcos_test.shape)

# Adding random noise to the data
noise_factor = 0.2
pcos_train_noisy = _add_gaussian_noise(pcos_train, noise_factor)
pcos_test_noisy = _add_gaussian_noise(pcos_test, noise_factor)

In [None]:
class Denoise(Model):
  """Autoencoder with one dense encoder layer and one dense decoder layer.

  The layer widths are not constructor arguments: they are read from the
  module-level names `latent_dim` and `original_dim` when an instance is
  created.
  """

  def __init__(self):
    super().__init__()
    # Encoder: a single ReLU layer that produces the latent code.
    bottleneck = layers.Dense(latent_dim, activation='relu')
    self.encoder = tf.keras.Sequential([bottleneck])
    # Decoder: a single sigmoid layer, so every output lies in (0, 1).
    reconstruction = layers.Dense(original_dim, activation='sigmoid')
    self.decoder = tf.keras.Sequential([reconstruction])

  def call(self, x):
    """Return the decoder's output for the encoding of `x`."""
    return self.decoder(self.encoder(x))

In [None]:
# Instantiate the denoising model and configure it with the Adam optimizer
# and a mean-squared-error reconstruction loss.
dae = Denoise()
dae.compile(loss=losses.MeanSquaredError(), optimizer='adam')

In [None]:
# Denoising objective: noisy samples go in, the clean samples are the targets.
# Validation uses the noisy/clean held-out pair in the same way.
hist = dae.fit(
    x=pcos_train_noisy,
    y=pcos_train,
    batch_size=4,
    epochs=epochs,
    shuffle=True,
    validation_data=(pcos_test_noisy, pcos_test),
)

In [None]:
# Plot the per-epoch metrics recorded by the denoising model's fit() call and
# save the figure to disk.
history_df = pd.DataFrame(hist.history)
ax = history_df.plot()
ax.set(xlabel='Epochs', ylabel='DAE Loss')
fig = ax.figure
fig.savefig("hist_plot_file_dae.png")

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line after each table.
dae.encoder.summary()
dae.decoder.summary()

In [None]:
# Run the clean held-out samples through the trained encoder, then feed the
# resulting codes (as a NumPy array) to the decoder.
latent_codes = dae.encoder(pcos_test)
encoded_imgs = latent_codes.numpy()
reconstructions = dae.decoder(encoded_imgs)
decoded_imgs = reconstructions.numpy()

In [None]:
# Report the dimensions of the encoded held-out set.
print(np.shape(encoded_imgs))

In [None]:
# Scatter the first latent unit against the second.
# The earlier `encoded_imgs[:][:][0]` was simply `encoded_imgs[0]` (each `[:]`
# is a no-op full slice), so it compared sample 0 with sample 1 instead of
# latent feature 1 with latent feature 2 as the axis labels state.
# Indexing the last axis selects one latent unit across every encoded entry;
# ravel() flattens any remaining leading axes into a 1-D series of points.
latent_feature_1 = encoded_imgs[..., 0].ravel()
latent_feature_2 = encoded_imgs[..., 1].ravel()

plt.figure(figsize=(6, 6))
plt.scatter(latent_feature_1, latent_feature_2)
plt.xlabel('Latent Feature 1')
plt.ylabel('Latent Feature 2')
plt.savefig('node_activation_2_latent.png')