In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K

In [None]:
import keras

In [None]:
data_dir = "../input/aml-fab-with-names/"

In [None]:
df = pd.read_csv(data_dir + "GSE147515_FAB_mat.csv")

In [None]:
df = df.T
df.head()

In [None]:
df.shape

In [None]:
df.columns = df.iloc[-1]
df.drop(["Unnamed: 0"], axis=0, inplace=True)
df.head()


In [None]:
df.shape

In [None]:
lbl = pd.read_csv(data_dir + "GSE147515_FAB_lbl.csv")
lbl.head()

In [None]:
lbl.FAB.value_counts()

In [None]:
#lbl_filtered = lbl.loc[lbl.FAB.isin(["CTRL", "M2"])]

In [None]:
# Integer code assigned to each FAB label; "CTRL" takes the last code (6).
fab_codes = {
    "CTRL": 6,
    "M0": 0,
    "M1": 1,
    "M2": 2,
    "M3": 3,
    "M4": 4,
    "M5": 5,
}

# Index the table by its "X" column and keep neither "X" nor "Unnamed: 0"
# as data columns.
lbl = lbl.set_index("X").drop(columns=["Unnamed: 0"])

# Series.map turns any FAB value that is missing from `fab_codes` into NaN.
# NOTE(review): confirm the value counts contain only the seven keys above.
lbl["FAB"] = lbl["FAB"].map(fab_codes)
lbl.head()

In [None]:
#map

In [None]:
df.tail()

In [None]:
df.drop(["name"], axis=0, inplace=True)
df.tail()

In [None]:
X = df.values
y = lbl.values

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=0)

In [None]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Gaussian-corrupted copies of the scaled splits (mean 0, sigma 0.05).
#
# The noise is drawn with the full (n_samples, n_features) shape of each
# split so that every sample receives its own independent perturbation.
# Drawing only (n_features,) values would broadcast one identical noise
# vector onto every row, i.e. a constant per-feature offset rather than noise.
# A seeded generator keeps the corrupted copies reproducible across runs.
noise_rng = np.random.default_rng(0)
X_train_noise = X_train + noise_rng.normal(0, 0.05, X_train.shape)
X_test_noise = X_test + noise_rng.normal(0, 0.05, X_test.shape)

In [None]:
#y_train = keras.utils.to_categorical(y_train, 7)
#y_test = keras.utils.to_categorical(y_test, 7)

In [None]:
from keras.layers import Activation
from keras.utils.generic_utils import get_custom_objects

In [None]:
# sqrt(2 / pi) as a plain Python float, so TensorFlow casts it to the dtype of
# whatever tensor it is multiplied with instead of pinning it to float32.
_SQRT_2_OVER_PI = float(np.sqrt(2 / np.pi))


def custom_gelu(x):
    """Tanh approximation of the GELU activation.

    Computes ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))``
    element-wise.

    Args:
        x: Input tensor.

    Returns:
        A tensor with the same shape as ``x``.
    """
    inner = _SQRT_2_OVER_PI * (x + 0.044715 * tf.pow(x, 3))
    return 0.5 * x * (1 + tf.tanh(inner))


# Register the function itself (not an Activation layer wrapping it) so that
# `activation="custom_gelu"` resolves to a plain callable.
get_custom_objects().update({
    "custom_gelu": custom_gelu
})

# Train Model

In [None]:
stacked_encoder = keras.models.Sequential([
    keras.layers.Dense(512, input_shape=(X_train.shape[1],)),
    keras.layers.AlphaDropout(0.1),
    keras.layers.Dense(256, activation="custom_gelu"),
    keras.layers.AlphaDropout(0.1),
    keras.layers.Dense(128, activation="custom_gelu"),
    keras.layers.ActivityRegularization(l1=0.00008)  
])

stacked_decoder = keras.models.Sequential([
    keras.layers.Dense(128, activation="custom_gelu", input_shape=(128,)),
    keras.layers.AlphaDropout(0.1),
    keras.layers.Dense(256, activation="custom_gelu"),
    keras.layers.AlphaDropout(0.1),
    keras.layers.Dense(512, activation="custom_gelu"),
    keras.layers.Dense(X_train.shape[1], activation="softmax")
])

In [None]:
stacked_ae = keras.models.Sequential([stacked_encoder, stacked_decoder])

stacked_ae.compile(
    optimizer=keras.optimizers.Adam(lr=0.00008),
    loss="categorical_crossentropy")

history = stacked_ae.fit(X_train, X_train, batch_size=16, epochs=200, validation_data=(X_test, X_test))

In [None]:
# Run the held-out split through the encoder half only to obtain its codes.
x_compressed = stacked_encoder.predict(X_test)

In [None]:
from sklearn.manifold import TSNE

In [None]:
# Project the encoder outputs to 2-D for plotting.
# t-SNE is stochastic; fixing random_state (same seed as the train/test split)
# makes the embedding, and therefore the saved figure, reproducible.
tsne = TSNE(n_jobs=4, random_state=0)
X_compressed_2d = tsne.fit_transform(x_compressed)

In [None]:
# Reverse lookup from integer class code back to the FAB label text.
label_dict = dict(zip(
    [6, 0, 1, 2, 3, 4, 5],
    ["CTRL", "M0", "M1", "M2", "M3", "M4", "M5"],
))

# Apply the dictionary lookup element-wise to the held-out labels.
code_to_name = np.vectorize(label_dict.get)
labels = code_to_name(y_test)

In [None]:
# Scatter plot of the 2-D t-SNE embedding, coloured by class code.
# Tick label order follows the integer codes 0..6 (CTRL is code 6).
class_names = ["M0", "M1", "M2", "M3", "M4", "M5", "CTRL"]
n_classes = len(class_names)

fig, ax = plt.subplots(figsize=(12, 7))
cmap = plt.get_cmap("viridis", n_classes)
sc = ax.scatter(
    X_compressed_2d[:, 0],
    X_compressed_2d[:, 1],
    c=np.ravel(y_test),      # flatten the (n, 1) label array to one value per point
    alpha=.85,
    cmap=cmap,
    # Fixed limits put each integer code in the middle of its own colour band,
    # even if some class is absent from this split.
    vmin=-0.5,
    vmax=n_classes - 0.5,
)
# One tick per class, so the number of ticks matches the number of labels.
cax = fig.colorbar(sc, ax=ax, ticks=np.arange(n_classes))
cax.set_ticklabels(class_names)
ax.set_xlabel('tsne 1')
ax.set_ylabel('tsne 2')

In [None]:
# Write the figure held in `fig` to gene2.png (path is relative to the working directory).
fig.savefig("gene2.png")

In [None]:
# Read the comma-separated list of selected gene names.
# The context manager closes the file handle once the text is read. Each token
# is stripped so that a trailing newline or stray space cannot stop a gene
# name from matching a column label; empty tokens are discarded.
with open("../input/aml-top-genes/compile.txt", "r") as f:
    top_genes = [gene.strip() for gene in f.read().split(",") if gene.strip()]

In [None]:
top_genes

In [None]:
new_df = df.loc[:, df.columns.isin(top_genes)]

In [None]:
new_df.head()

In [None]:
# Rebuild the feature array from the reduced column set; labels are unchanged.
# NOTE(review): rows of X and y are still paired purely by position.
X = new_df.to_numpy()
y = lbl.to_numpy()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=0)

In [None]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)