In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
import scipy
import glob
import sklearn 
from sklearn.model_selection import train_test_split
from keras import optimizers
from keras.layers import Input, Activation, Dense, LeakyReLU
from keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [38]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
            print(e)

# UNFILTERED

In [39]:
# Data import, wt and mutant
accuracy = 84
importance = 25

wt_unfiltered =glob.glob('wildtype*.lccdata')

# filtered wt LCC data import
wt_un_var_names = []

for window in range(3,24):
    wt_un_var_names.append('wt_'+str(window))

wt_un_var_names.append('wt_25')
    
for var, file in zip(wt_un_var_names, wt_unfiltered):
    globals()[var] = pd.read_csv(file,sep='\t').drop(columns='Unnamed: 0')

# filtered mutant LCC data import

D132H_unfiltered =glob.glob('myc*.lccdata')

D132H_un_var_names = []

for window in range(3,24):
    D132H_un_var_names.append('D132H_'+str(window))
    
D132H_un_var_names.append('D132H_25')

    
for var, file in zip(D132H_un_var_names, D132H_unfiltered):
    globals()[var] = pd.read_csv(file,sep='\t').drop(columns='Unnamed: 0')

In [40]:
# Concateneate wt and mutant dataframes

wt_unf = pd.concat([wt_3, wt_4, wt_5, wt_6, wt_7, wt_8, wt_9, 
                    wt_10, wt_11, wt_12, wt_13, wt_14, wt_15, 
                    wt_16, wt_17, wt_18, wt_19, wt_20, wt_21, 
                    wt_22, wt_23, wt_25], axis = 1)
    
D132H_unf = pd.concat([D132H_3, D132H_4, D132H_5, D132H_6, D132H_7, 
                       D132H_8, D132H_9, D132H_10 D132H_11, D132H_12, 
                       D132H_13, D132H_14, D132H_15, D132H_16, D132H_17, 
                       D132H_18, D132H_19, D132H_20, D132H_21, D132H_22, 
                       D132H_23, D132H_25], axis = 1)

colnames = [*range(0,246)]
colnames
wt_unf.columns = colnames
D132H_unf.columns = colnames


In [41]:
# Data pre processing

def labels(wt, mutant):
    
    wt_label = np.zeros(len(wt)) # Set wt labels to 0
    
    mutant_label = np.ones(len(mutant))
    
    # Concatenate data frames and label arrays

    X_train_full = pd.concat([wt, mutant])
    y_train_full = np.concatenate((wt_label, mutant_label))

    return X_train_full, y_train_full

In [42]:
# Data pre processing

def preprocessing(wt, mutant):
    
    wt_label = np.zeros(len(wt)) # Set wt labels to 0
    
    mutant_label = np.ones(len(mutant))
    
    # Concatenate data frames and label arrays

    X_train_full = pd.concat([wt.reset_index(), mutant.reset_index()])
    y_train_full = np.concatenate((wt_label, mutant_label))
    
    # Separate training and validation sets and print relevant shapes
    
    X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, stratify=y_train_full, test_size=0.2)
    
    print('Window size:', str(window))
    print(X_train.shape)
    print(X_valid.shape)
    print(y_train.shape)
    print(y_valid.shape)
    
    return X_train, X_valid, y_train, y_valid

In [43]:
X_train_unf, X_valid_unf, y_train_unf, y_valid_unf = preprocessing(wt_unf, D132H_unf)

Window size: 23
(64000, 247)
(16000, 247)
(64000,)
(16000,)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


In [47]:
def get_ae(train_data, LeReLU_alpha=0.01):
    
    #Input layer
    input_layer = Input(shape=(train_data.shape[1]), name='ae_input')
    
    encoder = Dense(256, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_e1')(input_layer)
    encoder = Dense(128, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_e2')(encoder)
    encoder = Dense(64, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_e3')(encoder)
    encoder = Dense(32, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_e4')(encoder)
    encoder = Dense(16, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_e5')(encoder)
    encoder = Dense(8, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_e6')(encoder)

    encoded = Dense(2, activation=LeakyReLU(alpha=LeReLU_alpha), name='ae_latent')(encoder)
    
    decoder = Dense(8, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_d1')(encoded)
    decoder = Dense(16, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_d2')(decoder)
    decoder = Dense(32, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_d3')(decoder)
    decoder = Dense(64, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_d4')(decoder)
    decoder = Dense(128, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_d5')(decoder)
    decoder = Dense(256, activation=LeakyReLU(alpha=LeReLU_alpha), name='dense_d6')(decoder)

    output_layer = Dense(train_data.shape[1], activation=LeakyReLU(alpha=LeReLU_alpha), name='ae_output')(decoder)
    
    model = Model(input_layer, output_layer)

    return model   

In [48]:
autoencoder = get_ae(X_train_unf)

In [49]:
autoencoder.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ae_input (InputLayer)       [(None, 247)]             0         
                                                                 
 dense_e1 (Dense)            (None, 256)               63488     
                                                                 
 dense_e2 (Dense)            (None, 128)               32896     
                                                                 
 dense_e3 (Dense)            (None, 64)                8256      
                                                                 
 dense_e4 (Dense)            (None, 32)                2080      
                                                                 
 dense_e5 (Dense)            (None, 16)                528       
                                                                 
 dense_e6 (Dense)            (None, 8)                 136 

In [50]:
# Compile the model
autoencoder.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam(learning_rate = 0.005))

In [51]:
X_valid.shape

(16000, 247)

In [52]:
history = autoencoder.fit(X_train_unf, X_train_unf, batch_size = 1280, 
                              epochs = 10, validation_data = (X_valid_unf, X_valid_unf))
name = "unFILT_a87_i50_SAE_complex1_0.2_lr5_10e5_"
    # convert history object to dataframe and plot rates
training_history = pd.DataFrame(history.history)
plt.plot (training_history);
file_name_0 = name + "_training_history" + str(counts)
training_history.to_pickle(file_name_0)
file_name_1 = name + str(1) + "_#1.png"
plt.savefig(file_name_1, dpi=300)
plt.clf()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<Figure size 432x288 with 0 Axes>

In [63]:
X_valid.shape

(16000, 247)

In [74]:
a=np.array([X_valid.iloc[3]])

In [75]:
a.shape

(1, 247)

In [68]:
dr_model = tf.keras.models.Model(inputs  = autoencoder.get_layer('ae_input').input, 
                                 outputs = autoencoder.get_layer('ae_latent').output)
dr_model.summary()
    
    # put the validation data through current latent layer model
x = []
y = []
z = []
for i in range(16):
    z.append(y_valid_unf[i])
    op = dr_model.predict(np.array([X_valid_unf.iloc[i]]))
    x.append(op[0][0])
    y.append(op[0][1])

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ae_input (InputLayer)       [(None, 247)]             0         
                                                                 
 dense_e1 (Dense)            (None, 256)               63488     
                                                                 
 dense_e2 (Dense)            (None, 128)               32896     
                                                                 
 dense_e3 (Dense)            (None, 64)                8256      
                                                                 
 dense_e4 (Dense)            (None, 32)                2080      
                                                                 
 dense_e5 (Dense)            (None, 16)                528       
                                                                 
 dense_e6 (Dense)            (None, 8)                 136 

In [71]:
    df = pd.DataFrame()
    df['x'] = x
    df['y'] = y
    df['z'] = ["trajectory-" + str(k) for k in z]
 
    plt.figure(figsize = (8, 6));
    fig = sns.scatterplot(x = 'x', y='y', hue='z', data=df, s=10)
    file_name_2 = name + str(1) + "_#2.png"
    fig.figure.savefig(file_name_2, dpi = 300)
    plt.clf()

<Figure size 576x432 with 0 Axes>

In [None]:
%%capture
plt.clf()
for counts in range (0,10000): #this determines the number of epoch sets
    history = autoencoder.fit(X_train_unf, X_train_unf, batch_size = 1280, 
                              epochs = 10000, validation_data = (X_valid_unf, X_valid_unf))
    name = "unFILT_a87_i50_SAE_complex1_0.2_lr5_10e5_"
    # convert history object to dataframe and plot rates
    training_history = pd.DataFrame(history.history)
    plt.plot (training_history);
    file_name_0 = name + "_training_history" + str(counts)
    training_history.to_pickle(file_name_0)
    file_name_1 = name + str(counts) + "_#1.png"
    plt.savefig(file_name_1, dpi=300)
    plt.clf()
    
    # read in latent layer
    dr_model = tf.keras.models.Model(inputs  = autoencoder.get_layer('ae_input').input, 
                                     outputs = autoencoder.get_layer('ae_latent').output)
    dr_model.summary()
    
    # put the validation data through current latent layer model
    x = []
    y = []
    z = []
    for i in range(16000):
        z.append(y_valid_unf[i])
        op = dr_model.predict(np.array([X_valid_unf.iloc[i]]))
        x.append(op[0][0])
        y.append(op[0][1])

    df = pd.DataFrame()
    df['x'] = x
    df['y'] = y
    df['z'] = ["trajectory-" + str(k) for k in z]
 
    plt.figure(figsize = (8, 6));
    fig = sns.scatterplot(x = 'x', y='y', hue='z', data=df, s=10)
    file_name_2 = name + str(counts) + "_#2.png"
    fig.figure.savefig(file_name_2, dpi = 300)
    plt.clf()
    
    #
    file_name_3 = str(counts)
    df.to_pickle(file_name_3 )

    file_name = 'models/saved_model_unFILT_' + str(counts)
    autoencoder.save(file_name)