<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import numpy as np
import pandas as pd
%load_ext autoreload
%autoreload 2

batch_size = 128
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000,28,28,1)
x_test = x_test.reshape(10000,28,28,1)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')


def remove_numbers(x,y,list_of_numbers_to_keep):
    y_df = pd.DataFrame(y,columns=['Label'])
    y_df = y_df[y_df['Label'].apply(str).isin(list_of_numbers_to_keep)]
    x = x[y_df.index,:,:,:]
    y = y_df.to_numpy()
    return x,y.flatten()

num_to_keep = ['0','1']
num_classes = len(num_to_keep)
x_train,y_train = remove_numbers(x_train,y_train,num_to_keep)
x_test,y_test = remove_numbers(x_test,y_test,num_to_keep)

# # convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
    

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [101]:
def baseline_model(name,num_classes=2):
    model = Sequential(name=name)
    model.add(Conv2D(2, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=(28,28,1)))
    model.add(MaxPooling2D(pool_size=(7, 7)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

baseline_model('test').summary()

## Original model before pruning
# model = Sequential()
# model.add(Conv2D(32, kernel_size=(3, 3),
#                  activation='relu',
#                  input_shape=(28,28,1)))
# model.add(Conv2D(64, (3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(10, activation='softmax'))
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

Model: "test"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 2)         20        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 3, 3, 2)           0         
_________________________________________________________________
dropout (Dropout)            (None, 3, 3, 2)           0         
_________________________________________________________________
flatten (Flatten)            (None, 18)                0         
_________________________________________________________________
dense (Dense)                (None, 2)                 38        
Total params: 58
Trainable params: 58
Non-trainable params: 0
_________________________________________________________________


In [102]:
import random
rand_numbers = random.sample(range(0, 1000), 100)
rand_numbers
x_test2 = x_test[rand_numbers,:,:,:]
y_test2 = y_test[rand_numbers,:]

In [103]:
from utils import *

In [110]:
train_model_iteratively(baseline_model,x_train,y_train,x_test2,y_test2,'./data/tmp_cnn',epochs=5)

Train: (12665, 28, 28, 1) (12665, 2)
Test: (100, 28, 28, 1) (100, 2)
model.0
Saving model.0 epoch 1 ./data/tmp_cnn/model.0-1-2.68.hdf5
Saving model.0 epoch 4 ./data/tmp_cnn/model.0-4-0.21.hdf5
model.1
Saving model.1 epoch 1 ./data/tmp_cnn/model.1-1-13.67.hdf5
Saving model.1 epoch 4 ./data/tmp_cnn/model.1-4-1.34.hdf5
model.2
Saving model.2 epoch 1 ./data/tmp_cnn/model.2-1-3.41.hdf5
Saving model.2 epoch 4 ./data/tmp_cnn/model.2-4-2.08.hdf5
model.3
Saving model.3 epoch 1 ./data/tmp_cnn/model.3-1-2.39.hdf5
Saving model.3 epoch 4 ./data/tmp_cnn/model.3-4-0.27.hdf5
model.4
Saving model.4 epoch 1 ./data/tmp_cnn/model.4-1-2.91.hdf5
Saving model.4 epoch 4 ./data/tmp_cnn/model.4-4-1.26.hdf5
model.5
Saving model.5 epoch 1 ./data/tmp_cnn/model.5-1-1.37.hdf5
Saving model.5 epoch 4 ./data/tmp_cnn/model.5-4-0.60.hdf5
model.6
Saving model.6 epoch 1 ./data/tmp_cnn/model.6-1-9.54.hdf5
Saving model.6 epoch 4 ./data/tmp_cnn/model.6-4-2.02.hdf5
model.7
Saving model.7 epoch 1 ./data/tmp_cnn/model.7-1-10.50.

In [None]:
yDF = pd.DataFrame(y_test2,columns=['Actual Label 0','Actual Label 1'])
yDF['point']=range(100)
y_pred = pd.read_csv('./data/tmp_cnn/predictions.csv')
y_pred.rename({y_pred.columns[0]:'point'},axis=1,inplace=True)
y_pred2 = pd.merge(yDF,y_pred,left_on='point',right_on='point')
y_pred2.to_csv('./data/tmp_cnn/predictions2.csv')

In [111]:
model_weights=get_model_weights('./data/tmp_cnn')

model.10-1-0.57.hdf5
model.16-1-2.21.hdf5
model.6-4-2.02.hdf5
model.12-1-0.55.hdf5
model.18-1-0.72.hdf5
model.14-1-0.12.hdf5
model.1-1-1.90.hdf5
model.4-4-1.26.hdf5
model.15-1-1.98.hdf5
model.16-4-1.25.hdf5
model.0-1-3.27.hdf5
model.5-1-1.37.hdf5
model.15-1-2.50.hdf5
model.15-4-0.62.hdf5
model.19-1-0.10.hdf5
model.19-1-2.40.hdf5
model.4-1-2.91.hdf5
model.10-4-2.14.hdf5
model.14-1-3.28.hdf5
model.2-1-1.53.hdf5
model.13-1-2.46.hdf5
model.4-1-1.80.hdf5
model.7-4-0.50.hdf5
model.8-4-0.40.hdf5
model.12-4-0.52.hdf5
model.14-4-1.22.hdf5
model.17-1-1.88.hdf5
model.3-1-5.16.hdf5
model.8-1-1.57.hdf5
model.19-4-0.46.hdf5
model.2-1-3.41.hdf5
model.6-1-9.54.hdf5
model.1-1-13.67.hdf5
model.11-1-0.77.hdf5
model.6-1-1.59.hdf5
model.9-1-0.29.hdf5
model.17-4-0.44.hdf5
model.1-4-1.34.hdf5
model.13-1-5.40.hdf5
model.10-1-12.64.hdf5
model.5-1-0.73.hdf5
model.18-4-0.42.hdf5
model.12-1-3.10.hdf5
model.0-1-2.68.hdf5
model.11-4-0.12.hdf5
model.17-1-1.57.hdf5
model.2-4-2.08.hdf5
model.16-1-2.74.hdf5
model.0-4-0

In [112]:
model_weights[list(model_weights.keys())[0]]['conv_kernel_0'].flatten().shape

(18,)

In [113]:
model_weights[list(model_weights.keys())[0]]['conv_kernel_0'].shape

(3, 3, 1, 2)

In [115]:
convert_weight_dict_to_dataframe(model_weights)['epoch'].min()

'1'

In [90]:
df = pd.read_csv('/Users/meslami/Downloads/experiment.ginkgo.42587_ReadCountMatrix_preCAD_FPKM_transposed.csv')

In [92]:
df2 = df[(df['QC_gcorr_BOOL']==True)&(df['QC_nmap_BOOL']==True)]

In [96]:
df3 = pd.DataFrame(df2[['Timepoint','IPTG','Strain']].value_counts(),columns=['cnt'])

In [98]:
df3[df3['cnt']>1]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cnt
Timepoint,IPTG,Strain,Unnamed: 3_level_1
23.0,0.000,EcN_NOTgate_AmeR,4
24.5,0.001,EcN_NOTgate_AmeR,4
23.0,0.001,EcN_SensorArrayOnly,4
23.0,0.001,EcN_NOTgate_LitR,4
36.0,0.000,EcN_NOTgate_BM3R1,4
...,...,...,...
37.5,0.000,EcN_NOTgate_AmeR,2
36.0,0.001,EcN_SensorArrayOnly,2
36.0,0.001,EcN_NOTgate_BetI,2
24.5,0.001,EcN_NOTgate_HlyIIR,2


In [99]:
df[['Timepoint','IPTG','Strain']].value_counts()

Timepoint  IPTG   Strain                    
37.5       0.001  Escherichia_coli_Nissle_WT    4
                  EcN_SensorArrayOnly           4
24.5       0.000  EcN_NOTgate_AmtR              4
                  EcN_NOTgate_BM3R1             4
                  EcN_NOTgate_BetI              4
                                               ..
36.0       0.001  EcN_NOTgate_LitR              4
                  EcN_NOTgate_PhlF              4
                  EcN_NOTgate_PsrA              4
                  EcN_NOTgate_QacR              4
23.0       0.000  EcN_NOTgate_AmeR              4
Length: 96, dtype: int64