In [None]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [3]:
mutation_file = "path_to_cell_line_mutations/cell_line_mutations.txt"

drug_file = "path_drug_line_responses/drug_response.txt"

In [None]:
mutation_cells = pd.read_csv(mutation_file, sep='\t')
drug_responses = pd.read_csv(drug_file, sep='\t')

In [4]:
drug_responses['category'] = np.where(drug_responses['lnIC50'] <= 1, "low",
                                      np.where(drug_responses['lnIC50'] >= 4.2, "high", "medium"))

In [5]:
print(mutation_cells.T)

           Gene1  Gene2  Gene3  Gene4  Gene5  Gene6  Gene7  Gene8  Gene9  \
Sample1        0      1      1      0      1      1      0      0      0   
Sample10       1      0      1      1      1      1      0      0      0   
Sample100      0      1      0      0      0      1      1      0      1   
Sample11       0      0      0      0      1      0      1      1      0   
Sample12       0      1      0      1      0      1      0      1      0   
...          ...    ...    ...    ...    ...    ...    ...    ...    ...   
Sample95       0      1      0      0      0      0      0      1      1   
Sample96       1      1      0      0      0      1      1      1      1   
Sample97       0      1      1      0      0      0      1      0      1   
Sample98       1      1      1      1      1      1      1      1      1   
Sample99       1      0      0      0      0      0      0      1      1   

           Gene10  ...  Gene41  Gene42  Gene43  Gene44  Gene45  Gene46  \
Sample1      

In [6]:
drug_responses.head()

Unnamed: 0,sample,lnIC50,category
0,Sample1,-0.383397,low
1,Sample10,3.963216,medium
2,Sample100,0.783226,low
3,Sample11,4.138341,medium
4,Sample12,0.287961,low


In [7]:
print(drug_responses.shape)

(100, 3)


In [8]:
# encode the three possible drug categories in an integer frame
encoder = LabelEncoder()
encoder.fit(drug_responses['category'])
encoded_Y = encoder.transform(drug_responses['category'])
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

In [9]:
# create the sequential neural network model
# use two dense layers that correspond to the number of input dimensions
# use relu as the common activation function for the first layer and softmax for the second
# measure the model based on accuracy
def baseline_model():
    # initialize a sequential model and add layers to it
    model = Sequential()
    model.add(Dense(6, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['accuracy'])
    return model

In [10]:
X_train, X_test, y_train, y_test = train_test_split(mutation_cells.T, dummy_y, test_size=0.25, random_state=1)
print('Training Dimensions: ', X_train.shape, y_train.shape)
print('Testing Dimensions:', X_test.shape, y_test.shape)

Training Dimensions:  (75, 50) (75, 3)
Testing Dimensions: (25, 50) (25, 3)


In [11]:
epochs_to_try = [10, 20, 50, 100, 200]
batch_size = [5, 10, 20, 30, 50, 100]
model_performance = []
for i in epochs_to_try: 
    for j in batch_size:
        first_model = baseline_model()
        first_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        first_model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)
        _, accuracy = first_model.evaluate(X_test, y_test, verbose = 0)
        model_performance.append("Epochs:{}, Batch Size:{}, Accuracy:{}".format(i, j, accuracy*100))



In [12]:
print('\n'.join(model_performance))

Epochs:10, Batch Size:5, Accuracy:63.999998569488525
Epochs:10, Batch Size:10, Accuracy:72.00000286102295
Epochs:10, Batch Size:20, Accuracy:75.99999904632568
Epochs:10, Batch Size:30, Accuracy:68.00000071525574
Epochs:10, Batch Size:50, Accuracy:72.00000286102295
Epochs:10, Batch Size:100, Accuracy:72.00000286102295
Epochs:20, Batch Size:5, Accuracy:68.00000071525574
Epochs:20, Batch Size:10, Accuracy:63.999998569488525
Epochs:20, Batch Size:20, Accuracy:68.00000071525574
Epochs:20, Batch Size:30, Accuracy:72.00000286102295
Epochs:20, Batch Size:50, Accuracy:68.00000071525574
Epochs:20, Batch Size:100, Accuracy:72.00000286102295
Epochs:50, Batch Size:5, Accuracy:63.999998569488525
Epochs:50, Batch Size:10, Accuracy:72.00000286102295
Epochs:50, Batch Size:20, Accuracy:68.00000071525574
Epochs:50, Batch Size:30, Accuracy:68.00000071525574
Epochs:50, Batch Size:50, Accuracy:68.00000071525574
Epochs:50, Batch Size:100, Accuracy:68.00000071525574
Epochs:100, Batch Size:5, Accuracy:75.99999