In [1]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

import keras.metrics as metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load dataset 

full_dataset = pd.read_csv("/Users/nickpark/Desktop/codon-data/codon_usage.csv", low_memory=False)
dataset = full_dataset[full_dataset["DNAtype"].eq(0) | full_dataset["DNAtype"].eq(1) | full_dataset["DNAtype"].eq(2)]

# Remove irrelevant columns

dataset = dataset.drop(["Kingdom", "SpeciesID", "Ncodons", "SpeciesName"], axis=1)

# Remove weird rows

dataset = dataset.apply(pd.to_numeric, errors='coerce')
dataset = dataset[~dataset.applymap(np.isnan).any(1)]
dataset = dataset.to_numpy(dtype='float64')

dataset

array([[0.000e+00, 1.654e-02, 1.203e-02, ..., 2.510e-03, 5.000e-04,
        0.000e+00],
       [0.000e+00, 2.714e-02, 1.357e-02, ..., 2.710e-03, 6.800e-04,
        0.000e+00],
       [0.000e+00, 1.974e-02, 2.180e-02, ..., 3.910e-03, 0.000e+00,
        1.440e-03],
       ...,
       [1.000e+00, 1.423e-02, 3.321e-02, ..., 3.560e-03, 1.190e-03,
        2.017e-02],
       [0.000e+00, 1.757e-02, 2.028e-02, ..., 9.900e-04, 7.900e-04,
        1.560e-03],
       [1.000e+00, 1.778e-02, 3.724e-02, ..., 1.560e-03, 1.140e-03,
        2.161e-02]])

In [3]:
X = dataset[:,1:]
y = dataset[:,0].astype('int')
print(X.shape)
print(y.shape)

(12980, 64)
(12980,)


In [4]:
# Multi-class classification with Keras

# Encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)

# Convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

# Define baseline model
def baseline_model():
	# create model
	model = Sequential()
	model.add(Dense(15, input_dim=64, activation='relu'))
	model.add(Dense(3, activation='softmax'))
	# Compile model
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

estimator = KerasClassifier(build_fn=baseline_model, epochs=100, batch_size=5, verbose=2)
kfold = KFold(n_splits=10, shuffle=True)

In [5]:
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/100
2337/2337 - 3s - loss: 0.3581 - accuracy: 0.8850
Epoch 2/100
2337/2337 - 2s - loss: 0.1271 - accuracy: 0.9548
Epoch 3/100
2337/2337 - 2s - loss: 0.0823 - accuracy: 0.9745
Epoch 4/100
2337/2337 - 2s - loss: 0.0608 - accuracy: 0.9811
Epoch 5/100
2337/2337 - 2s - loss: 0.0488 - accuracy: 0.9862
Epoch 6/100
2337/2337 - 2s - loss: 0.0415 - accuracy: 0.9884
Epoch 7/100
2337/2337 - 2s - loss: 0.0370 - accuracy: 0.9888
Epoch 8/100
2337/2337 - 2s - loss: 0.0343 - accuracy: 0.9896
Epoch 9/100
2337/2337 - 2s - loss: 0.0322 - accuracy: 0.9902
Epoch 10/100
2337/2337 - 2s - loss: 0.0310 - accuracy: 0.9905
Epoch 11/100
2337/2337 - 2s - loss: 0.0290 - accuracy: 0.9915
Epoch 12/100
2337/2337 - 2s - loss: 0.0283 - accuracy: 0.9913
Epoch 13/100
2337/2337 - 2s - loss: 0.0274 - accuracy: 0.9921
Epoch 14/100
2337/2337 - 2s - loss: 0.0265 - accuracy: 0.9920
Epoch 15/100
2337/2337 - 2s - loss: 0.0259 - accuracy: 0.9927
Epoch 16/100
2337/2337 - 2s - loss: 0.0251 - accuracy: 0.9934
Epoch 17/100
2337

Epoch 33/100
2337/2337 - 2s - loss: 0.0197 - accuracy: 0.9940
Epoch 34/100
2337/2337 - 2s - loss: 0.0196 - accuracy: 0.9942
Epoch 35/100
2337/2337 - 2s - loss: 0.0188 - accuracy: 0.9946
Epoch 36/100
2337/2337 - 2s - loss: 0.0189 - accuracy: 0.9948
Epoch 37/100
2337/2337 - 2s - loss: 0.0189 - accuracy: 0.9944
Epoch 38/100
2337/2337 - 2s - loss: 0.0187 - accuracy: 0.9945
Epoch 39/100
2337/2337 - 2s - loss: 0.0183 - accuracy: 0.9946
Epoch 40/100
2337/2337 - 2s - loss: 0.0180 - accuracy: 0.9953
Epoch 41/100
2337/2337 - 2s - loss: 0.0176 - accuracy: 0.9947
Epoch 42/100
2337/2337 - 2s - loss: 0.0176 - accuracy: 0.9948
Epoch 43/100
2337/2337 - 2s - loss: 0.0177 - accuracy: 0.9949
Epoch 44/100
2337/2337 - 2s - loss: 0.0175 - accuracy: 0.9949
Epoch 45/100
2337/2337 - 2s - loss: 0.0167 - accuracy: 0.9954
Epoch 46/100
2337/2337 - 2s - loss: 0.0170 - accuracy: 0.9950
Epoch 47/100
2337/2337 - 2s - loss: 0.0167 - accuracy: 0.9953
Epoch 48/100
2337/2337 - 2s - loss: 0.0166 - accuracy: 0.9949
Epoch 49

Epoch 65/100
2337/2337 - 2s - loss: 0.0145 - accuracy: 0.9959
Epoch 66/100
2337/2337 - 2s - loss: 0.0144 - accuracy: 0.9961
Epoch 67/100
2337/2337 - 2s - loss: 0.0142 - accuracy: 0.9961
Epoch 68/100
2337/2337 - 2s - loss: 0.0137 - accuracy: 0.9965
Epoch 69/100
2337/2337 - 2s - loss: 0.0137 - accuracy: 0.9961
Epoch 70/100
2337/2337 - 2s - loss: 0.0139 - accuracy: 0.9960
Epoch 71/100
2337/2337 - 2s - loss: 0.0137 - accuracy: 0.9961
Epoch 72/100
2337/2337 - 2s - loss: 0.0136 - accuracy: 0.9958
Epoch 73/100
2337/2337 - 2s - loss: 0.0133 - accuracy: 0.9963
Epoch 74/100
2337/2337 - 2s - loss: 0.0135 - accuracy: 0.9957
Epoch 75/100
2337/2337 - 2s - loss: 0.0134 - accuracy: 0.9960
Epoch 76/100
2337/2337 - 2s - loss: 0.0130 - accuracy: 0.9962
Epoch 77/100
2337/2337 - 2s - loss: 0.0132 - accuracy: 0.9963
Epoch 78/100
2337/2337 - 2s - loss: 0.0130 - accuracy: 0.9961
Epoch 79/100
2337/2337 - 2s - loss: 0.0128 - accuracy: 0.9964
Epoch 80/100
2337/2337 - 2s - loss: 0.0130 - accuracy: 0.9962
Epoch 81

Epoch 97/100
2337/2337 - 2s - loss: 0.0126 - accuracy: 0.9959
Epoch 98/100
2337/2337 - 2s - loss: 0.0123 - accuracy: 0.9961
Epoch 99/100
2337/2337 - 2s - loss: 0.0123 - accuracy: 0.9966
Epoch 100/100
2337/2337 - 2s - loss: 0.0121 - accuracy: 0.9965
260/260 - 0s - loss: 0.0203 - accuracy: 0.9969
Epoch 1/100
2337/2337 - 2s - loss: 0.3670 - accuracy: 0.8783
Epoch 2/100
2337/2337 - 2s - loss: 0.1274 - accuracy: 0.9588
Epoch 3/100
2337/2337 - 2s - loss: 0.0812 - accuracy: 0.9759
Epoch 4/100
2337/2337 - 2s - loss: 0.0605 - accuracy: 0.9813
Epoch 5/100
2337/2337 - 3s - loss: 0.0490 - accuracy: 0.9852
Epoch 6/100
2337/2337 - 2s - loss: 0.0421 - accuracy: 0.9875
Epoch 7/100
2337/2337 - 2s - loss: 0.0382 - accuracy: 0.9880
Epoch 8/100
2337/2337 - 2s - loss: 0.0353 - accuracy: 0.9895
Epoch 9/100
2337/2337 - 3s - loss: 0.0333 - accuracy: 0.9900
Epoch 10/100
2337/2337 - 2s - loss: 0.0316 - accuracy: 0.9897
Epoch 11/100
2337/2337 - 2s - loss: 0.0298 - accuracy: 0.9909
Epoch 12/100
2337/2337 - 2s - l

KeyboardInterrupt: 