In [1]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

import keras.metrics as metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load dataset 

full_dataset = pd.read_csv("/Users/nickpark/Desktop/codon-data/codon_usage.csv", low_memory=False)
dataset = full_dataset[full_dataset["DNAtype"].eq(0) | full_dataset["DNAtype"].eq(1) | full_dataset["DNAtype"].eq(2)]

# Remove irrelevant columns

dataset = dataset.drop(["Kingdom", "SpeciesID", "Ncodons", "SpeciesName"], axis=1)

# Remove weird rows

dataset = dataset.apply(pd.to_numeric, errors='coerce')
dataset = dataset[~dataset.applymap(np.isnan).any(1)]
dataset = dataset.to_numpy(dtype='float64')

dataset

array([[0.000e+00, 1.654e-02, 1.203e-02, ..., 2.510e-03, 5.000e-04,
        0.000e+00],
       [0.000e+00, 2.714e-02, 1.357e-02, ..., 2.710e-03, 6.800e-04,
        0.000e+00],
       [0.000e+00, 1.974e-02, 2.180e-02, ..., 3.910e-03, 0.000e+00,
        1.440e-03],
       ...,
       [1.000e+00, 1.423e-02, 3.321e-02, ..., 3.560e-03, 1.190e-03,
        2.017e-02],
       [0.000e+00, 1.757e-02, 2.028e-02, ..., 9.900e-04, 7.900e-04,
        1.560e-03],
       [1.000e+00, 1.778e-02, 3.724e-02, ..., 1.560e-03, 1.140e-03,
        2.161e-02]])

In [3]:
X = dataset[:,1:]
y = dataset[:,0].astype('int')
print(X.shape)
print(y.shape)

(12980, 64)
(12980,)


In [4]:
# Multi-class classification with Keras

# Encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)

# Convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

# Define baseline model
def baseline_model():
	# create model
	model = Sequential()
	model.add(Dense(15, input_dim=64, activation='relu'))
	model.add(Dense(3, activation='softmax'))
	# Compile model
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

estimator = KerasClassifier(build_fn=baseline_model, epochs=100, batch_size=5, verbose=2)
kfold = KFold(n_splits=5, shuffle=True)

accuracies = cross_val_score(estimator, X, dummy_y, cv=kfold, verbose=1)
print("Accuracy:", accuracies.mean())

rocs = cross_val_score(estimator, X, dummy_y, cv=kfold, scoring='roc_auc_ovr', verbose=1)
print("AUROC:", rocs.mean())

macros = cross_val_score(estimator, X, dummy_y, cv=kfold, scoring='f1_macro', verbose=1)
print("Macro F1:", macros.mean())


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 1/100
2077/2077 - 2s - loss: 0.4197 - accuracy: 0.8658
Epoch 2/100
2077/2077 - 2s - loss: 0.1522 - accuracy: 0.9463
Epoch 3/100
2077/2077 - 1s - loss: 0.1005 - accuracy: 0.9687
Epoch 4/100
2077/2077 - 2s - loss: 0.0752 - accuracy: 0.9783
Epoch 5/100
2077/2077 - 2s - loss: 0.0605 - accuracy: 0.9827
Epoch 6/100
2077/2077 - 1s - loss: 0.0509 - accuracy: 0.9864
Epoch 7/100
2077/2077 - 2s - loss: 0.0449 - accuracy: 0.9871
Epoch 8/100
2077/2077 - 2s - loss: 0.0411 - accuracy: 0.9883
Epoch 9/100
2077/2077 - 2s - loss: 0.0380 - accuracy: 0.9888
Epoch 10/100
2077/2077 - 2s - loss: 0.0358 - accuracy: 0.9896
Epoch 11/100
2077/2077 - 2s - loss: 0.0341 - accuracy: 0.9900
Epoch 12/100
2077/2077 - 2s - loss: 0.0328 - accuracy: 0.9899
Epoch 13/100
2077/2077 - 2s - loss: 0.0316 - accuracy: 0.9911
Epoch 14/100
2077/2077 - 2s - loss: 0.0306 - accuracy: 0.9909
Epoch 15/100
2077/2077 - 2s - loss: 0.0299 - accuracy: 0.9907
Epoch 16/100
2077/2077 - 2s - loss: 0.0290 - accuracy: 0.9914
Epoch 17/100
2077

Epoch 33/100
2077/2077 - 3s - loss: 0.0213 - accuracy: 0.9936
Epoch 34/100
2077/2077 - 3s - loss: 0.0213 - accuracy: 0.9936
Epoch 35/100
2077/2077 - 3s - loss: 0.0211 - accuracy: 0.9937
Epoch 36/100
2077/2077 - 2s - loss: 0.0200 - accuracy: 0.9940
Epoch 37/100
2077/2077 - 2s - loss: 0.0205 - accuracy: 0.9941
Epoch 38/100
2077/2077 - 2s - loss: 0.0202 - accuracy: 0.9938
Epoch 39/100
2077/2077 - 2s - loss: 0.0202 - accuracy: 0.9939
Epoch 40/100
2077/2077 - 2s - loss: 0.0201 - accuracy: 0.9942
Epoch 41/100
2077/2077 - 2s - loss: 0.0192 - accuracy: 0.9950
Epoch 42/100
2077/2077 - 2s - loss: 0.0196 - accuracy: 0.9942
Epoch 43/100
2077/2077 - 2s - loss: 0.0192 - accuracy: 0.9946
Epoch 44/100
2077/2077 - 2s - loss: 0.0188 - accuracy: 0.9949
Epoch 45/100
2077/2077 - 3s - loss: 0.0184 - accuracy: 0.9947
Epoch 46/100
2077/2077 - 2s - loss: 0.0184 - accuracy: 0.9946
Epoch 47/100
2077/2077 - 2s - loss: 0.0183 - accuracy: 0.9948
Epoch 48/100
2077/2077 - 2s - loss: 0.0182 - accuracy: 0.9948
Epoch 49

Epoch 65/100
2077/2077 - 2s - loss: 0.0133 - accuracy: 0.9960
Epoch 66/100
2077/2077 - 2s - loss: 0.0129 - accuracy: 0.9964
Epoch 67/100
2077/2077 - 2s - loss: 0.0126 - accuracy: 0.9961
Epoch 68/100
2077/2077 - 2s - loss: 0.0130 - accuracy: 0.9963
Epoch 69/100
2077/2077 - 2s - loss: 0.0127 - accuracy: 0.9963
Epoch 70/100
2077/2077 - 2s - loss: 0.0125 - accuracy: 0.9960
Epoch 71/100
2077/2077 - 2s - loss: 0.0127 - accuracy: 0.9965
Epoch 72/100
2077/2077 - 2s - loss: 0.0125 - accuracy: 0.9961
Epoch 73/100
2077/2077 - 2s - loss: 0.0125 - accuracy: 0.9961
Epoch 74/100
2077/2077 - 2s - loss: 0.0122 - accuracy: 0.9964
Epoch 75/100
2077/2077 - 2s - loss: 0.0121 - accuracy: 0.9965
Epoch 76/100
2077/2077 - 2s - loss: 0.0121 - accuracy: 0.9964
Epoch 77/100
2077/2077 - 2s - loss: 0.0120 - accuracy: 0.9966
Epoch 78/100
2077/2077 - 2s - loss: 0.0118 - accuracy: 0.9964
Epoch 79/100
2077/2077 - 2s - loss: 0.0118 - accuracy: 0.9968
Epoch 80/100
2077/2077 - 2s - loss: 0.0114 - accuracy: 0.9968
Epoch 81

Epoch 97/100
2077/2077 - 2s - loss: 0.0107 - accuracy: 0.9974
Epoch 98/100
2077/2077 - 2s - loss: 0.0109 - accuracy: 0.9965
Epoch 99/100
2077/2077 - 2s - loss: 0.0105 - accuracy: 0.9967
Epoch 100/100
2077/2077 - 2s - loss: 0.0107 - accuracy: 0.9970
520/520 - 0s - loss: 0.0390 - accuracy: 0.9900
Epoch 1/100
2077/2077 - 2s - loss: 0.4031 - accuracy: 0.8622
Epoch 2/100
2077/2077 - 3s - loss: 0.1326 - accuracy: 0.9560
Epoch 3/100
2077/2077 - 2s - loss: 0.0868 - accuracy: 0.9738
Epoch 4/100
2077/2077 - 2s - loss: 0.0656 - accuracy: 0.9804
Epoch 5/100
2077/2077 - 2s - loss: 0.0531 - accuracy: 0.9841
Epoch 6/100
2077/2077 - 2s - loss: 0.0454 - accuracy: 0.9871
Epoch 7/100
2077/2077 - 2s - loss: 0.0403 - accuracy: 0.9879
Epoch 8/100
2077/2077 - 2s - loss: 0.0367 - accuracy: 0.9885
Epoch 9/100
2077/2077 - 2s - loss: 0.0345 - accuracy: 0.9902
Epoch 10/100
2077/2077 - 2s - loss: 0.0326 - accuracy: 0.9901
Epoch 11/100
2077/2077 - 2s - loss: 0.0309 - accuracy: 0.9902
Epoch 12/100
2077/2077 - 2s - l

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 16.1min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


2077/2077 - 2s - loss: 0.3782 - accuracy: 0.8748
Epoch 2/100
2077/2077 - 2s - loss: 0.1256 - accuracy: 0.9602
Epoch 3/100
2077/2077 - 2s - loss: 0.0807 - accuracy: 0.9757
Epoch 4/100
2077/2077 - 2s - loss: 0.0602 - accuracy: 0.9819
Epoch 5/100
2077/2077 - 2s - loss: 0.0493 - accuracy: 0.9868
Epoch 6/100
2077/2077 - 2s - loss: 0.0429 - accuracy: 0.9879
Epoch 7/100
2077/2077 - 2s - loss: 0.0385 - accuracy: 0.9889
Epoch 8/100
2077/2077 - 2s - loss: 0.0352 - accuracy: 0.9898
Epoch 9/100
2077/2077 - 2s - loss: 0.0338 - accuracy: 0.9899
Epoch 10/100
2077/2077 - 2s - loss: 0.0319 - accuracy: 0.9896
Epoch 11/100
2077/2077 - 2s - loss: 0.0308 - accuracy: 0.9911
Epoch 12/100
2077/2077 - 2s - loss: 0.0293 - accuracy: 0.9908
Epoch 13/100
2077/2077 - 2s - loss: 0.0285 - accuracy: 0.9909
Epoch 14/100
2077/2077 - 2s - loss: 0.0275 - accuracy: 0.9913
Epoch 15/100
2077/2077 - 2s - loss: 0.0271 - accuracy: 0.9923
Epoch 16/100
2077/2077 - 2s - loss: 0.0263 - accuracy: 0.9920
Epoch 17/100
2077/2077 - 2s -

2077/2077 - 2s - loss: 0.0189 - accuracy: 0.9948
Epoch 29/100
2077/2077 - 2s - loss: 0.0190 - accuracy: 0.9947
Epoch 30/100
2077/2077 - 2s - loss: 0.0190 - accuracy: 0.9948
Epoch 31/100
2077/2077 - 2s - loss: 0.0185 - accuracy: 0.9947
Epoch 32/100
2077/2077 - 2s - loss: 0.0186 - accuracy: 0.9948
Epoch 33/100
2077/2077 - 2s - loss: 0.0179 - accuracy: 0.9946
Epoch 34/100
2077/2077 - 2s - loss: 0.0177 - accuracy: 0.9957
Epoch 35/100
2077/2077 - 2s - loss: 0.0174 - accuracy: 0.9955
Epoch 36/100
2077/2077 - 2s - loss: 0.0172 - accuracy: 0.9953
Epoch 37/100
2077/2077 - 2s - loss: 0.0169 - accuracy: 0.9951
Epoch 38/100
2077/2077 - 2s - loss: 0.0168 - accuracy: 0.9951
Epoch 39/100
2077/2077 - 2s - loss: 0.0165 - accuracy: 0.9954
Epoch 40/100
2077/2077 - 2s - loss: 0.0164 - accuracy: 0.9961
Epoch 41/100
2077/2077 - 2s - loss: 0.0161 - accuracy: 0.9957
Epoch 42/100
2077/2077 - 2s - loss: 0.0158 - accuracy: 0.9951
Epoch 43/100
2077/2077 - 2s - loss: 0.0157 - accuracy: 0.9957
Epoch 44/100
2077/207

Epoch 61/100
2077/2077 - 2s - loss: 0.0167 - accuracy: 0.9957
Epoch 62/100
2077/2077 - 2s - loss: 0.0166 - accuracy: 0.9957
Epoch 63/100
2077/2077 - 2s - loss: 0.0161 - accuracy: 0.9956
Epoch 64/100
2077/2077 - 2s - loss: 0.0161 - accuracy: 0.9956
Epoch 65/100
2077/2077 - 2s - loss: 0.0164 - accuracy: 0.9955
Epoch 66/100
2077/2077 - 2s - loss: 0.0166 - accuracy: 0.9960
Epoch 67/100
2077/2077 - 2s - loss: 0.0162 - accuracy: 0.9961
Epoch 68/100
2077/2077 - 2s - loss: 0.0159 - accuracy: 0.9955
Epoch 69/100
2077/2077 - 2s - loss: 0.0160 - accuracy: 0.9960
Epoch 70/100
2077/2077 - 2s - loss: 0.0157 - accuracy: 0.9960
Epoch 71/100
2077/2077 - 2s - loss: 0.0160 - accuracy: 0.9960
Epoch 72/100
2077/2077 - 2s - loss: 0.0157 - accuracy: 0.9953
Epoch 73/100
2077/2077 - 2s - loss: 0.0154 - accuracy: 0.9962
Epoch 74/100
2077/2077 - 2s - loss: 0.0158 - accuracy: 0.9960
Epoch 75/100
2077/2077 - 2s - loss: 0.0153 - accuracy: 0.9956
Epoch 76/100
2077/2077 - 2s - loss: 0.0151 - accuracy: 0.9959
Epoch 77

2077/2077 - 2s - loss: 0.0137 - accuracy: 0.9961
Epoch 94/100
2077/2077 - 2s - loss: 0.0139 - accuracy: 0.9960
Epoch 95/100
2077/2077 - 2s - loss: 0.0140 - accuracy: 0.9961
Epoch 96/100
2077/2077 - 2s - loss: 0.0143 - accuracy: 0.9955
Epoch 97/100
2077/2077 - 2s - loss: 0.0139 - accuracy: 0.9957
Epoch 98/100
2077/2077 - 2s - loss: 0.0139 - accuracy: 0.9957
Epoch 99/100
2077/2077 - 2s - loss: 0.0139 - accuracy: 0.9960
Epoch 100/100
2077/2077 - 2s - loss: 0.0137 - accuracy: 0.9961
520/520 - 0s
Epoch 1/100
2077/2077 - 2s - loss: 0.3661 - accuracy: 0.8803
Epoch 2/100
2077/2077 - 2s - loss: 0.1265 - accuracy: 0.9593
Epoch 3/100
2077/2077 - 2s - loss: 0.0845 - accuracy: 0.9732
Epoch 4/100
2077/2077 - 2s - loss: 0.0636 - accuracy: 0.9810
Epoch 5/100
2077/2077 - 2s - loss: 0.0513 - accuracy: 0.9854
Epoch 6/100
2077/2077 - 2s - loss: 0.0441 - accuracy: 0.9874
Epoch 7/100
2077/2077 - 2s - loss: 0.0389 - accuracy: 0.9886
Epoch 8/100
2077/2077 - 2s - loss: 0.0356 - accuracy: 0.9892
Epoch 9/100
207

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 21.7min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


2077/2077 - 2s - loss: 0.3867 - accuracy: 0.8691
Epoch 2/100
2077/2077 - 2s - loss: 0.1340 - accuracy: 0.9543
Epoch 3/100
2077/2077 - 2s - loss: 0.0871 - accuracy: 0.9730
Epoch 4/100
2077/2077 - 2s - loss: 0.0653 - accuracy: 0.9808
Epoch 5/100
2077/2077 - 2s - loss: 0.0529 - accuracy: 0.9843
Epoch 6/100
2077/2077 - 2s - loss: 0.0449 - accuracy: 0.9869
Epoch 7/100
2077/2077 - 2s - loss: 0.0402 - accuracy: 0.9889
Epoch 8/100
2077/2077 - 2s - loss: 0.0370 - accuracy: 0.9888
Epoch 9/100
2077/2077 - 2s - loss: 0.0343 - accuracy: 0.9895
Epoch 10/100
2077/2077 - 2s - loss: 0.0327 - accuracy: 0.9897
Epoch 11/100
2077/2077 - 2s - loss: 0.0311 - accuracy: 0.9902
Epoch 12/100
2077/2077 - 2s - loss: 0.0298 - accuracy: 0.9907
Epoch 13/100
2077/2077 - 2s - loss: 0.0288 - accuracy: 0.9914
Epoch 14/100
2077/2077 - 2s - loss: 0.0279 - accuracy: 0.9918
Epoch 15/100
2077/2077 - 2s - loss: 0.0270 - accuracy: 0.9925
Epoch 16/100
2077/2077 - 2s - loss: 0.0261 - accuracy: 0.9930
Epoch 17/100
2077/2077 - 2s -

ValueError: Classification metrics can't handle a mix of multilabel-indicator and multiclass targets

In [None]:
print("Accuracy:", accuracies.mean())
print("AUROC:", rocs.mean())
print("Macro F1:", macros.mean())