In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import sys
import pandas as pd
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow import keras
from sklearn.model_selection import train_test_split

In [2]:
# Read the number of target classes from the shared config file. A context
# manager is used so the file handle is closed deterministically.
with open('config.json') as config_file:
    # Kept as a string: it is spliced into the dataset file name below and
    # converted with int() where a number is needed.
    num_classes = str(json.load(config_file)['num_classes'])

# The dataset file name encodes the class count, e.g. 'human_dataset_<n>.txt'.
dataset_file_name = 'human_dataset_' + num_classes + '.txt'

# Labels come from the 'class' column of the tabular dataset file.
labels = pd.read_table('../Datasets/' + dataset_file_name)['class'].to_numpy()

# Pre-computed feature array saved earlier as a .npy file.
# NOTE(review): presumably one row per sequence, aligned with `labels` — confirm.
dna_spectral_representation = np.load('../Saved Data/dna_spectral_representation.npy')

In [3]:
# Read `k` from the shared config file; the context manager closes the file
# handle instead of leaking it.
# NOTE(review): `k` is presumably the k-mer length of the spectral representation — confirm.
with open('config.json') as config_file:
    k = int(json.load(config_file)['k'])

# Length of one input vector: 4 to the power of k.
input_vector_size = np.power(4, k)

In [4]:
# Hold out 20% of the samples as the test set. The fixed random_state keeps
# the split reproducible across runs.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    dna_spectral_representation, labels, test_size=0.2, random_state=20
)

# Split 10% of the remaining training data off as a validation set.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, test_size=0.1, random_state=20
)

# Persist the test set for later evaluation. np.save does not create missing
# parent directories, so make sure the target directory exists first.
test_set_dir = '../Saved Data/Test Set'
os.makedirs(test_set_dir, exist_ok=True)
np.save(test_set_dir + '/X_test.npy', X_test)
np.save(test_set_dir + '/y_test.npy', y_test)

# Model: LeNet-5-like CNN Architecture for Text Classification

In [5]:
# LeNet-5-style 1D CNN: two convolution + max-pooling stages, then three
# fully connected layers and a softmax classifier.
model = keras.Sequential()

# Stage 1: the only layer that needs an explicit input shape — one channel of
# length input_vector_size. "same" padding keeps the sequence length unchanged.
model.add(keras.layers.Conv1D(filters=30, kernel_size=5, activation='relu', padding="same",
                              input_shape=(input_vector_size, 1)))
model.add(keras.layers.MaxPooling1D())

# Stage 2: the input shape is inferred from the previous layer's output.
# (An input_shape argument on a non-first Sequential layer is ignored, and the
# previously declared single channel did not match the 30 channels coming in.)
model.add(keras.layers.Conv1D(filters=15, kernel_size=5, activation='relu', padding="same"))
model.add(keras.layers.MaxPooling1D())

# Collapse the (length, channels) feature map into a flat vector for the dense layers.
model.add(keras.layers.Flatten())

# Fully connected head with progressively narrower layers.
model.add(keras.layers.Dense(units=256, activation='relu'))

model.add(keras.layers.Dense(units=128, activation='relu'))

model.add(keras.layers.Dense(units=64, activation='relu'))

# One output unit per class; num_classes is stored as a string, hence int().
model.add(keras.layers.Dense(units=int(num_classes), activation='softmax'))

In [6]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 1024, 30)          180       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 512, 30)          0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 512, 15)           2265      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 256, 15)          0         
 1D)                                                             
                                                                 
 flatten (Flatten)           (None, 3840)              0         
                                                                 
 dense (Dense)               (None, 256)               9

In [7]:
# Plain stochastic gradient descent; momentum is explicitly set to zero.
opt = keras.optimizers.SGD(
    learning_rate=0.045,
    momentum=0.0,
)

# Sparse categorical cross-entropy is used with the labels passed to fit()
# unchanged (no one-hot encoding is applied in this notebook).
# Accuracy is tracked as the reporting metric.
model.compile(
    optimizer=opt,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [8]:
# Stop training once the monitored loss has not decreased for one epoch.
# NOTE(review): monitor='loss' tracks the training loss, not 'val_loss',
# even though validation data is supplied below — confirm this is intended.
earlyStop = keras.callbacks.EarlyStopping(
    monitor='loss',
    mode="min",
    patience=1,
)

# Class weighting is currently disabled; the values are kept for reference.
# To re-enable, define the dict and pass `class_weight=class_weight` to fit().
# class_weight = {0 : 0.121233,
#                 1 : 0.121918,
#                 2 : 0.079680,
#                 3 : 0.153425,
#                 4 : 0.162329,
#                 5 : 0.054795,
#                 6 : 0.306621}

# Train for at most 7 epochs in mini-batches of 32, evaluating on the
# validation split after each epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=7,
    validation_data=(X_valid, y_valid),
    callbacks=[earlyStop],
)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [9]:
# Optional (currently disabled): plot the per-epoch metrics recorded in
# `history.history` on an 8x6 figure with a grid and the y-axis limited to [0, 2].
# Uncomment the lines below to render the curves.
# data = history.history
# pd.DataFrame(data).plot(figsize=(8, 6))
# plt.grid(True)
# plt.gca().set_ylim(0, 2)
# plt.show()

In [10]:
# Persist the trained model to two targets: a directory (the log output shows
# assets being written there) and a single .h5 file.
# NOTE(review): despite the "Model Architecture" folder name, model.save()
# is the full-model save call, not an architecture-only export — confirm intent.
for save_target in ("../Saved Data/Model/", "../Saved Data/Model Architecture/model.h5"):
    model.save(save_target)



INFO:tensorflow:Assets written to: ../Saved Data/Model/assets


INFO:tensorflow:Assets written to: ../Saved Data/Model/assets
