In [1]:
import h5py
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

import os
import math

In [2]:
def create_superct_model(n_features, hidden_units=(800, 400, 200, 100), n_classes=None):
  """Build and compile the fully connected SuperCT-style classifier.

  Every hidden layer is a ReLU ``Dense`` layer followed by two stacked
  ``Dropout(0.4)`` layers. The model is compiled with the SGD optimizer and
  reports accuracy.

  Args:
    n_features: number of input features (width of the input layer).
    hidden_units: widths of the hidden Dense layers, in order. The default
      reproduces the architecture originally hard-coded in this cell.
    n_classes: optional number of target classes. When ``None`` (default) the
      original head is kept: one ReLU unit trained with
      ``categorical_crossentropy``. When given, the head is a softmax over
      ``n_classes`` units trained with ``sparse_categorical_crossentropy``,
      which expects integer class indices in ``[0, n_classes)`` as labels.

  Returns:
    The compiled ``Sequential`` model.
  """
  model = Sequential()
  # Only the first layer added to the model receives the input dimension.
  first_layer_kwargs = {'input_dim': n_features}
  for units in hidden_units:
    model.add(Dense(units, activation='relu', **first_layer_kwargs))
    first_layer_kwargs = {}
    # NOTE(review): two consecutive Dropout(0.4) layers are kept exactly as in
    # the original; confirm the doubling is intentional.
    model.add(Dropout(0.4))
    model.add(Dropout(0.4))

  if n_classes is None:
    # NOTE(review): a single ReLU unit combined with categorical_crossentropy
    # is not a usual multi-class head; kept as the default for backward
    # compatibility. Pass n_classes to get a softmax head instead.
    output_layer = Dense(1, activation='relu', **first_layer_kwargs)
    loss_name = 'categorical_crossentropy'
  else:
    output_layer = Dense(n_classes, activation='softmax', **first_layer_kwargs)
    loss_name = 'sparse_categorical_crossentropy'
  model.add(output_layer)

  model.compile(optimizer='SGD', loss=loss_name, metrics=['accuracy'])
  return model

In [3]:
def load_dataset_from_dir(file_dir):
    """Read one CSV dataset and split it into train and test partitions.

    Args:
        file_dir: path of the CSV file to read (despite the name, this is a
            file path, not a directory).

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The features are every column
        except the first one and the last two; the labels are the
        ``target_id`` column. 20% of the rows go to the test partition, using
        a fixed random seed (42).
    """
    dataset = pd.read_csv(file_dir)
    # Positional slice: skip column 0 and the two trailing columns.
    features = dataset.iloc[:, 1:-2]
    labels = dataset['target_id']
    partitions = train_test_split(features, labels, test_size=0.2, random_state=42)
    return tuple(partitions)

In [4]:
def dictionary_list(input_dic):
    """Render a mapping of name -> sequence as an indented text listing.

    Each entry becomes the key on its own line, then every value of the
    sequence on its own tab-indented line, followed by one blank line.

    Args:
        input_dic: mapping whose values are iterables (e.g. ``History.history``).

    Returns:
        The formatted text; an empty string for an empty mapping.
    """
    sections = []
    for key, values in input_dic.items():
        value_lines = '\n\t'.join(str(value) for value in values)
        sections.append(str(key) + '\n\t' + value_lines + '\n\n')
    return ''.join(sections)

In [5]:
def train_and_save(file_dir, prefix='superct_v6_'):
    """Train a model on one CSV dataset and save its history and weights.

    The dataset is loaded and split by ``load_dataset_from_dir``, a model is
    built by ``create_superct_model`` and fitted with the test partition as
    validation data. Two files are written to the current working directory:
    ``<prefix><dataset stem>.txt`` (dataset size plus the per-epoch training
    history) and ``<prefix><dataset stem>.hdf5`` (the saved model).

    Args:
        file_dir: path of the CSV dataset file.
        prefix: text prepended to the dataset file name to form the output
            names. The default is the version tag originally hard-coded here.
    """
    print('start to process %s'%file_dir)
    X_train, X_test, y_train, y_test = load_dataset_from_dir(file_dir)
    n_observations, n_features = X_train.shape

    # NOTE(review): batch_size only feeds the epoch count below; it is not
    # passed to model.fit, so fit runs with its own default batch size.
    # Confirm whether that is intended.
    batch_size = 128
    # round up the epochs
    num_epoches = math.ceil(n_observations/batch_size)

    model = create_superct_model(n_features)
    history = model.fit(X_train, y_train, epochs=num_epoches, validation_data=(X_test, y_test))

    file_name = prefix + os.path.basename(file_dir)
    # Swap only the real extension. str.replace('.csv', ...) left the name
    # unchanged for inputs without '.csv', so the history file and the model
    # file ended up sharing one path.
    output_stem = os.path.splitext(file_name)[0]
    history_name = output_stem + '.txt'
    output_model_name = output_stem + '.hdf5'

    with open(history_name, 'w') as f:
        f.write('%s\n' % file_name)
        f.write('the number of observations: %d\n' % n_observations)
        f.write('the number of features: %d\n' % n_features)
        f.write(dictionary_list(history.history))
    model.save(output_model_name)
    print('finished processing %s'%file_name)

In [10]:
 # Walk the pre_processed_datasets folder bottom-up and call train_and_save
 # on each dataset file it contains.
# Train one model per dataset file found under the Drive folder.
for root, dirs, files in os.walk("/content/drive/MyDrive/Colab Notebooks/pre_processed_datasets", topdown=False):
  # Sorted so the processing order is reproducible across runs.
  for name in sorted(files):
    # train_and_save expects a CSV dataset, so skip any other file that
    # happens to live in the folder instead of feeding it to the CSV reader.
    if not name.endswith('.csv'):
      continue
    file_dir = os.path.join(root, name)
    train_and_save(file_dir)

print('done')

start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/BoneMarrowcKit_1.csv
Epoch 1/34
Epoch 2/34
Epoch 3/34
Epoch 4/34
Epoch 5/34
Epoch 6/34
Epoch 7/34
Epoch 8/34
Epoch 9/34
Epoch 10/34
Epoch 11/34
Epoch 12/34
Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34
Epoch 17/34
Epoch 18/34
Epoch 19/34
Epoch 20/34
Epoch 21/34
Epoch 22/34
Epoch 23/34
Epoch 24/34
Epoch 25/34
Epoch 26/34
Epoch 27/34
Epoch 28/34
Epoch 29/34
Epoch 30/34
Epoch 31/34
Epoch 32/34
Epoch 33/34
Epoch 34/34
finished processing superct_v6_BoneMarrowcKit_1.csv
start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/NeonatalCalvaria_1.csv
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epoch 20/28
Epoch 21/28
Epoch 22/28
Epoch 23/28
Epoch 24/28
Epoch 25/28
Epoch 26/28
Epoch 27/28
Epoch 28/28
finished processing 

KeyError: ignored

In [11]:
def create_superct_model(n_features, hidden_units=(1600, 800, 400, 200, 100), n_classes=None):
  """Build and compile the fully connected SuperCT-style classifier.

  Every hidden layer is a ReLU ``Dense`` layer followed by two stacked
  ``Dropout(0.4)`` layers. The model is compiled with the SGD optimizer and
  reports accuracy.

  Args:
    n_features: number of input features (width of the input layer).
    hidden_units: widths of the hidden Dense layers, in order. The default
      reproduces the architecture originally hard-coded in this cell.
    n_classes: optional number of target classes. When ``None`` (default) the
      original head is kept: one ReLU unit trained with
      ``categorical_crossentropy``. When given, the head is a softmax over
      ``n_classes`` units trained with ``sparse_categorical_crossentropy``,
      which expects integer class indices in ``[0, n_classes)`` as labels.

  Returns:
    The compiled ``Sequential`` model.
  """
  model = Sequential()
  # Only the first layer added to the model receives the input dimension.
  first_layer_kwargs = {'input_dim': n_features}
  for units in hidden_units:
    model.add(Dense(units, activation='relu', **first_layer_kwargs))
    first_layer_kwargs = {}
    # NOTE(review): two consecutive Dropout(0.4) layers are kept exactly as in
    # the original; confirm the doubling is intentional.
    model.add(Dropout(0.4))
    model.add(Dropout(0.4))

  if n_classes is None:
    # NOTE(review): a single ReLU unit combined with categorical_crossentropy
    # is not a usual multi-class head; kept as the default for backward
    # compatibility. Pass n_classes to get a softmax head instead.
    output_layer = Dense(1, activation='relu', **first_layer_kwargs)
    loss_name = 'categorical_crossentropy'
  else:
    output_layer = Dense(n_classes, activation='softmax', **first_layer_kwargs)
    loss_name = 'sparse_categorical_crossentropy'
  model.add(output_layer)

  model.compile(optimizer='SGD', loss=loss_name, metrics=['accuracy'])
  return model

In [13]:
def train_and_save(file_dir, prefix='superct_v7_'):
    """Train a model on one CSV dataset and save its history and weights.

    The dataset is loaded and split by ``load_dataset_from_dir``, a model is
    built by ``create_superct_model`` and fitted with the test partition as
    validation data. Two files are written to the current working directory:
    ``<prefix><dataset stem>.txt`` (dataset size plus the per-epoch training
    history) and ``<prefix><dataset stem>.hdf5`` (the saved model).

    Args:
        file_dir: path of the CSV dataset file.
        prefix: text prepended to the dataset file name to form the output
            names. The default is the version tag originally hard-coded here.
    """
    print('start to process %s'%file_dir)
    X_train, X_test, y_train, y_test = load_dataset_from_dir(file_dir)
    n_observations, n_features = X_train.shape

    # NOTE(review): batch_size only feeds the epoch count below; it is not
    # passed to model.fit, so fit runs with its own default batch size.
    # Confirm whether that is intended.
    batch_size = 128
    # round up the epochs
    num_epoches = math.ceil(n_observations/batch_size)

    model = create_superct_model(n_features)
    history = model.fit(X_train, y_train, epochs=num_epoches, validation_data=(X_test, y_test))

    file_name = prefix + os.path.basename(file_dir)
    # Swap only the real extension. str.replace('.csv', ...) left the name
    # unchanged for inputs without '.csv', so the history file and the model
    # file ended up sharing one path.
    output_stem = os.path.splitext(file_name)[0]
    history_name = output_stem + '.txt'
    output_model_name = output_stem + '.hdf5'

    with open(history_name, 'w') as f:
        f.write('%s\n' % file_name)
        f.write('the number of observations: %d\n' % n_observations)
        f.write('the number of features: %d\n' % n_features)
        f.write(dictionary_list(history.history))
    model.save(output_model_name)
    print('finished processing %s'%file_name)

In [14]:
 # Walk the pre_processed_datasets folder bottom-up and call train_and_save
 # on each dataset file it contains.
# Train one model per dataset file found under the Drive folder.
for root, dirs, files in os.walk("/content/drive/MyDrive/Colab Notebooks/pre_processed_datasets", topdown=False):
  # Sorted so the processing order is reproducible across runs.
  for name in sorted(files):
    # train_and_save expects a CSV dataset, so skip any other file that
    # happens to live in the folder instead of feeding it to the CSV reader.
    if not name.endswith('.csv'):
      continue
    file_dir = os.path.join(root, name)
    train_and_save(file_dir)

print('done')

start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/BoneMarrowcKit_1.csv
Epoch 1/34
Epoch 2/34
Epoch 3/34
Epoch 4/34
Epoch 5/34
Epoch 6/34
Epoch 7/34
Epoch 8/34
Epoch 9/34
Epoch 10/34
Epoch 11/34
Epoch 12/34
Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34
Epoch 17/34
Epoch 18/34
Epoch 19/34
Epoch 20/34
Epoch 21/34
Epoch 22/34
Epoch 23/34
Epoch 24/34
Epoch 25/34
Epoch 26/34
Epoch 27/34
Epoch 28/34
Epoch 29/34
Epoch 30/34
Epoch 31/34
Epoch 32/34
Epoch 33/34
Epoch 34/34
finished processing superct_v7_BoneMarrowcKit_1.csv
start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/NeonatalCalvaria_1.csv
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epoch 20/28
Epoch 21/28
Epoch 22/28
Epoch 23/28
Epoch 24/28
Epoch 25/28
Epoch 26/28
Epoch 27/28
Epoch 28/28
finished processing 

In [15]:
def create_superct_model(n_features, hidden_units=(3200, 1600, 800, 400, 200, 100), n_classes=None):
  """Build and compile the fully connected SuperCT-style classifier.

  Every hidden layer is a ReLU ``Dense`` layer followed by two stacked
  ``Dropout(0.4)`` layers. The model is compiled with the SGD optimizer and
  reports accuracy.

  Args:
    n_features: number of input features (width of the input layer).
    hidden_units: widths of the hidden Dense layers, in order. The default
      reproduces the architecture originally hard-coded in this cell.
    n_classes: optional number of target classes. When ``None`` (default) the
      original head is kept: one ReLU unit trained with
      ``categorical_crossentropy``. When given, the head is a softmax over
      ``n_classes`` units trained with ``sparse_categorical_crossentropy``,
      which expects integer class indices in ``[0, n_classes)`` as labels.

  Returns:
    The compiled ``Sequential`` model.
  """
  model = Sequential()
  # Only the first layer added to the model receives the input dimension.
  first_layer_kwargs = {'input_dim': n_features}
  for units in hidden_units:
    model.add(Dense(units, activation='relu', **first_layer_kwargs))
    first_layer_kwargs = {}
    # NOTE(review): two consecutive Dropout(0.4) layers are kept exactly as in
    # the original; confirm the doubling is intentional.
    model.add(Dropout(0.4))
    model.add(Dropout(0.4))

  if n_classes is None:
    # NOTE(review): a single ReLU unit combined with categorical_crossentropy
    # is not a usual multi-class head; kept as the default for backward
    # compatibility. Pass n_classes to get a softmax head instead.
    output_layer = Dense(1, activation='relu', **first_layer_kwargs)
    loss_name = 'categorical_crossentropy'
  else:
    output_layer = Dense(n_classes, activation='softmax', **first_layer_kwargs)
    loss_name = 'sparse_categorical_crossentropy'
  model.add(output_layer)

  model.compile(optimizer='SGD', loss=loss_name, metrics=['accuracy'])
  return model

In [16]:
def train_and_save(file_dir, prefix='superct_v8_'):
    """Train a model on one CSV dataset and save its history and weights.

    The dataset is loaded and split by ``load_dataset_from_dir``, a model is
    built by ``create_superct_model`` and fitted with the test partition as
    validation data. Two files are written to the current working directory:
    ``<prefix><dataset stem>.txt`` (dataset size plus the per-epoch training
    history) and ``<prefix><dataset stem>.hdf5`` (the saved model).

    Args:
        file_dir: path of the CSV dataset file.
        prefix: text prepended to the dataset file name to form the output
            names. The default is the version tag originally hard-coded here.
    """
    print('start to process %s'%file_dir)
    X_train, X_test, y_train, y_test = load_dataset_from_dir(file_dir)
    n_observations, n_features = X_train.shape

    # NOTE(review): batch_size only feeds the epoch count below; it is not
    # passed to model.fit, so fit runs with its own default batch size.
    # Confirm whether that is intended.
    batch_size = 128
    # round up the epochs
    num_epoches = math.ceil(n_observations/batch_size)

    model = create_superct_model(n_features)
    history = model.fit(X_train, y_train, epochs=num_epoches, validation_data=(X_test, y_test))

    file_name = prefix + os.path.basename(file_dir)
    # Swap only the real extension. str.replace('.csv', ...) left the name
    # unchanged for inputs without '.csv', so the history file and the model
    # file ended up sharing one path.
    output_stem = os.path.splitext(file_name)[0]
    history_name = output_stem + '.txt'
    output_model_name = output_stem + '.hdf5'

    with open(history_name, 'w') as f:
        f.write('%s\n' % file_name)
        f.write('the number of observations: %d\n' % n_observations)
        f.write('the number of features: %d\n' % n_features)
        f.write(dictionary_list(history.history))
    model.save(output_model_name)
    print('finished processing %s'%file_name)

In [17]:
 # Walk the pre_processed_datasets folder bottom-up and call train_and_save
 # on each dataset file it contains.
# Train one model per dataset file found under the Drive folder.
for root, dirs, files in os.walk("/content/drive/MyDrive/Colab Notebooks/pre_processed_datasets", topdown=False):
  # Sorted so the processing order is reproducible across runs.
  for name in sorted(files):
    # train_and_save expects a CSV dataset, so skip any other file that
    # happens to live in the folder instead of feeding it to the CSV reader.
    if not name.endswith('.csv'):
      continue
    file_dir = os.path.join(root, name)
    train_and_save(file_dir)

print('done')

start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/BoneMarrowcKit_1.csv
Epoch 1/34
Epoch 2/34
Epoch 3/34
Epoch 4/34
Epoch 5/34
Epoch 6/34
Epoch 7/34
Epoch 8/34
Epoch 9/34
Epoch 10/34
Epoch 11/34
Epoch 12/34
Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34
Epoch 17/34
Epoch 18/34
Epoch 19/34
Epoch 20/34
Epoch 21/34
Epoch 22/34
Epoch 23/34
Epoch 24/34
Epoch 25/34
Epoch 26/34
Epoch 27/34
Epoch 28/34
Epoch 29/34
Epoch 30/34
Epoch 31/34
Epoch 32/34
Epoch 33/34
Epoch 34/34
finished processing superct_v8_BoneMarrowcKit_1.csv
start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/NeonatalCalvaria_1.csv
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epoch 20/28
Epoch 21/28
Epoch 22/28
Epoch 23/28
Epoch 24/28
Epoch 25/28
Epoch 26/28
Epoch 27/28
Epoch 28/28
finished processing 

In [18]:
def create_superct_model(n_features, hidden_units=(6400, 3200, 1600, 800, 400, 200, 100), n_classes=None):
  """Build and compile the fully connected SuperCT-style classifier.

  Every hidden layer is a ReLU ``Dense`` layer followed by two stacked
  ``Dropout(0.4)`` layers. The model is compiled with the SGD optimizer and
  reports accuracy.

  Args:
    n_features: number of input features (width of the input layer).
    hidden_units: widths of the hidden Dense layers, in order. The default
      reproduces the architecture originally hard-coded in this cell.
    n_classes: optional number of target classes. When ``None`` (default) the
      original head is kept: one ReLU unit trained with
      ``categorical_crossentropy``. When given, the head is a softmax over
      ``n_classes`` units trained with ``sparse_categorical_crossentropy``,
      which expects integer class indices in ``[0, n_classes)`` as labels.

  Returns:
    The compiled ``Sequential`` model.
  """
  model = Sequential()
  # Only the first layer added to the model receives the input dimension.
  first_layer_kwargs = {'input_dim': n_features}
  for units in hidden_units:
    model.add(Dense(units, activation='relu', **first_layer_kwargs))
    first_layer_kwargs = {}
    # NOTE(review): two consecutive Dropout(0.4) layers are kept exactly as in
    # the original; confirm the doubling is intentional.
    model.add(Dropout(0.4))
    model.add(Dropout(0.4))

  if n_classes is None:
    # NOTE(review): a single ReLU unit combined with categorical_crossentropy
    # is not a usual multi-class head; kept as the default for backward
    # compatibility. Pass n_classes to get a softmax head instead.
    output_layer = Dense(1, activation='relu', **first_layer_kwargs)
    loss_name = 'categorical_crossentropy'
  else:
    output_layer = Dense(n_classes, activation='softmax', **first_layer_kwargs)
    loss_name = 'sparse_categorical_crossentropy'
  model.add(output_layer)

  model.compile(optimizer='SGD', loss=loss_name, metrics=['accuracy'])
  return model

In [19]:
def train_and_save(file_dir, prefix='superct_v9_'):
    """Train a model on one CSV dataset and save its history and weights.

    The dataset is loaded and split by ``load_dataset_from_dir``, a model is
    built by ``create_superct_model`` and fitted with the test partition as
    validation data. Two files are written to the current working directory:
    ``<prefix><dataset stem>.txt`` (dataset size plus the per-epoch training
    history) and ``<prefix><dataset stem>.hdf5`` (the saved model).

    Args:
        file_dir: path of the CSV dataset file.
        prefix: text prepended to the dataset file name to form the output
            names. The default is the version tag originally hard-coded here.
    """
    print('start to process %s'%file_dir)
    X_train, X_test, y_train, y_test = load_dataset_from_dir(file_dir)
    n_observations, n_features = X_train.shape

    # NOTE(review): batch_size only feeds the epoch count below; it is not
    # passed to model.fit, so fit runs with its own default batch size.
    # Confirm whether that is intended.
    batch_size = 128
    # round up the epochs
    num_epoches = math.ceil(n_observations/batch_size)

    model = create_superct_model(n_features)
    history = model.fit(X_train, y_train, epochs=num_epoches, validation_data=(X_test, y_test))

    file_name = prefix + os.path.basename(file_dir)
    # Swap only the real extension. str.replace('.csv', ...) left the name
    # unchanged for inputs without '.csv', so the history file and the model
    # file ended up sharing one path.
    output_stem = os.path.splitext(file_name)[0]
    history_name = output_stem + '.txt'
    output_model_name = output_stem + '.hdf5'

    with open(history_name, 'w') as f:
        f.write('%s\n' % file_name)
        f.write('the number of observations: %d\n' % n_observations)
        f.write('the number of features: %d\n' % n_features)
        f.write(dictionary_list(history.history))
    model.save(output_model_name)
    print('finished processing %s'%file_name)

In [20]:
 # Walk the pre_processed_datasets folder bottom-up and call train_and_save
 # on each dataset file it contains.
# Train one model per dataset file found under the Drive folder.
for root, dirs, files in os.walk("/content/drive/MyDrive/Colab Notebooks/pre_processed_datasets", topdown=False):
  # Sorted so the processing order is reproducible across runs.
  for name in sorted(files):
    # train_and_save expects a CSV dataset, so skip any other file that
    # happens to live in the folder instead of feeding it to the CSV reader.
    if not name.endswith('.csv'):
      continue
    file_dir = os.path.join(root, name)
    train_and_save(file_dir)

print('done')

start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/BoneMarrowcKit_1.csv
Epoch 1/34
Epoch 2/34
Epoch 3/34
Epoch 4/34
Epoch 5/34
Epoch 6/34
Epoch 7/34
Epoch 8/34
Epoch 9/34
Epoch 10/34
Epoch 11/34
Epoch 12/34
Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34
Epoch 17/34
Epoch 18/34
Epoch 19/34
Epoch 20/34
Epoch 21/34
Epoch 22/34
Epoch 23/34
Epoch 24/34
Epoch 25/34
Epoch 26/34
Epoch 27/34
Epoch 28/34
Epoch 29/34
Epoch 30/34
Epoch 31/34
Epoch 32/34
Epoch 33/34
Epoch 34/34
finished processing superct_v9_BoneMarrowcKit_1.csv
start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/NeonatalCalvaria_1.csv
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epoch 20/28
Epoch 21/28
Epoch 22/28
Epoch 23/28
Epoch 24/28
Epoch 25/28
Epoch 26/28
Epoch 27/28
Epoch 28/28
finished processing 

In [6]:
def create_superct_model(n_features, hidden_units=(12800, 6400, 3200, 1600, 800, 400, 200, 100), n_classes=None):
  """Build and compile the fully connected SuperCT-style classifier.

  Every hidden layer is a ReLU ``Dense`` layer followed by two stacked
  ``Dropout(0.4)`` layers. The model is compiled with the SGD optimizer and
  reports accuracy.

  Args:
    n_features: number of input features (width of the input layer).
    hidden_units: widths of the hidden Dense layers, in order. The default
      reproduces the architecture originally hard-coded in this cell.
    n_classes: optional number of target classes. When ``None`` (default) the
      original head is kept: one ReLU unit trained with
      ``categorical_crossentropy``. When given, the head is a softmax over
      ``n_classes`` units trained with ``sparse_categorical_crossentropy``,
      which expects integer class indices in ``[0, n_classes)`` as labels.

  Returns:
    The compiled ``Sequential`` model.
  """
  model = Sequential()
  # Only the first layer added to the model receives the input dimension.
  first_layer_kwargs = {'input_dim': n_features}
  for units in hidden_units:
    model.add(Dense(units, activation='relu', **first_layer_kwargs))
    first_layer_kwargs = {}
    # NOTE(review): two consecutive Dropout(0.4) layers are kept exactly as in
    # the original; confirm the doubling is intentional.
    model.add(Dropout(0.4))
    model.add(Dropout(0.4))

  if n_classes is None:
    # NOTE(review): a single ReLU unit combined with categorical_crossentropy
    # is not a usual multi-class head; kept as the default for backward
    # compatibility. Pass n_classes to get a softmax head instead.
    output_layer = Dense(1, activation='relu', **first_layer_kwargs)
    loss_name = 'categorical_crossentropy'
  else:
    output_layer = Dense(n_classes, activation='softmax', **first_layer_kwargs)
    loss_name = 'sparse_categorical_crossentropy'
  model.add(output_layer)

  model.compile(optimizer='SGD', loss=loss_name, metrics=['accuracy'])
  return model

In [7]:
def train_and_save(file_dir, prefix='superct_v10_'):
    """Train a model on one CSV dataset and save its history and weights.

    The dataset is loaded and split by ``load_dataset_from_dir``, a model is
    built by ``create_superct_model`` and fitted with the test partition as
    validation data. Two files are written to the current working directory:
    ``<prefix><dataset stem>.txt`` (dataset size plus the per-epoch training
    history) and ``<prefix><dataset stem>.hdf5`` (the saved model).

    Args:
        file_dir: path of the CSV dataset file.
        prefix: text prepended to the dataset file name to form the output
            names. The default is the version tag originally hard-coded here.
    """
    print('start to process %s'%file_dir)
    X_train, X_test, y_train, y_test = load_dataset_from_dir(file_dir)
    n_observations, n_features = X_train.shape

    # NOTE(review): batch_size only feeds the epoch count below; it is not
    # passed to model.fit, so fit runs with its own default batch size.
    # Confirm whether that is intended.
    batch_size = 128
    # round up the epochs
    num_epoches = math.ceil(n_observations/batch_size)

    model = create_superct_model(n_features)
    history = model.fit(X_train, y_train, epochs=num_epoches, validation_data=(X_test, y_test))

    file_name = prefix + os.path.basename(file_dir)
    # Swap only the real extension. str.replace('.csv', ...) left the name
    # unchanged for inputs without '.csv', so the history file and the model
    # file ended up sharing one path.
    output_stem = os.path.splitext(file_name)[0]
    history_name = output_stem + '.txt'
    output_model_name = output_stem + '.hdf5'

    with open(history_name, 'w') as f:
        f.write('%s\n' % file_name)
        f.write('the number of observations: %d\n' % n_observations)
        f.write('the number of features: %d\n' % n_features)
        f.write(dictionary_list(history.history))
    model.save(output_model_name)
    print('finished processing %s'%file_name)

In [8]:
 # Walk the pre_processed_datasets folder bottom-up and call train_and_save
 # on each dataset file it contains.
# Train one model per dataset file found under the Drive folder.
for root, dirs, files in os.walk("/content/drive/MyDrive/Colab Notebooks/pre_processed_datasets", topdown=False):
  # Sorted so the processing order is reproducible across runs.
  for name in sorted(files):
    # train_and_save expects a CSV dataset, so skip any other file that
    # happens to live in the folder instead of feeding it to the CSV reader.
    if not name.endswith('.csv'):
      continue
    file_dir = os.path.join(root, name)
    train_and_save(file_dir)

print('done')

start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/BoneMarrowcKit_1.csv
Epoch 1/34
Epoch 2/34
Epoch 3/34
Epoch 4/34
Epoch 5/34
Epoch 6/34
Epoch 7/34
Epoch 8/34
Epoch 9/34
Epoch 10/34
Epoch 11/34
Epoch 12/34
Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34
Epoch 17/34
Epoch 18/34
Epoch 19/34
Epoch 20/34
Epoch 21/34
Epoch 22/34
Epoch 23/34
Epoch 24/34
Epoch 25/34
Epoch 26/34
Epoch 27/34
Epoch 28/34
Epoch 29/34
Epoch 30/34
Epoch 31/34
Epoch 32/34
Epoch 33/34
Epoch 34/34
finished processing superct_v10_BoneMarrowcKit_1.csv
start to process /content/drive/MyDrive/Colab Notebooks/pre_processed_datasets/NeonatalCalvaria_1.csv
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epoch 20/28
Epoch 21/28
Epoch 22/28
Epoch 23/28
Epoch 24/28
Epoch 25/28
Epoch 26/28
Epoch 27/28
Epoch 28/28
finished processing