In [1]:
import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.utils import HDF5Matrix

In [2]:
data_f = h5py.File('../../datasets/process_dataset.h5ad', 'r')

In [3]:
data_f['X'].shape

(333778, 34947)

In [2]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# load X and y
data_f = h5py.File('../../datasets/train_test_dataset.h5ad', 'r')

In [16]:
targets = pd.read_csv('../../datasets/MCA_BatchRemoved_Merge_dge_cellinfo.csv')


In [24]:
targets_labels = targets['louvain']
num_of_targets = len(pd.unique(targets_labels))

In [25]:
num_of_targets

104

In [4]:
data_f.keys()

<KeysViewHDF5 ['test_X', 'test_y', 'train_X', 'train_y']>

In [67]:
test_X = data_f['test_X']
test_y = data_f['test_y']
train_X = data_f['train_X']
train_y = data_f['train_y']

In [59]:
# test_X = HDF5Matrix('../../datasets/train_test_dataset.h5ad', 'test_X')
# test_y = HDF5Matrix('../../datasets/train_test_dataset.h5ad', 'test_y')
# train_X = HDF5Matrix('../../datasets/train_test_dataset.h5ad', 'train_X')
# train_y = HDF5Matrix('../../datasets/train_test_dataset.h5ad', 'train_y')

In [70]:
type(test_X[0,0])

numpy.float64

In [62]:
n_features = train_X.shape[1]

In [63]:
n_features

34947

In [111]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(1, activation = 'relu'))
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

In [112]:
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 200)               6989600   
_________________________________________________________________
dropout_20 (Dropout)         (None, 200)               0         
_________________________________________________________________
dropout_21 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout_22 (Dropout)         (None, 100)               0         
_________________________________________________________________
dropout_23 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 1)                

In [113]:
class DataGenerator():
    def __init__(self, file_name, batch_size=1024, data_split=100):
        self.hf = h5py.File(file_name, 'r')
        y_all = self.hf['train_y'][:]
        # print(y_all)
        # print(self.hf['train_X'].shape)
        self.total_len = len(y_all)
        self.batch_size = batch_size
        self.idx = 0
        self.len_segment = int(self.total_len / data_split)
        self.cur_seg_idx = 0
        self.x_cur = self.hf['train_X'][:self.len_segment, :]
        self.y_cur = self.hf['train_y'][:self.len_segment]

    def next_seg(self):
        self.cur_seg_idx += self.len_segment
        self.x_cur = self.hf['train_X'][self.cur_seg_idx:self.cur_seg_idx+self.len_segment, :]
        self.y_cur = self.hf['train_y'][self.cur_seg_idx:self.cur_seg_idx+self.len_segment]
        
    def generate(self):
        while 1:
            idx = self.idx
            if idx >= self.len_segment:
                self.next_seg()
                idx = 0
            
            if idx + self.batch_size >= self.len_segment:
                batch_x = self.x_cur[idx:, :]
                batch_y = self.y_cur[idx:]
            else:
                batch_x = self.x_cur[idx:(idx + self.batch_size), :]
                batch_y = self.y_cur[idx:(idx + self.batch_size)]
            self.idx = idx + self.batch_size
            yield batch_x, batch_y

In [114]:
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()

In [117]:
train_X.shape

(267019, 34947)

In [118]:
267019/1024

260.7607421875

In [120]:
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch=261)

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [122]:
model.save('superct_model_original.hdf5')

In [125]:
history.model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f46e4e27a60>

In [131]:
purposed_model = tf.keras.models.model_from_config('v1_model.h5')

TypeError: string indices must be integers

In [132]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(1, activation = 'relu'))
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

In [133]:
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch=261)

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [134]:
model.save('superct_model_modified_v1.hdf5')

In [135]:
loaded_model = tf.keras.models.load_model('superct_model_modified_v1.hdf5')

In [137]:
loaded_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 200)               6989600   
_________________________________________________________________
dropout_24 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_23 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout_25 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 1)                 101       
Total params: 7,009,801
Trainable params: 7,009,801
Non-trainable params: 0
_________________________________________________________________


In [139]:
loaded_model = tf.keras.models.load_model('v1_model.h5')

AttributeError: 'list' object has no attribute 'items'

In [142]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(num_of_targets, activation = 'relu'))
model.compile(optimizer='SGD', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [143]:
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch=261)

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [144]:
model.save('superct_model_modified_v2.hdf5')

In [155]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(1, activation = 'relu'))
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

In [156]:
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch = 261)

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [157]:
model.save('superct_model_modified_v3.hdf5')

In [158]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(1, activation = 'relu'))
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch = 261)
model.save('superct_model_modified_v4.hdf5')

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [159]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(1, activation = 'relu'))
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch = 261)
model.save('superct_model_modified_v5.hdf5')

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [160]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dropout(0.4))
model.add(Dense(num_of_targets, activation = 'relu'))
model.compile(optimizer='SGD', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch = 261)
model.save('superct_model_modified_v6.hdf5')

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [161]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(num_of_targets, activation = 'relu'))
model.compile(optimizer='SGD', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch = 261)
model.save('superct_model_modified_v7.hdf5')

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350


In [163]:
model = Sequential()
model.add(Dense(200, input_dim = n_features, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(num_of_targets, activation = 'relu'))
model.compile(optimizer='SGD', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
training_generator = DataGenerator('../../datasets/train_test_dataset.h5ad', batch_size=1024).generate()
history = model.fit_generator(generator=training_generator, epochs=350, steps_per_epoch = 261)
model.save('superct_model_modified_v8.hdf5')

ccuracy: 0.0000e+00
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350
