In [None]:
from google.colab import drive 
drive.mount('/content/gdrive/') 

!mkdir "/content/train_data/"
!unzip "/content/gdrive/MyDrive/GOTCHA/training_07_30.zip" -d /content/train_data &>/dev/null &

Mounted at /content/gdrive/


In [None]:

from keras import backend as K
from keras.layers import Input, Dense, Flatten, Activation, Dropout, Bidirectional, Permute, multiply ,Layer
from keras.layers.recurrent import SimpleRNN
from keras.callbacks import CSVLogger
from keras.models import Sequential, Model, load_model
from keras.applications.inception_v3 import InceptionV3
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

import os
import time
import numpy as np
import glob
import cv2
from keras.preprocessing.image import img_to_array
from PIL import Image

import datetime

from keras.applications.inception_v3 import preprocess_input
from keras.callbacks import EarlyStopping


In [None]:
# Number of training epochs requested from Keras.
NUM_EPOCHS = 500
# Keras verbosity level.
VERBOSE = 1
# Units of the SimpleRNN layer (per direction, since it is wrapped in Bidirectional).
HIDDEN_UNITS = 3

# Root directory of the training data; presumably the "training" folder comes
# from the archive unpacked into /content/train_data -- verify against the zip.
input_data_dir_path = "/content/train_data/training"

In [None]:
def CNN_feature_extractor(input_data_dir_path, output_feature_dir = ""):
  """Extract InceptionV3 features for every proband/father/mother image trio.

  Every sub-directory of ``input_data_dir_path`` is expected to contain the
  folders ``father``, ``proband`` and ``mother`` holding PNG files with
  identical names. For each PNG found in ``father`` the three images are
  resized to 224x224, run through InceptionV3 (ImageNet weights, no top) and
  the flattened outputs are stacked in the order proband, father, mother and
  saved as one ``.npy`` file.

  Args:
    input_data_dir_path: directory whose sub-directories hold the image trios.
    output_feature_dir: directory for the ``.npy`` files. When empty (the
      default) each sub-directory gets its own ``extracted_features`` folder.
      Note that a shared directory lets equally named images from different
      sub-directories overwrite each other.

  Returns:
    ``(x_samples, y_samples)``: the list of saved ``.npy`` paths and the list
    of integer labels. For a file named ``<chr>_s.<A>_vp.<B>.png`` the label
    is ``A - B``.
  """
  cnn_model = InceptionV3(include_top = False, weights = 'imagenet' )

  x_samples = []
  y_samples = []

  # Sorted so that the sample order does not depend on the file system.
  for entry in sorted(os.listdir(input_data_dir_path)):
    sample_dir = os.path.join(input_data_dir_path, entry)
    if not os.path.isdir(sample_dir):
      continue

    # Resolve the output directory on every iteration; previously it was only
    # set when the folder had just been created, so a re-run saved to "/".
    feature_dir = output_feature_dir or os.path.join(sample_dir, "extracted_features")
    os.makedirs(feature_dir, exist_ok=True)

    for father_path in sorted(glob.glob(os.path.join(sample_dir, "father", "*.png"))):
      file_name = os.path.basename(father_path)
      print('Extracting features from : ', file_name)

      # Sibling images are located relative to the sample directory instead of
      # relying on a fixed number of path components.
      trio_paths = [os.path.join(sample_dir, "proband", file_name),
                    father_path,
                    os.path.join(sample_dir, "mother", file_name)]
      images = [cv2.imread(path) for path in trio_paths]

      # cv2.imread signals an unreadable/missing file by returning None.
      unreadable = [path for path, image in zip(trio_paths, images) if image is None]
      if unreadable:
        print('Skipping {}: could not read {}'.format(file_name, unreadable))
        continue

      # NOTE(review): cv2.imread yields BGR channel order while the ImageNet
      # weights were presumably trained on RGB -- confirm whether a colour
      # conversion is wanted before changing it (it alters all features).
      batch = np.stack([img_to_array(cv2.resize(image, (224, 224))) for image in images])
      # The arrays are laid out (height, width, channels); the former
      # 'channels_first' argument did not describe them, so the default is used.
      batch = preprocess_input(batch)
      # One forward pass for the whole trio; one flattened row per image.
      unscaled_features = cnn_model.predict(batch).reshape(len(images), -1)

      stem = os.path.splitext(file_name)[0]
      feature_path = os.path.join(feature_dir, stem + ".npy")
      np.save(feature_path, unscaled_features)

      name_fields = stem.split("_")
      label = int(name_fields[1].split(".")[1]) - int(name_fields[2].split(".")[1])

      x_samples.append(feature_path)
      y_samples.append(label)

  # Returned only after every sub-directory has been visited (the original
  # returned from inside the outer loop, i.e. after the first directory).
  return x_samples, y_samples

In [None]:
def attention_block(inputs, time_steps):
    """Re-weight ``inputs`` with softmax attention scores.

    The two non-batch axes are swapped, a softmax ``Dense`` layer with
    ``time_steps`` units is applied on the (now last) axis, and the result is
    swapped back so it lines up with ``inputs`` again. The element-wise product
    of ``inputs`` and these scores is returned.

    Assumes ``inputs`` is (batch, time_steps, features) so that the shapes
    match for the final multiplication -- confirm against the caller.
    """
    # Swap the axes so the Dense/softmax layer acts along the time-step axis.
    swapped = Permute((2, 1))(inputs)
    scores = Dense(time_steps, activation='softmax')(swapped)
    # Swap back after the softmax so the scores align with the original
    # layout; this is what lets the weights emphasise the important parts.
    attention_probs = Permute((2, 1), name='attention_vec')(scores)
    # Multiply the input by the attention probabilities and hand the
    # attention-weighted tensor back to the caller.
    return multiply([inputs, attention_probs], name='attention_mul')

In [None]:
def generate_batch(x_samples, y_samples, batch_size, time_steps=3):
    """Endlessly yield ``(features, labels)`` batches loaded from ``.npy`` files.

    Args:
        x_samples: sequence of paths to 2-D ``.npy`` feature arrays.
        y_samples: labels aligned with ``x_samples`` (anything sliceable).
        batch_size: number of samples per batch; must be positive.
        time_steps: number of rows every sample is forced to have. Longer
            arrays are truncated, shorter ones are zero-padded at the end.

    Yields:
        ``(x, y)`` where ``x`` has shape ``(batch_size, time_steps, features)``
        and ``y`` is ``y_samples[start:end]`` as an array. Only full batches
        are produced; a trailing remainder smaller than ``batch_size`` is
        dropped, matching a ``len(samples) // batch_size`` step count.

    Raises:
        ValueError: on the first ``next()`` if ``batch_size`` is not positive
            or no full batch can be formed. Without this check the generator
            would loop forever without yielding anything.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got {}".format(batch_size))

    num_batches = len(x_samples) // batch_size
    if num_batches == 0:
        raise ValueError(
            "cannot form a batch of {} from {} sample(s)".format(batch_size, len(x_samples)))

    while True:
        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            end = start + batch_size

            x_data = []
            for path in x_samples[start:end]:
                x = np.load(path)
                rows = x.shape[0]
                if rows >= time_steps:
                    x = x[:time_steps]
                else:
                    # Keep the source dtype so one padded sample does not
                    # upcast the whole batch.
                    padded = np.zeros((time_steps, x.shape[1]), dtype=x.dtype)
                    padded[:rows] = x
                    x = padded
                x_data.append(x)

            yield np.array(x_data), np.asarray(y_samples[start:end])

In [None]:
class Classifier(object):
  """Attention + bidirectional SimpleRNN classifier on top of CNN features.

  Attributes:
    num_input_tokens: feature length per time step, set by ``fit``.
    nb_classes: number of output classes, set by ``fit``.
    model: the Keras model; loaded from ``model_file`` or built by ``fit``.
  """

  def __init__(self, model_file = None):
    """Optionally load a previously saved model from ``model_file``."""
    self.num_input_tokens = None
    self.nb_classes = None
    self.model = None if model_file is None else load_model(model_file)

  def cnn_attention_lstm(self):
    """Build and compile the network.

    Input is (3, num_input_tokens); it passes through ``attention_block``, a
    bidirectional SimpleRNN with ``HIDDEN_UNITS`` units, a 512-unit ReLU layer
    with dropout, and a softmax over ``nb_classes``. Despite the name, the
    recurrent layer is a SimpleRNN, not an LSTM.
    """
    inputs = Input(shape=(3, self.num_input_tokens,))
    attention_inputs = attention_block(inputs, 3)
    rnn_out = Bidirectional(SimpleRNN(HIDDEN_UNITS, return_sequences=False))(attention_inputs)
    x = Dense(512, activation='relu')(rnn_out)
    x = Dropout(0.5)(x)
    x = Dense(self.nb_classes, activation='softmax')(x)
    model = Model([inputs], x)
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['acc'])
    # summary() prints by itself; wrapping it in print() added a stray "None".
    model.summary()
    return model

  def fit(self, input_path, output_path, data_set_name = '' , test_size = 0.3, attention = 'cnn_attention_lstm', do_feature_extraction = False):
    """Extract features, train the model and save it under ``output_path``.

    Args:
      input_path: directory handed to ``CNN_feature_extractor``.
      output_path: directory receiving ``config.npy``, the CSV training log
        and the saved ``.h5`` model; created if missing.
      data_set_name: currently unused.
      test_size: fraction of the samples held out for validation.
      attention: only used to build the saved model's file name and the final
        log line.
      do_feature_extraction: currently unused; features are always extracted.

    Returns:
      The validation accuracy of the last epoch.

    Raises:
      ValueError: if no samples are found, a label falls outside
        ``[0, nb_classes)``, or the train/validation split is too small to
        form a single batch.
    """
    batch_size = 128

    # Use the caller's path; the module-level constant was read here before,
    # which silently ignored the ``input_path`` argument.
    x_samples, y_samples = CNN_feature_extractor(input_path)
    if not x_samples:
      raise ValueError("no training samples found under {!r}".format(input_path))

    labels = dict()
    for y in y_samples:
      if y not in labels:
        labels[y] = len(labels)

    self.num_input_tokens = np.load(x_samples[0]).shape[1]
    self.nb_classes = 64

    # to_categorical indexes by label value: too-large labels raise an opaque
    # IndexError and negative ones would wrap around to the wrong class.
    out_of_range = sorted(y for y in labels if not 0 <= y < self.nb_classes)
    if out_of_range:
      raise ValueError("labels outside [0, {}): {}".format(self.nb_classes, out_of_range))
    y_samples = np_utils.to_categorical(y_samples, self.nb_classes)

    config = dict()
    config['labels'] = labels
    config['nb_classes'] = self.nb_classes
    config['num_input_tokens'] = self.num_input_tokens

    os.makedirs(output_path, exist_ok=True)
    np.save(os.path.join(output_path, 'config'), config)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(x_samples, y_samples, test_size=test_size,
                                                    random_state=None)

    train_num_batches = len(Xtrain) // batch_size
    test_num_batches = len(Xtest) // batch_size
    # Fail before the expensive part: with zero steps the generators never
    # yield and no 'val_acc' would be recorded.
    if train_num_batches == 0 or test_num_batches == 0:
      raise ValueError(
          "need at least {} samples in both splits, got {} train / {} validation".format(
              batch_size, len(Xtrain), len(Xtest)))

    # Keep the trained network on the instance so it can be used after fit().
    self.model = self.cnn_attention_lstm()
    model = self.model
    csv_logger = CSVLogger(os.path.join(output_path, 'cnn_attention_lstm.log'), append=True,
                           separator=';')

    train_gen = generate_batch(Xtrain, Ytrain, batch_size)
    test_gen = generate_batch(Xtest, Ytest, batch_size)

    history = model.fit_generator(generator=train_gen, steps_per_epoch=train_num_batches,
                                  epochs=NUM_EPOCHS,
                                  verbose=VERBOSE, validation_data=test_gen, validation_steps=test_num_batches,
                                  callbacks=[csv_logger])

    model_file_path = os.path.join(output_path, attention.replace('cnn', str(datetime.datetime.now())) + '.h5')
    model.save(model_file_path)
    accu = history.history['val_acc'][-1]
    print('cnn-{}, attention-{}: accuracy-{}'.format(str(datetime.datetime.now()), attention, accu))

    return accu

In [None]:
classifier = Classifier()
# Reuse the configured dataset location rather than repeating the literal
# path, so the config cell stays the single place to change it.
classifier.fit(input_path=input_data_dir_path, output_path=".", data_set_name="")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Extracting features from :  chr17_s.7359874_vp.7359849.png
Extracting features from :  chr8_s.90605018_vp.90605010.png
Extracting features from :  chr3_s.116169906_vp.116169882.png
Extracting features from :  chr13_s.55044485_vp.55044458.png
Extracting features from :  chr9_s.126685679_vp.126685645.png
Extracting features from :  chr13_s.55044490_vp.55044458.png
Extracting features from :  chr13_s.72874125_vp.72874115.png
Extracting features from :  chr12_s.31094768_vp.31094717.png
Extracting features from :  chr4_s.39471580_vp.39471517.png
Extracting features from :  chr13_s.72874159_vp.72874115.png
Extracting features from :  chr13_s.55044460_vp.55044458.png
Extracting features from :  chr1_s.13317947_vp.13317898.png
Extracting features from :  chr9_s.126685698_vp.126685645.png
Extracting features from :  chr14_s.28801171_vp.28801139



Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78



0.01171875