In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv2D, Conv3D
from tensorflow.keras.layers import MaxPooling2D, MaxPooling3D
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.losses import binary_crossentropy
from sklearn.metrics import f1_score
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import load_model

from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image 
import seaborn as sns
import os
import re
import glob
import cv2
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
import tqdm
from numpy import loadtxt
from os import *
from sklearn.utils import class_weight
import shutil

Using TensorFlow backend.


In [2]:
def atoi(text):
    """Convert a string made only of decimal digits to ``int``; return anything else unchanged.

    Parameters
    ----------
    text : str
        A chunk of text, typically one piece of a filename split on digit runs.

    Returns
    -------
    int or str
        ``int(text)`` when every character is a decimal digit, otherwise
        ``text`` itself (this includes the empty string).
    """
    # str.isdigit() is also True for characters such as superscript digits
    # ('²'), which int() rejects with ValueError.  str.isdecimal() accepts
    # exactly the characters int() can parse.
    if text.isdecimal():
        return int(text)
    return text
def natural_keys(text):
    """Build a sort key that orders embedded numbers numerically.

    ``text`` is split on runs of digits (the runs are kept), and every piece is
    passed through ``atoi`` so digit runs compare as integers.  Intended for use
    as ``sorted(names, key=natural_keys)``.

    Parameters
    ----------
    text : str
        The string (e.g. a filename) to build a key for.

    Returns
    -------
    list
        Alternating text / number pieces of ``text``.
    """
    # Raw string: in a normal literal '\d' is an invalid escape sequence, which
    # newer Python versions warn about.
    return [atoi(c) for c in re.split(r'(\d+)', text)]

In [3]:
def draw_confusion_matrix(true,preds):
    """Compute the confusion matrix of ``preds`` against ``true``, show it as a heatmap, and return it.

    Parameters
    ----------
    true : array-like
        Reference labels, passed as the first argument to ``confusion_matrix``.
    preds : array-like
        Predicted labels, passed as the second argument to ``confusion_matrix``.

    Returns
    -------
    The matrix returned by ``confusion_matrix(true, preds)``.
    """
    matrix = confusion_matrix(true, preds)

    # Annotated cells, general number format, no colour bar, blue colour map.
    heatmap_options = dict(
        annot=True,
        annot_kws={"size": 12},
        fmt='g',
        cbar=False,
        cmap=plt.cm.Blues,
    )
    sns.heatmap(matrix, **heatmap_options)
    plt.show()

    return matrix

In [4]:
def plot_history(model_history, model_name):
    """Plot training/validation loss and accuracy curves side by side.

    Parameters
    ----------
    model_history
        Object whose ``history`` attribute is a mapping with the keys
        ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``.
    model_name : str
        Prefix used in both subplot titles.

    Returns
    -------
    The matplotlib figure holding the two subplots (it is also shown).
    """
    curves = model_history.history
    fig = plt.figure(figsize=(15,5), facecolor='w')

    # Left panel: loss per epoch.
    loss_ax = fig.add_subplot(121)
    loss_ax.plot(curves['loss'])
    loss_ax.plot(curves['val_loss'])
    loss_ax.set(title=model_name + ': Model loss', ylabel='Loss', xlabel='Epoch')
    loss_ax.legend(['Train', 'Val'], loc='upper left')

    # Right panel: accuracy per epoch.  The number in the title is the last
    # validation accuracy rounded to 3 decimals.
    acc_ax = fig.add_subplot(122)
    acc_ax.plot(curves['accuracy'])
    acc_ax.plot(curves['val_accuracy'])
    last_val_accuracy = np.round(curves['val_accuracy'][-1], 3)
    acc_ax.set(
        title=model_name + ': Model Accuracy; test='+ str(last_val_accuracy),
        ylabel='Accuracy',
        xlabel='Epoch',
    )
    acc_ax.legend(['Train', 'Val'], loc='upper left')

    plt.show()

    return fig

In [5]:
def resize(x, scale_percent=140):
    """Shrink every image in ``x`` and stack the results into one array.

    The width and height of each image are divided by ``scale_percent / 100``
    (so the default of 140 yields images of about 71% of the original size),
    using Lanczos interpolation.

    Parameters
    ----------
    x : iterable of numpy arrays
        Images indexed as (height, width, ...).
    scale_percent : float, optional
        Divisor, in percent, applied to both dimensions.  Defaults to 140.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(x), new_height, new_width, 3)``, where the
        height and width are those of the last resized image.  An empty input
        returns an empty array of shape ``(0, 0, 0, 3)``.
    """
    factor = scale_percent / 100
    rescaled = []

    for image in x:
        width = int(image.shape[1] / factor)
        height = int(image.shape[0] / factor)
        # cv2.resize takes the target size as (width, height).
        rescaled.append(cv2.resize(image, (width, height), interpolation = cv2.INTER_LANCZOS4))

    if not rescaled:
        # Nothing to resize: return an empty batch instead of failing on an
        # undefined "last image".
        return np.empty((0, 0, 0, 3))

    # Use the real height (shape[0]) and width (shape[1]); using the width for
    # both only works for square images.
    out_height, out_width = rescaled[-1].shape[:2]
    x_orig = np.reshape(rescaled, (len(rescaled), out_height, out_width, 3))

    return x_orig

In [6]:
def loadImages(path_data):
    """Read the listed image files with OpenCV and stack them into one array.

    Filenames are processed in natural sort order.  A filename is looked up in
    the directory of every group whose token ('adr', 'control', 'hrh') occurs
    in it; the checks are independent, so a name containing several tokens is
    read once per matching directory.  Names containing none of the tokens are
    skipped.

    Parameters
    ----------
    path_data : iterable of str
        Image filenames (not full paths).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, 90, 90, 3)``.

    Raises
    ------
    OSError
        If an image cannot be read (``cv2.imread`` returned ``None``).
    ValueError
        From ``np.reshape`` if the images do not fit ``(n, 90, 90, 3)``.
    """
    # (token searched in the filename, directory holding that group's crops)
    group_dirs = (
        ('adr', '/home/jovyan/DATA_MASTER_PROJECT/Check_DIFF_T0_T97/ADR_cropped/'),
        ('control', '/home/jovyan/DATA_MASTER_PROJECT/Check_DIFF_T0_T97/CONTROL_cropped/'),
        ('hrh', '/home/jovyan/DATA_MASTER_PROJECT/Check_DIFF_T0_T97/HRH_cropped/'),
    )

    image_list = []

    for filename in sorted(path_data, key=natural_keys):
        for token, directory in group_dirs:
            if token not in filename:
                continue

            im = cv2.imread(directory + filename)
            if im is None:
                # cv2.imread signals failure by returning None; stop here with
                # the offending path instead of failing later in the reshape.
                raise OSError('could not read image: ' + directory + filename)

            image_list.append(np.array(im))

    x_orig = np.reshape(image_list, (len(image_list), 90, 90, 3))

    return x_orig

In [7]:
def return_count(x):
    """Count filenames per prefix, where the prefix is the text before the first '_'.

    Parameters
    ----------
    x : iterable of str
        Filenames such as ``'adr_features_well_..._id_3.npy'``.

    Returns
    -------
    list of (str, int, int)
        One ``(prefix, index, count)`` tuple per distinct prefix, with prefixes
        in sorted order, ``index`` being the position in that order and
        ``count`` the number of filenames having exactly that prefix.
    """
    # Count by exact prefix.  Testing ``prefix in filename`` would also count
    # files that merely contain the prefix somewhere else in their name.
    counts = {}
    for filename in x:
        prefix = filename.split('_')[0]
        counts[prefix] = counts.get(prefix, 0) + 1

    return [(prefix, index, counts[prefix]) for index, prefix in enumerate(sorted(counts))]

In [8]:
def creat_label(y, indirizo):
    """Write a CSV with one ``Type,Category`` row per counted sample.

    Parameters
    ----------
    y : sequence
        Entries indexed as ``entry[0]`` (type name) and ``entry[2]`` (number of
        rows to emit), e.g. the tuples produced by ``return_count``.
    indirizo : str
        Destination path of the CSV file.

    Returns
    -------
    str
        Always ``'Done'``.

    Entries whose type is not 'adr', 'hrh' or 'control' produce no rows.  Rows
    are sorted by ``Type`` before being written.
    """
    # NOTE(review): 'hrh' and 'control' both map to category 1 — confirm this
    # is intended.
    type_categories = (('adr', 0), ('hrh', 1), ('control', 1))

    rows = []
    for entry in y:
        for type_name, category in type_categories:
            if entry[0] == type_name:
                rows.extend([[entry[0], category]] * entry[2])

    table = pd.DataFrame(rows, columns = ['Type', 'Category'])
    table.sort_values(by=['Type']).to_csv(indirizo, sep=',', index=False)
    return 'Done'

In [69]:
# Load the trained model and give every layer a positional name so that a
# layer can be selected by its index.
p = '/home/jovyan/save_model_final/HRH_ADR/orp.h5'
m4 = load_model(p)

for i, layer in enumerate(m4.layers):
    # NOTE(review): `_name` is a private attribute of the layer object.
    layer._name = 'layer_' + str(i)

# Feature extractor: same input as the loaded model, output taken at 'layer_5'.
base_model = Model(inputs=m4.input, outputs=m4.get_layer('layer_5').output)


# Output directories for the extracted features.  os.mkdir raises
# FileExistsError when a directory is already present, so leftovers from a
# previous run stop the cell here.
train_path = '/home/jovyan/DATA_MASTER_PROJECT/LSTM/FINAL_LSTM_LEAVE_ONE_OUT/features_train'
os.mkdir(train_path)

test_path = '/home/jovyan/DATA_MASTER_PROJECT/LSTM/FINAL_LSTM_LEAVE_ONE_OUT/features_test'
os.mkdir(test_path)

val_path = '/home/jovyan/DATA_MASTER_PROJECT/LSTM/FINAL_LSTM_LEAVE_ONE_OUT/features_validation'
os.mkdir(val_path)


# Model file name without its extension ('orp' for the path above).  Taken
# from the basename so it does not depend on how deep the file sits.
n_well = os.path.splitext(os.path.basename(p))[0]

full_path = '/home/jovyan/DATA_MASTER_PROJECT/LSTM/FINAL_LSTM_LEAVE_ONE_OUT/{}'.format(n_well)
os.mkdir(full_path)

# Every third time point from 1 to 94, as strings, and the matching filename
# fragments '_<t>.' used to select frames.
time_points = list(map(str, range(1,97,3)))

new_time = ['_' + t + '.' for t in time_points]


path_test = '/home/jovyan/DATA_MASTER_PROJECT/Check_DIFF_T0_T97/HRH_cropped/'

# NAME OF THE WELLS CORRESPONDING TO THE DRUG THAT YOU WANT IN THE TEST SET

wells_drug = ['D8', 'B2']

# Test set: files under path_test whose name contains one of the test wells
# and one of the selected time points (with a 'tiff' extension).
tes = []

for _,_, filenames in os.walk(path_test):

    for filename in sorted(filenames, key = natural_keys):

        for w in wells_drug:
            for t in new_time:
                if w in filename and '{}tiff'.format(t) in filename:
                    tes.append(filename)

# TRAIN & VALIDATION

In [70]:
groups_list = ['ADR', 'HRH']#, 'CONTROL']

fileds_of_view = ['1','2','3','4']

# Draw the split seed as a plain scalar (int() on a 1-element array is
# deprecated in recent NumPy) and print it so this particular train/validation
# split can be reproduced by hard-coding the value.
split_seed = int(np.random.randint(1, 1000))
print('field-of-view split seed:', split_seed)

field_train, field_val = train_test_split(fileds_of_view, test_size=0.2, random_state=split_seed)


tra = []

val = []

group_compounds = []

# Filename fragments identifying the held-out test wells ('_<well>-') and the
# selected time points ('_<t>.tiff').  Built from the full wells_drug list so
# every test well is excluded, however many there are.
held_out_tags = ['_{}-'.format(w) for w in wells_drug]
time_tags = ['{}tiff'.format(t) for t in new_time]

for group in tqdm.tqdm(groups_list):

    pa = '/home/jovyan/DATA_MASTER_PROJECT/Check_DIFF_T0_T97/{}_cropped/'.format(group)

    for _,_, filenames in os.walk(pa):

        for filename in sorted(filenames, key = natural_keys):

            # Skip anything belonging to a test well.
            if any(tag in filename for tag in held_out_tags):
                continue

            for tag in time_tags:
                if tag in filename:
                    group_compounds.append(filename)


# Assign each selected file to train or validation according to the field of
# view encoded in its name as '-<field>_'.
for i in group_compounds:

    for f in field_train:
        if '-{}_'.format(f) in i:
            tra.append(i)

    for v in field_val:
        if '-{}_'.format(v) in i:
            val.append(i)
            


  0%|          | 0/2 [00:00<?, ?it/s][A
 50%|█████     | 1/2 [00:05<00:05,  5.89s/it][A
100%|██████████| 2/2 [00:11<00:00,  5.85s/it][A


# LSTM TRAIN & VALIDATION


In [71]:
data_name = [tra,tes,val]

feat_name = ['train', 'test', 'validation']

# Minimum number of frames a cell id must have to be kept; set this to the
# length of the time span you are training on.
min_frames = 32

for split_name, split_files in zip(feat_name, data_name):

    # Sort once per split instead of once per well.
    split_sorted = sorted(split_files, key = natural_keys)

    # Distinct well prefixes: the part of each filename before '_id'.
    wells = sorted({fname.split('_id')[0] for fname in split_files}, key = natural_keys)

    for w in tqdm.tqdm(wells):

        # All files of this split whose name contains the well prefix.
        # PAY ATTENTION IF THE IMAGE IS A TIFF OR PNG IMAGE
        well_files = [fname for fname in split_sorted if w in fname]

        # Cell ids: the text between '_id_' and 'time_' in each filename.
        cell_ids = {fname.split('_id_')[1].split('time_')[0] for fname in well_files}

        # Name used in the output file: prefix of the first file of the well.
        well_label = well_files[0].split('_id_')[0].split('time_')[0]

        # Group the files by cell id.  The key is the position of the id in
        # natural sort order; ids with fewer than min_frames files are dropped
        # but the remaining keys keep their original position.
        tracks = {}
        for ix, cell_id in enumerate(sorted(cell_ids, key = natural_keys)):
            frames = [fname for fname in well_files if 'id_{}'.format(cell_id) in fname]
            if len(frames) >= min_frames:
                tracks[ix] = frames

        for ix, frames in tracks.items():

            x_orig = loadImages(frames)
            x_orig = resize(x_orig)

            x_orig = preprocess_input(x_orig)
            output = base_model.predict(x_orig)
            np.save('/home/jovyan/DATA_MASTER_PROJECT/LSTM/FINAL_LSTM_LEAVE_ONE_OUT/features_{}/features_well_{}_id_{}.npy'.format(split_name, well_label, ix), output)


  0%|          | 0/54 [00:00<?, ?it/s][A
  2%|▏         | 1/54 [00:01<01:21,  1.55s/it][A
  4%|▎         | 2/54 [00:05<01:54,  2.21s/it][A
  6%|▌         | 3/54 [00:08<02:13,  2.62s/it][A
  7%|▋         | 4/54 [00:11<02:09,  2.59s/it][A
  9%|▉         | 5/54 [00:13<02:00,  2.46s/it][A
 11%|█         | 6/54 [00:15<01:46,  2.21s/it][A
 13%|█▎        | 7/54 [00:18<01:52,  2.40s/it][A
 15%|█▍        | 8/54 [00:20<01:57,  2.56s/it][A
 17%|█▋        | 9/54 [00:23<01:55,  2.56s/it][A
 19%|█▊        | 10/54 [00:26<01:54,  2.60s/it][A
 20%|██        | 11/54 [00:26<01:22,  1.93s/it][A
 22%|██▏       | 12/54 [00:29<01:34,  2.25s/it][A
 24%|██▍       | 13/54 [00:32<01:41,  2.48s/it][A
 26%|██▌       | 14/54 [00:35<01:48,  2.71s/it][A
 28%|██▊       | 15/54 [00:39<01:57,  3.02s/it][A
 30%|██▉       | 16/54 [00:42<01:56,  3.07s/it][A
 31%|███▏      | 17/54 [00:46<01:56,  3.15s/it][A
 33%|███▎      | 18/54 [00:48<01:48,  3.01s/it][A
 35%|███▌      | 19/54 [00:53<02:01,  3.46s/it]

# LABELS

In [72]:
# Wells belonging to each group.
controll = ['B3', 'D5','D6','F10']
adr = ['C6','F11', 'D11', 'G4', 'F2', 'G10', 'G5', 'B10', 'G3', 'B8']
hrh = ['E4', 'G6', 'G8', 'D10', 'E7', 'B7', 'E10', 'B11', 'D8', 'B2']

n_data = ['validation', 'train', 'test']

indi = '/home/jovyan/DATA_MASTER_PROJECT/LSTM/FINAL_LSTM_LEAVE_ONE_OUT/'

# (filename prefix, wells) for every group, in the order they are checked.
group_wells = [('control_', controll), ('adr_', adr), ('hrh_', hrh)]
group_prefixes = tuple(prefix for prefix, _ in group_wells)

for split_name in n_data:

    directory = indi + 'features_{}/'.format(split_name)

    # Prefix every feature file with the group of the well named in it.
    for i in os.listdir(directory):

        # Already labelled (e.g. the cell was run before): do not prefix twice.
        if i.startswith(group_prefixes):
            continue

        matches = [prefix for prefix, group in group_wells
                   if any(well in i for well in group)]

        if len(matches) > 1:
            # A second rename of the same file would fail because the file has
            # already moved; report the ambiguity explicitly instead.
            raise ValueError('{} matches more than one group: {}'.format(i, matches))

        if matches:
            os.rename(directory + i, directory + matches[0] + i)

    # Collect the (renamed) feature filenames and write the label CSV.
    feat = []
    for dire, dir_name, filnames in os.walk(directory):
        for f_name in filnames:
            feat.append(f_name)

    tr = return_count(feat)
    creat_label(tr, os.path.join(indi, 'lstm_{}.csv'.format(split_name)))

# Gather the feature directories and label files under full_path.
tot_path = [train_path, test_path, val_path]

for i in tot_path:
    shutil.move(i, full_path)

csv_f = ['lstm_train.csv', 'lstm_validation.csv', 'lstm_test.csv']

for i in csv_f:
    shutil.move(indi+i, full_path)