In [None]:
# Load the metadata table (fl_data.csv); later cells read its `moa`, `plate`
# and `C1`..`C5` columns.
import numpy as np
import pandas as pd

df = pd.read_csv(
    '../data_for_image_based_model/fl_data.csv'
)

In [None]:
# Keep only the rows whose `moa` value is not 'dmso'
df = df.loc[df.moa.ne('dmso')]
# Sanity check: the string 'dmso' must not remain in any cell of the frame
assert 'dmso' not in df.values

In [None]:
# Shuffle all rows reproducibly (fixed seed). The original index labels are
# kept, which is fine because rows are later accessed by position (iloc).
df = df.sample(frac=1, random_state=1)

In [None]:
# DMSO normalization: statistics table consumed together with the
# dmso_normalization function. The two header rows become a two-level column
# index (looked up as plate, then channel) and the first CSV column supplies
# the row labels (the 'm' and 'std' rows are the ones read later).
dmso_stats_df = pd.read_csv(
    '../data_for_image_based_model/dmso_stats.csv',
    header=[0, 1],
    index_col=0,
)
def dmso_normalization(im, dmso_mean, dmso_std):
    """Standardize an image against DMSO statistics: ``(im - mean) / std``.

    Parameters
    ----------
    im : numpy.ndarray
        Image to normalize; any dtype that supports ``astype``.
    dmso_mean : scalar or array broadcastable to ``im``
        Value subtracted from the image.
    dmso_std : scalar or array broadcastable to ``im``
        Value the centered image is divided by.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``im``.
    """
    # Cast first so the subtraction happens in floating point rather than in
    # the image's own (possibly unsigned integer) dtype.
    centered = im.astype('float') - dmso_mean
    return centered / dmso_std

In [None]:
# A transform function for resizing  
import albumentations as album
import cv2
# Side length (in pixels) of the square output images
image_size = 256
# Pipeline with a single step: resize to image_size x image_size
easy_transforms = album.Compose(
    [album.Resize(height=image_size, width=image_size)]
)

In [None]:
# Root directory of the raw image files; the per-plate sub-folders named in
# df.plate are looked up beneath it. End the path with '/'.
base_dir = 'path/to/data' # this has to be set 

In [None]:
# A function for transferring the images of one row into a numpy array
def create_all_images(idx):
    """Load, DMSO-normalize, stack and resize the five channel images of one row.

    Parameters
    ----------
    idx : int
        Positional (``iloc``) index of the row in the notebook-level ``df``.
        The row must provide ``plate`` and the file names ``C1`` .. ``C5``.

    Returns
    -------
    numpy.ndarray
        Result of ``easy_transforms`` applied to the channels-last float
        stack of the five normalized channel images.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read one of the channel files (missing or
        unreadable); the message names the offending path.
    """
    import os  # local import keeps this cell self-contained

    row = df.iloc[idx]
    channels = []
    for i in range(1, 6):
        channel = 'C' + str(i)
        # os.path.join adds the separator only when base_dir lacks one, so
        # base_dir works with or without a trailing '/'.
        im_path = os.path.join(base_dir, row.plate, row[channel])
        # Flag -1 is cv2.IMREAD_UNCHANGED: keep the stored bit depth/channels.
        local_im = cv2.imread(im_path, -1)
        if local_im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: ' + im_path)

        # Column lookup is plate -> channel; rows 'm' and 'std' hold the stats.
        channel_stats = dmso_stats_df[row.plate][channel]
        local_im = dmso_normalization(
            local_im, channel_stats['m'], channel_stats['std']
        )
        channels.append(local_im)

    # Stack as (channel, H, W), then move channels last -> (H, W, channel).
    # This requires every channel file to decode to a 2-D array.
    im = np.array(channels).transpose(1, 2, 0).astype("float")
    im = np.array(easy_transforms(image=im)['image'])

    return im

In [None]:
# Write all the images into one big numpy array of shape
# (n_rows, image_size, image_size, 5). The spatial size is taken from
# image_size so it always matches what the resize transform produces.
# Each float result is cast to float32 on assignment.
all_images = np.zeros(
    (df.shape[0], image_size, image_size, 5), dtype=np.float32
)
for row_idx in range(df.shape[0]):
    all_images[row_idx] = create_all_images(row_idx)

In [None]:
# Persist the stacked image array to disk in NumPy's .npy format
with open('all_images.npy', mode='wb') as out_file:
    np.save(out_file, all_images)