In [None]:
import numpy as np
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from PIL import Image
from keras import utils
sys.path.append('rxrx1-utils')
import rxrx.io as rio

%matplotlib inline

In [None]:
def parse_all_train(train_csv_path='', RGB=False, num_samples=36515,
                    data_dir='../data', out_dir='../data/train_parsed_RGB'):
    """Convert raw per-channel training PNGs into per-site ``.npy`` files.

    For each of the two sites and each of the first ``num_samples`` rows of
    the training CSV, the six channel images
    ``{data_dir}/train/{experiment}/Plate{plate}/{well}_s{site}_w{channel}.png``
    are stacked into one ``(512, 512, 6)`` float16 array, optionally passed
    through ``rio.convert_tensor_to_rgb``, divided by 255 and written to
    ``out_dir`` as ``x_<experiment>_<plate>_<well>_s<site>``. The matching
    one-hot encoded ``sirna`` target is written next to it as ``y_...``.
    ``np.save`` appends the ``.npy`` extension to both file names.

    Parameters
    ----------
    train_csv_path : str
        Path to the training CSV. The columns ``sirna``, ``experiment``,
        ``plate`` and ``well`` are read from it.
    RGB : bool
        When truthy, each 6-channel stack is converted with
        ``rio.convert_tensor_to_rgb(site_img, vmax=255)`` before normalisation
        (presumably yielding a 3-channel image -- confirm against rxrx1-utils).
    num_samples : int or None
        Number of leading CSV rows to process per site. Values larger than
        the CSV are clamped to the row count instead of raising
        ``IndexError``; ``None`` means "all rows".
    data_dir : str
        Root directory that contains the ``train`` image tree.
    out_dir : str
        Directory the ``.npy`` files are written to; created if missing.
        NOTE: the default keeps the historical ``train_parsed_RGB`` name even
        when ``RGB`` is false -- pass an explicit ``out_dir`` to keep
        6-channel and RGB outputs apart.

    Returns
    -------
    (list of str, list of str)
        File stems (no directory, no extension) of the written image arrays
        and target arrays, in the order they were written.
    """
    # read in train CSV file
    train_df = pd.read_csv(train_csv_path)

    # one-hot encode the targets; the class count is inferred from the whole
    # CSV, not only from the rows that end up being processed
    y_train = utils.to_categorical(train_df['sirna'].values)
    print("y_train shape: ", y_train.shape)

    # per-row identifiers used to locate the source images
    dataset = 'train'
    experiments = train_df['experiment'].values
    plates = train_df['plate'].values
    wells = train_df['well'].values
    sites = (1, 2)
    channels = (1, 2, 3, 4, 5, 6)

    # never index past the end of the CSV (the default of 36515 is only
    # correct for the full training set)
    n_rows = len(train_df)
    if num_samples is None:
        num_samples = n_rows
    num_samples = max(0, min(int(num_samples), n_rows))

    # np.save does not create missing parent directories
    os.makedirs(out_dir, exist_ok=True)

    x_id = []  # stems of the written image files
    y_id = []  # stems of the written target files

    for site in sites:
        for i in tqdm_notebook(range(num_samples)):
            site_img = np.empty((512, 512, 6), dtype=np.float16)
            for channel in channels:
                path = f'{data_dir}/{dataset}/{experiments[i]}/Plate{plates[i]}/{wells[i]}_s{site}_w{channel}.png'
                # context manager releases the file handle deterministically
                with Image.open(path) as channel_img:
                    site_img[:, :, channel - 1] = np.asarray(channel_img)

            if RGB:
                site_img = np.asarray(rio.convert_tensor_to_rgb(site_img, vmax=255), dtype=np.float16)

            # normalize
            site_img /= 255

            # build the shared stem once so the x/y names cannot drift apart
            sample_key = f'{experiments[i]}_{plates[i]}_{wells[i]}_s{site}'
            x_name = f'x_{sample_key}'
            y_name = f'y_{sample_key}'

            # write out the image and its target
            np.save(f'{out_dir}/{x_name}', site_img, allow_pickle=True)
            np.save(f'{out_dir}/{y_name}', y_train[i], allow_pickle=True)

            x_id.append(x_name)
            y_id.append(y_name)

    return x_id, y_id

In [None]:
# Parse the first 1000 rows of the training CSV with RGB conversion enabled;
# the returned lists hold the stems of the image and target files written.
x_id, y_id = parse_all_train(
    train_csv_path='../data/train.csv',
    RGB=True,
    num_samples=1000,
)

# Persist both ID lists next to the data so they can be reloaded later.
np.save(file='../data/x_id_RGB', arr=x_id, allow_pickle=True)
np.save(file='../data/y_id_RGB', arr=y_id, allow_pickle=True)