In [1]:
# import sys
# sys.path.insert(0, '..')
# from ClassifyDashboard.mClassifier import ClassifyDashboard
import tensorflow as tf
import keras_preprocessing.image as keras_img
import numpy as np
import glob
import tensorflow_addons as tfa
import pandas as pd
import json
import ifcb
from PIL import Image
from tqdm import tqdm
import os
import datetime as dt

In [2]:
# %load_ext line_profiler

In [9]:
class ClassifyDashboard:
    def __init__(self):
        self.ifcb = "Tina"
        self.baseDir =  "/u/pdaniel/ifcb-scripts/"
        self.model = self.load_model()
        self.classes = self.load_class_labels()


    def load_model(self):
        """ Load Model off of google drive """
        model_dir = "../classifier/ifcb-xception/"
        return tf.keras.models.load_model(model_dir)

    
    def load_class_labels(self):
        """ Load dict of class lables """
        label_dir = "../classifier/xception-class-label.json"
        with open(label_dir) as json_file:
            return json.load(json_file)
    
    
    def generate_image_list(self):
        """Generate a list of ROI files to read image data from"""
        fnames = glob.glob("/opt/ifcb-data/sc-wharf/{}/*.roi".format(self.folder))
        return fnames

    
    def prep_image(self, image):
        """Load and prep images for model, reshape and normalize rgb to greyscale"""
        target_size=(224,224)
        img = keras_img.img_to_array(Image.fromarray(image).resize(target_size))
        img /= 255
        img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))
        return img

    
    def build_image_stack(self, fname):
        """ Given a URL, get the zip file and unzip the images into a images stack for the model to read """
        with ifcb.open_raw(fname) as roi_data:
            array_index = 0
            roi_names = []
            img_stack = np.empty(shape=(len(roi_data.images),224,224,3))
            for roi_num, img_data in roi_data.images.items():
                img_stack[array_index,:,:,:] = self.prep_image(img_data)
                array_index += 1
                roi_names.append(roi_num)
                
        return img_stack, roi_names

    
    def get_label(self, sample_value, labels):
        """ Helper function to get the label from the class index"""
        for label, value in labels.items():  # for name, age in dictionary.iteritems():  (for Python 2.x)
            if value == sample_value:
                return(label)

    def run_model(self, image_stack, i):
        """Classify the image stack"""
        yhat = self.model.predict(image_stack[0])
        self.processing_results(yhat,i)

    def processing_results(self, yhat, i):
        """ Covert classifications into counts for each timestep """
        headers = list(self.classes.keys())
        df = pd.DataFrame(data=yhat,columns=headers)
        totals = df.apply(lambda x: x == df.max(axis=1)).sum().values
        self.classData[i,:] = totals
        
    def str_to_dt(self, datetime_str):
            return pd.to_datetime(datetime_str[1:])
    
    def add_datetime(self, fnames):
        """ Get sample times from based on the file names """
        self.output['dateTime'] = [self.str_to_dt(os.path.basename(f).split("_")[0]) for f in fnames]

    def add_metadata(self, fnames):
        """Add header data from syringe sample"""
        self.output[["inhibitTime","runTime","syringeSize",'fileName']] = ""
        inhibitTime = []
        runTime = []
        syringeSize = []
        fileName = []
        for fname in fnames:
            with ifcb.open_raw(fname) as roi_data:
                header = roi_data.headers
                inhibitTime.append(header["inhibitTime"])
                runTime.append(header["runTime"])
                syringeSize.append(header["syringeSize"])
                fileName.append(os.path.basename(fname))

        self.output['runTime'] = runTime
        self.output['inhibitTime'] = inhibitTime
        self.output['syringeSize'] = syringeSize
        self.output['fileName'] = fileName
            
    def save_data(self,fname=None):
        """Save pandas dataframe """
        if fname is None:
            #Create a filename based on datarange
            pass
        else:
            out_name = fname
        self.output.to_csv(os.path.join(self.baseDir,"data/",fname),index=False)

    def run(self, folder="", last_file=None, save_output=False):
        """ Main loop for running the model on all of the data """
        self.folder = folder
        self.roi_fnames = self.generate_image_list()
        self.classData = np.empty((len(self.roi_fnames),50)) # preallocate the totals for each sample

        for i, fname in tqdm(enumerate(self.roi_fnames)):
            img_stack = self.build_image_stack(fname)
            self.run_model(img_stack,i)

        self.output = pd.DataFrame(data=self.classData, columns=self.classes)
        self.add_datetime(self.roi_fnames)
        self.add_metadata(self.roi_fnames)
        if last_file is not None:
            self.output = pd.concat((old_data, self.output))
            
        if save_output:
            out_filename = "classified_" + self.output.iloc[-1].fileName.split(".")[0] + ".csv"
            self.save_data(out_filename)


In [10]:
classifier = ClassifyDashboard()

In [34]:
folders = sorted(glob.glob("/opt/ifcb-data/sc-wharf/*"))
for folder in folders:
    folder = os.path.split(folder)[-1]
    classifier.run(folder)
    classifier.output.to_csv("{}-class.csv".format(folder))

5it [03:53, 46.65s/it]
59it [41:42, 42.41s/it]
61it [41:49, 41.15s/it]
60it [44:46, 44.78s/it]
61it [45:26, 44.70s/it]
60it [40:31, 40.53s/it]
8it [05:23, 40.39s/it]


ValueError: cannot reshape array of size 32986 into shape (170,392)

In [32]:
os.path.split(folders[0])[-1]
# !ls /opt/ifcb-data/sc-wharf/

'D20200423'