# Logistic

In [2]:
%matplotlib inline

In [3]:
import glob
import os.path
import re
import itertools
from collections import Counter
import numpy
import pandas
import pickle

import tensorflow
import keras
import deepometry.model
import sklearn.metrics

import matplotlib.pyplot as plt
import seaborn
#from mpl_toolkits.mplot3d import Axes3D

Using TensorFlow backend.


In [4]:
def collect_pathnames(directories, labels, n_samples):
    """
    Randomly pick sample files for every label found below each directory.

    :param directories: List of directories to select samples from. Every entry directly inside a
                        directory is searched for files whose name contains a label.
    :param labels: List of class labels. A file belongs to a label when the label text appears
                   in its file name.
    :param n_samples: How many files to draw per label and per directory. An integer is used as is;
                      'max' uses the size of the largest label group (i.e. keeps every file) and
                      'min' uses the size of the smallest label group (balanced draw).
    :return: Flat list of selected pathnames, grouped by directory and then by label.
    """
    selected = []

    for directory in directories:
        subdirectories = sorted(glob.glob(os.path.join(directory, "*")))

        # One list per label, gathering the matching files of every subdirectory.
        pathnames_per_label = [
            list(itertools.chain.from_iterable(
                glob.glob("{}/*{}*".format(subdirectory, label)) for subdirectory in subdirectories
            ))
            for label in labels
        ]

        group_sizes = [len(group) for group in pathnames_per_label]
        if n_samples == 'max':
            # Guard against an empty label list, for which max() would raise.
            limit = max(group_sizes) if group_sizes else 0
        elif n_samples == 'min':
            limit = min(group_sizes) if group_sizes else 0
        else:
            limit = n_samples

        # Shuffle each label group independently and keep at most `limit` files of it.
        for group in pathnames_per_label:
            selected.extend(numpy.random.permutation(group)[:limit])

    return selected


def load(pathnames, labels, dates):
    """
    Load sample images together with their label, day and morphology targets.

    The class label is parsed from each file name (the text between "- " and "_Total").
    The day is parsed from the directory holding the file.

    :param pathnames: List of NPY sample pathnames. Must not be empty: the first entry is read
                      to infer the per-sample shape.
    :param labels: List of class labels; their sorted order defines the class indices in ``y``.
    :param dates: List of day names; the position of a day in this list is its index in ``z``.
    :return: Tuple (x, y, z, m) of uint8 NumPy arrays with one row per pathname: sample data,
             class index, day index and morphology index. Rows whose pathname is not an
             existing file are left as zeros.
    """
    #--- if you want to ignore some class of morphology ---#
    #pathnames = [x for x in pathnames if (("renated" in x) and ("oid" not in x))]
    #pathnames = [x for x in pathnames if "Smooth Sphere" not in x]

    n_samples = len(pathnames)

    # Zero-initialised (not numpy.empty) so that rows skipped below hold a deterministic
    # value instead of whatever happened to be in memory.
    x = numpy.zeros((n_samples,) + _shape(pathnames[0]), dtype=numpy.uint8)
    y = numpy.zeros((n_samples,), dtype=numpy.uint8)
    z = numpy.zeros((n_samples,), dtype=numpy.uint8)
    m = numpy.zeros((n_samples,), dtype=numpy.uint8)

    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Built from the `dates` argument rather than from a module-level global.
    day_to_index = {day: index for index, day in enumerate(dates)}

    label_to_m_index = {"Smooth Disc": 6, "Smooth Sphere": 1, "Crenated Discoid" : 4, "Crenated Disc_" : 5, "Crenated Spheroid": 3, "Crenated Spheres": 2}

    day_pattern = re.compile(r'parsed_data.Bag...(.*)')
    label_pattern = re.compile(r'- (.*)_Total')

    for index, pathname in enumerate(pathnames):
        if not os.path.isfile(pathname):
            continue

        directory = os.path.dirname(pathname)
        day_match = day_pattern.search(directory)
        # Data stored under a root other than "parsed_data" does not match the pattern;
        # fall back to the name of the folder that directly contains the file.
        day = day_match.group(1) if day_match else os.path.basename(directory)

        label = label_pattern.search(os.path.basename(pathname)).group(1)
        if label == 'Crenated Disc':
            # The label list spells this class with a trailing underscore.
            label = label + "_"

        x[index] = numpy.load(pathname)
        y[index] = label_to_index[label]
        z[index] = day_to_index[day]
        m[index] = label_to_m_index[label]

    return x, y, z, m

def _shape(pathname):
    """
    Infer the shape of the sample data from a single sample.
    
    :param pathname: Path to a sample.
    :return: Sample dimensions.
    """
    return numpy.load(pathname).shape

def get_immediate_subdirectories(a_dir):
    """
    List the names of the directories located directly inside ``a_dir``.

    :param a_dir: Directory to inspect.
    :return: List of entry names (not full paths) that are directories, in os.listdir order.
    """
    subdirectory_names = []
    for entry in os.listdir(a_dir):
        candidate = os.path.join(a_dir, entry)
        if os.path.isdir(candidate):
            subdirectory_names.append(entry)
    return subdirectory_names

def save_metadata_label(label,labels,day,days,file):
    """
    Write a tab-separated metadata file with one "Day<TAB>Label" row per sample.

    :param label: NumPy array of class indices; each value indexes into ``sorted(labels)``.
    :param labels: Class label names.
    :param day: Indexable of day indices, one per entry of ``label``; each value indexes into ``days``.
    :param days: Sequence of day names.
    :param file: Path of the output file; it is overwritten.
    """
    # Sorted once up front instead of once per written row.
    ordered_labels = sorted(labels)

    with open(file, 'w') as f:
        f.write('Day\tLabel\n')
        for i in range(label.shape[0]):
            f.write('{}\t{}\n'.format(days[day[i]], ordered_labels[label[i]]))

def save_metadata_numericday(day,file):
    """
    Write every entry of ``day`` on its own line of a text file.

    :param day: NumPy array of day values; one line is written per element along the first axis.
    :param file: Path of the output file; it is overwritten.
    """
    with open(file, 'w') as f:
        row_count = day.shape[0]
        f.writelines('{}\n'.format(day[row]) for row in range(row_count))
            
def get_class_weights(y):
    """
    Compute per-class weights that are inversely proportional to class frequency.

    The most frequent class gets weight 1.0; every other class gets
    ``majority_count / class_count``.

    :param y: Iterable of hashable class labels, one per sample.
    :return: Dict mapping each class to its float weight; empty when ``y`` has no elements.
    """
    counter = Counter(y)
    if not counter:
        # max() would raise on an empty sequence; there is nothing to weight.
        return {}

    # Converted to float before dividing so the ratio is never floored by integer division.
    majority = float(max(counter.values()))
    return {cls: majority / count for cls, count in counter.items()}

In [5]:
# Bag letters to evaluate. Alternative selection: ['A', 'B', 'C', 'D', 'E', 'F', 'H']
test_bags = ['J']
# Expand every bag letter into three bag names carrying the suffixes 1, 2 and 3.
test_bags = ['{}{}'.format(bag, replicate) for bag in test_bags for replicate in range(1, 4)]

# Class labels as they are spelled in the sample file names.
labels = [
    "Smooth Disc",
    "Crenated Disc_",
    "Crenated Discoid",
    "Crenated Spheroid",
    "Crenated Spheres",
    "Smooth Sphere",
]
# Day names "D1" .. "D45".
day_of_exp = ['D{}'.format(day_number) for day_number in range(1, 46)]

In [6]:
# Build a TensorFlow (1.x-style API) session restricted to the GPU with device index "1".
# The options must be set on the config before the session is created from it.
configuration = tensorflow.ConfigProto()
# NOTE(review): allow_growth presumably makes TensorFlow claim GPU memory on demand
# rather than all at once — confirm against the installed TensorFlow version.
configuration.gpu_options.allow_growth = True
# Only the device listed here is made visible to this session.
configuration.gpu_options.visible_device_list = "1"
session = tensorflow.Session(config = configuration)

# Register the session with Keras so that Keras uses it.
keras.backend.set_session(session)

In [7]:
# Build the deepometry classifier.
# shape=(48,48,2): two channels, matching the two channels (indices 0 and 2) that the
# prediction cells keep from each loaded sample; 48x48 is presumably the image size — TODO confirm.
# units=6: presumably one output unit per class label (the label list has six entries).
model = deepometry.model.Model(shape=(48,48,2), units=6)

# Compile with deepometry's defaults (no arguments are passed here).
model.compile()

In [8]:
# Restore previously saved weights into the model wrapped by the deepometry object.
# NOTE(review): the checkpoint is assumed to come from a model built with the same shape/units — confirm.
model.model.load_weights('/models/deepometry_BFDF_1xMin_ResNet50_fast_6bags_88/deepometry/data/checkpoint.hdf5')

# Supervised classification and count

In [7]:
# Folder that receives the results written by the cells below; created on first use.
output_directory = '/models/BFDF_1xMin_ResNet50_fast_6bags_88/count_and_predict/'

output_directory_present = os.path.exists(output_directory)
if not output_directory_present:
    os.makedirs(output_directory)

In [None]:
# For every test bag, every day folder and every label, record:
#   abs_count_*: how many sample files carry the label in their name;
#   dl_count_*:  a count derived from the model's predictions for those files.
# Both structures are nested as bag -> day -> label, with labels in the order of `labels`.
abs_count_per_bag = []
dl_count_per_bag = []
for test_bag in test_bags:
    print(test_bag)

    directories = [str('/parsed_data/Bag'+test_bag)]

    for directory in directories:
        # Day folders are ordered by the integer that follows their first character.
        subdirectories = sorted(
            glob.glob(os.path.join(directory, "*")),
            key=lambda path: int(os.path.basename(path)[1:])
        )

        abs_count_per_day = []
        dl_count_per_day = []
        for subdirectory in subdirectories:

            abs_count_per_label = []
            dl_count_per_label = []
            for label in labels:

                pathnames = glob.glob("{}/*{}*".format(subdirectory,label))
                abs_count_per_label.append(len(pathnames))

                if len(pathnames) > 0 :

                    xx, y, z, m = load(pathnames, labels, day_of_exp)
                    # Keep only channels 0 and 2 of each sample (two-channel model input).
                    x = xx[:,:,:,0:3:2]

                    predicted = model.predict(
                        batch_size=50,
                        x=x
                    )

                    predicted = numpy.argmax(predicted, -1)

                    expected = y

                    confusion = sklearn.metrics.confusion_matrix(expected, predicted)

                    # NOTE(review): this takes the largest cell of the confusion matrix. The files
                    # were selected for a single label, so this is presumably the size of the
                    # most-predicted class, which equals the number of correct predictions only
                    # when the true class is also the most predicted one — confirm the intent.
                    dl_count_per_label.append(numpy.max(confusion))

                    # Release the sample arrays before the next label is loaded.
                    del(xx,x,y,z,m)
                else:
                    # Keep the prediction counts aligned with `labels` (and with
                    # abs_count_per_label) when a label has no files on this day.
                    dl_count_per_label.append(0)

            abs_count_per_day.append(abs_count_per_label)
            dl_count_per_day.append(dl_count_per_label)


        abs_count_per_bag.append(abs_count_per_day)
        dl_count_per_bag.append(dl_count_per_day)

        

In [None]:
# filehandler = open("/models/BFDF_1xMin_ResNet50_fast_6bags_88/count_and_predict/abs_count_per_bag_TEST.sav", "wb")
# pickle.dump(abs_count_per_bag,filehandler)

In [None]:
# filehandler = open("/models/BFDF_1xMin_ResNet50_fast_6bags_88/count_and_predict/dl_count_per_bag_TEST.sav", "wb")
# pickle.dump(dl_count_per_bag,filehandler)

In [None]:
# For every test bag: gather all of its samples, predict their classes and store the
# absolute confusion matrix as an NPY file in `output_directory`.
for test_bag in test_bags:
    print(test_bag)

    # NOTE(review): this data root differs from the '/parsed_data/Bag...' root used by the
    # counting cell — confirm it is the intended dataset location.
    directories = ['/media/paul/5c2fed7b-3e9d-4a9c-8c8f-d03b917ab93d/home/paul/Minh/RBC/parsed_BF_DF/Bag' + test_bag]

    # 'max' keeps every available file of every label.
    pathnames = collect_pathnames(directories, labels, n_samples='max')

    xx, y, z, m = load(pathnames, labels, day_of_exp)
    # Keep only channels 0 and 2 of each sample (two-channel model input).
    x = xx[:, :, :, 0:3:2]

    # Quick sanity report: array sizes and the distribution of classes, days and morphology indices.
    print("x: ", x.shape)
    print("y: ", y.shape)
    print(Counter(y))
    print("z: ", Counter(z))
    print("m: ", Counter(m))

    #model.evaluate(x, y, batch_size = 64, verbose=1)

    # Highest-scoring class per sample.
    predicted = numpy.argmax(model.predict(x=x, batch_size=50), -1)
    expected = y

    confusion = sklearn.metrics.confusion_matrix(expected, predicted)

    confusion_filename = 'confusion_matrix_absolute_TEST_' + test_bag + '.npy'
    numpy.save(os.path.join(output_directory, confusion_filename), confusion)

    # Optional: row-normalised confusion matrix rendered as a heatmap (disabled).
    #
    #     confusion = confusion.astype('float') / confusion.sum(axis=1)[:, numpy.newaxis]
    #     confusion = pandas.DataFrame(confusion)
    #     plt.figure(figsize=(12, 8))
    #     seaborn.heatmap(confusion, annot=True)
    #     plt.savefig( os.path.join(output_directory, str('confusion_matrix_percent_TEST_'+test_bag+'.png')) , dpi=300)

    # Release the sample arrays before the next bag is loaded.
    del xx, x, y, z, m