# Logistic

In [None]:
import glob
import os
import itertools
import re
from collections import Counter

import numpy
import tensorflow
import keras

import deepometry.model

In [None]:
def collect_pathnames(directories, labels):
    """
    Collect a class-balanced, randomly ordered selection of sample pathnames.

    For every directory, the files of each label are gathered from all of the
    directory's immediate children by matching ``*<label>*`` against the file
    names. Each label is then randomly down-sampled to the size of the
    smallest label of that directory, so a directory contributes the same
    number of samples for every label (and nothing at all when one of the
    labels has no file in it).

    :param directories: List of directories to select samples from. The immediate children of each
                        directory are searched for files whose name contains one of the labels.
    :param labels: List of class labels. A file is assigned to a label when its name contains it.
    :return: Flat list of pathnames (plain strings).
    """
    pathnames = []

    for directory in directories:
        subdirectories = sorted(glob.glob(os.path.join(directory, "*")))

        # One list per label, holding that label's files from every subdirectory.
        label_pathnames = [
            list(itertools.chain.from_iterable(
                glob.glob("{}/*{}*".format(subdirectory, label))
                for subdirectory in subdirectories
            ))
            for label in labels
        ]

        # Without labels there is nothing to balance (and min() would raise).
        if not label_pathnames:
            continue

        # Undersample every label to the size of the rarest one.
        nsamples = min(len(candidates) for candidates in label_pathnames)

        for candidates in label_pathnames:
            # tolist() turns the shuffled NumPy strings back into plain str objects.
            pathnames.extend(numpy.random.permutation(candidates)[:nsamples].tolist())

    return pathnames


def load(pathnames, labels, dates):
    """
    Load the samples together with their class, day and per-label code.

    The day is parsed from the directory part of each pathname (the text that
    follows ``parsed_data/Bag??/``) and the class label from the file name (the
    text between ``"- "`` and ``"_Total"``). Pathnames that do not refer to an
    existing regular file are dropped before anything is loaded, so every row
    of the returned arrays belongs to a real sample.

    :param pathnames: List of image pathnames (NPY files).
    :param labels: List of class labels. The index of a label is its position in ``sorted(labels)``.
    :param dates: List of day names. The index of a day is its position in this list.
    :return: Tuple (x, y, z, m) of NumPy uint8 arrays: the stacked samples, the class index,
             the day index and the hard-coded per-label code of every sample.
    :raises ValueError: If none of the pathnames refers to an existing file.
    """
    #--- To ignore some class ---#
    #pathnames = [x for x in pathnames if (("renated" in x) and ("oid" not in x))]
    #pathnames = [x for x in pathnames if "oid" not in x]

    # Filter up front: skipping entries inside the loop would leave their rows
    # of the numpy.empty buffers uninitialised.
    pathnames = [pathname for pathname in pathnames if os.path.isfile(pathname)]
    if not pathnames:
        raise ValueError("load() found no existing sample file in pathnames")

    nsamples = len(pathnames)

    # All samples are assumed to share the shape of the first one.
    x = numpy.empty((nsamples,) + numpy.load(pathnames[0]).shape, dtype=numpy.uint8)

    y = numpy.empty((nsamples,), dtype=numpy.uint8)

    z = numpy.empty((nsamples,), dtype=numpy.uint8)

    m = numpy.empty((nsamples,), dtype=numpy.uint8)

    label_to_index = {label: index for index, label in enumerate(sorted(labels))}

    # Index the days by the ``dates`` argument rather than by a notebook global.
    day_to_index = {day: index for index, day in enumerate(dates)}

    label_to_m_index = {"Smooth Disc": 6, "Smooth Sphere": 1, "Crenated Discoid" : 4, "Crenated Disc_" : 5, "Crenated Spheroid": 3, "Crenated Spheres": 2}

    for index, pathname in enumerate(pathnames):
        day = re.search('parsed_data.Bag...(.*)', os.path.dirname(pathname)).group(1)

        label = re.search('- (.*)_Total', os.path.basename(pathname)).group(1)
        # The class list spells this label with a trailing underscore.
        if label == 'Crenated Disc':
            label = label + "_"

        x[index] = numpy.load(pathname)

        y[index] = label_to_index[label]

        z[index] = day_to_index[day]

        m[index] = label_to_m_index[label]

    print(x.shape)
    print(y.shape)
    print(z.shape)
    print(Counter(y))
    print(Counter(m))
    return x, y, z, m


def _shape(pathname):
    """
    Infer the shape of the sample data from a single sample.
    
    :param pathname: Path to a sample.
    :return: Sample dimensions.
    """
    return numpy.load(pathname).shape


def get_immediate_subdirectories(a_dir):
    """
    List the names of the directories located directly inside a directory.

    :param a_dir: Path of the directory to inspect.
    :return: List of entry names (not full paths) that are directories, in ``os.listdir`` order.
    """
    subdirectories = []
    for entry in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, entry)):
            subdirectories.append(entry)
    return subdirectories


def save_metadata_label(label, labels, day, days, file):
    """
    Write a tab-separated metadata file with one ``Day``/``Label`` row per sample.

    :param label: Array of class indices, one per sample; each indexes ``sorted(labels)``.
    :param labels: List of class labels.
    :param day: Array of day indices, one per sample; each indexes ``days``.
    :param days: List of day names.
    :param file: Path of the file to write. An existing file is overwritten.
    """
    # Sort once instead of re-sorting the label list for every row.
    sorted_labels = sorted(labels)

    with open(file, 'w') as f:
        f.write('Day\tLabel\n')
        for i in range(label.shape[0]):
            f.write('{}\t{}\n'.format(days[day[i]], sorted_labels[label[i]]))

            
def save_metadata_numericday(day, file):
    """
    Write the day value of every sample to a text file, one value per line.

    :param day: Array of day values, one per sample.
    :param file: Path of the file to write. An existing file is overwritten.
    """
    with open(file, 'w') as f:
        f.writelines('{}\n'.format(day[row]) for row in range(day.shape[0]))

            
def get_class_weights(y):
    """
    Compute per-class weights that are inversely proportional to class frequency.

    The most frequent class gets the weight 1.0 and every other class gets
    ``majority_count / class_count``.

    :param y: Iterable of hashable class identifiers, one per sample.
    :return: Dictionary mapping every class to its float weight; empty when ``y`` is empty.
    """
    counter = Counter(y)

    # max() raises on an empty sequence; no samples simply means no weights.
    if not counter:
        return {}

    # Convert before dividing so the ratio is never truncated by integer division.
    majority = float(max(counter.values()))
    return {cls: majority / count for cls, count in counter.items()}

In [None]:
# Bag identifiers: every bag letter combined with the replicate numbers 1 to 3
# ("A1", "A2", "A3", "B1", ...).
test_bags = [
    "{}{}".format(letter, replicate)
    for letter in "ABDEFH"
    for replicate in (1, 2, 3)
]

In [None]:
# Class labels as they appear in the sample file names. "Crenated Disc_" keeps
# its trailing underscore; load() renames the parsed 'Crenated Disc' to match.
labels = [
    "Smooth Disc",
    "Smooth Sphere",
    "Crenated Discoid",
    "Crenated Disc_",
    "Crenated Spheroid",
    "Crenated Spheres",
]

# Day names "D1" ... "D45".
day_of_exp = ["D{}".format(day_number) for day_number in range(1, 46)]

In [None]:
# One input directory per bag: "/parsed_data/Bag<letter><replicate>/" for the
# bag letters A, B, D, E, F, H and the replicates 1 to 3.
directories = [
    "/parsed_data/Bag{}{}/".format(letter, replicate)
    for letter in "ABDEFH"
    for replicate in (1, 2, 3)
]

# Sampling for training set

In [None]:
# Draw a class-balanced selection of sample files from every bag directory and
# load them: xx holds the samples, y the class indices, z the day indices and
# m the per-label codes returned by load().
pathnames = collect_pathnames(directories, labels)
xx, y, z, m = load(pathnames, labels, day_of_exp)
# Keep only indices 0 and 2 of the last axis (slice 0:3:2).
# NOTE(review): presumably the last axis holds the image channels - confirm.
x = xx[:,:,:,0:3:2]
# Report the array shapes and how many samples fall into each class / day / code.
print("x: ", x.shape)
print("y: ", y.shape)
print(Counter(y))
print("z: ",Counter(z))
print("m: ",Counter(m))

In [None]:
# Weight every class by (size of the largest class / size of this class); the
# result is handed to model.fit() as class_weight.
class_weight = get_class_weights(y)

# Start training

In [None]:
# Build a TensorFlow session that only sees the GPU with device index "2"
# (visible_device_list) and lets its GPU memory allocation grow on demand
# (allow_growth) instead of reserving it all up front.
configuration = tensorflow.ConfigProto()
configuration.gpu_options.allow_growth = True
configuration.gpu_options.visible_device_list = "2"
session = tensorflow.Session(config = configuration)

# Make Keras use this session.
keras.backend.set_session(session)

In [None]:
# Build the deepometry model: the input shape is the shape of a single sample
# (x without its first axis) and there is one output unit per class label.
model = deepometry.model.Model(shape=x.shape[1:], units=len(labels))

model.compile()

# Train on (x, y) with the per-class weights computed above.
# NOTE(review): validation_split=0.3 presumably holds out 30% of the samples
# for validation, as in Keras' Model.fit - confirm against deepometry.
model.fit(
    x,
    y,
    class_weight = class_weight,
    batch_size=64,
    epochs=512,
    validation_split=0.3,
    verbose=1
)

# End training and save trained model

In [None]:
# Directory that receives the trained model.
model_directory = '/models/resnet'

# Create the directory (including missing parents) on first use.
if not os.path.exists(model_directory):
     os.makedirs(model_directory)
    
# Save the model object wrapped by the deepometry model (model.model) as
# "model.h5" inside that directory.
model.model.save( os.path.join(model_directory, 'model.h5') )

# Display loss and accuracy curve

In [None]:
%matplotlib inline
import pandas
import pkg_resources
import matplotlib.pyplot

In [None]:
# Read the training log shipped inside the deepometry package.
# NOTE(review): presumably this CSV is written during model.fit() - confirm.
csv = pandas.read_csv(pkg_resources.resource_filename("deepometry", "deepometry/data/training.csv"))

# Two plots side by side: accuracy on the left, loss on the right.
_, (ax0, ax1) = matplotlib.pyplot.subplots(ncols=2, figsize=(16, 4))

# Red: "acc" column, blue: "val_acc" column.
ax0.plot(csv["acc"], c="r")
ax0.plot(csv["val_acc"], c="b")

# The loss curves skip the first 30 rows of the log; the trailing semicolon
# keeps the notebook from echoing the return value of the last call.
ax1.plot(csv["loss"][30:], c="r")
ax1.plot(csv["val_loss"][30:], c="b");