In [6]:
import os
import sys
import json
import time
from keras.callbacks import *
import keras
from nn_packages import *
from io_functions import *
#from RegClass_Generator import *
import re
import glob
import h5py
import numpy as np
#import keras
from keras.models import Sequential,Model
from keras.layers import Dense, Activation,Input, Dense, Dropout, merge
from keras.callbacks import EarlyStopping, ModelCheckpoint
%matplotlib inline
#from callbacks import SmartCheckpoint

In [7]:
class RegCls_EarlyStop(EarlyStopping):
    #slight modifications on keras' EarlyStopping to enable greater control in stopping for multi-output functions
    '''Stop training when monitored quantity/ies has/have stopped improving.
    # Arguments
        monitor: quantity to be monitored.
        patience: number of epochs with no improvement
            after which training will be stopped.
        verbose: verbosity mode.
        mode: one of {auto, min, max}. In 'min' mode,
            training will stop when the quantity
            monitored has stopped decreasing; in 'max'
            mode it will stop when the quantity
            monitored has stopped increasing.
    '''
    def __init__(self, monitor1='val_loss', monitor2='val_energy_loss', monitor3='val_particle label_loss', patience=0, verbose=0, mode='auto'):
        super(EarlyStopping, self).__init__()

        self.monitor1 = monitor1
        self.monitor2 = monitor2
        self.monitor3 = monitor3
        self.patience = patience
        self.verbose = verbose
        self.wait = 0

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('EarlyStopping mode %s is unknown, '
                          'fallback to auto mode.' % (self.mode),
                          RuntimeWarning)
            mode = 'auto'

        if mode == 'min':
            self.monitor1_op = np.less
            self.monitor2_op = np.less
            self.monitor3_op = np.less
        elif mode == 'max':
            self.monitor1_op = np.greater
            self.monitor2_op = np.greater
            self.monitor3_op = np.greater
        else:
            if 'acc' in self.monitor1:
                self.monitor1_op = np.greater
            else:
                self.monitor1_op = np.less

            if 'acc' in self.monitor2:
                self.monitor2_op = np.greater
            else:
                self.monitor2_op = np.less

            if 'acc' in self.monitor3:
                self.monitor3_op = np.greater
            else:
                self.monitor3_op = np.less

    def on_train_begin(self, logs={}):
        self.wait = 0       # Allow instances to be re-used
        self.best = np.Inf if (self.monitor1_op == np.less or self.monitor2_op == np.less or self.monitor3_op == np.less) else -np.Inf

    def on_epoch_end(self, epoch, logs={}):
        current1 = logs.get(self.monitor1)
        if current1 is None:
            warnings.warn('Early stopping requires %s available!' %
                          (self.monitor1), RuntimeWarning)

        if self.monitor1_op(current1, self.best):
            self.best = current1
            self.wait = 0
        else:
            if self.wait >= self.patience:
                if self.verbose > 0:
                    print('Epoch %05d: early stopping' % (epoch))
                self.model.stop_training = True
            self.wait += 1
            
            
        current2 = logs.get(self.monitor2)
        if current2 is None:
            warnings.warn('Early stopping requires %s available!' %
                          (self.monitor2), RuntimeWarning)

        if self.monitor2_op(current2, self.best):
            self.best = current2
            self.wait = 0
        else:
            if self.wait >= self.patience:
                if self.verbose > 0:
                    print('Epoch %05d: early stopping' % (epoch))
                self.model.stop_training = True
            self.wait += 1
            
            
        current3 = logs.get(self.monitor3)
        if current3 is None:
            warnings.warn('Early stopping requires %s available!' %
                          (self.monitor3), RuntimeWarning)

        if self.monitor3_op(current3, self.best):
            self.best = current3
            self.wait = 0
        else:
            if self.wait >= self.patience:
                if self.verbose > 0:
                    print('Epoch %05d: early stopping' % (epoch))
                self.model.stop_training = True
            self.wait += 1

In [22]:
from __future__ import absolute_import
from __future__ import print_function

import numpy as np
import time
import json
import warnings

from collections import deque
#from .utils.generic_utils import Progbar
from keras import backend as K
from pkg_resources import parse_version

class My_ModelCheckpoint(keras.callbacks.Callback):
    '''Save the model after every epoch.
    `filepath` can contain named formatting options,
    which will be filled the value of `epoch` and
    keys in `logs` (passed in `on_epoch_end`).
    For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`,
    then multiple files will be save with the epoch number and
    the validation loss.
    # Arguments
        filepath: string, path to save the model file.
        monitor: quantity to monitor.
        verbose: verbosity mode, 0 or 1.
        save_best_only: if `save_best_only=True`,
            the latest best model according to
            the quantity monitored will not be overwritten.
        mode: one of {auto, min, max}.
            If `save_best_only=True`, the decision
            to overwrite the current save file is made
            based on either the maximization or the
            minimization of the monitored quantity. For `val_acc`,
            this should be `max`, for `val_loss` this should
            be `min`, etc. In `auto` mode, the direction is
            automatically inferred from the name of the monitored quantity.
        save_weights_only: if True, then only the model's weights will be
            saved (`model.save_weights(filepath)`), else the full model
            is saved (`model.save(filepath)`).
    '''
    def __init__(self, filepath, monitor='val_loss', verbose=0,
                 save_best_only=False, save_weights_only=False,
                 mode='auto'):
        

        self.monitor = monitor
        self.verbose = verbose
        self.filepath = filepath
        self.filecheck = self.filepath+"_check.h5"
        self.filehist = self.filepath+"_hist.h5"
        #self.save_best_only = save_best_only
        #self.save_weights_only = save_weights_only
        
        ModelCheckpoint.__init__(self, self.filecheck,
                monitor, verbose, save_best_only, mode)

        
    
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []
        self.energy_losses = []
        self.val_energy_losses = []
        self.plabel_losses = []
        self.val_plabel_losses = []
        try:
            self.model.load(self.filcheck)
            print('Sucessfully loaded weights at ' + self.filecheck)
        except (IOError, EOFError):
            print('Failed to load weights at ' + self.filecheck)
            
        try:
            f = h5py.File(self.filehist,'r')
            self.losses = list(f['loss'])
            self.val_losses = list(f['val_loss'])
            self.energy_losses = list(f['energy_losses'])
            self.val_energy_losses = list(f['val_energy_losses'])
            self.plabel_losses = list(f['plabel_losses'])
            self.val_plabel_losses = list(f['val_plabel_losses'])
            print('Sucessfully loaded history at ' + self.filehist)
        except (IOError, EOFError):
            print('Failed to load history at ' + self.filehist)
        
    
    def on_epoch_end(self, epoch, logs={}):
        
        if self.verbose > 0:
            print('Epoch %05d: saving model to %s' % (epoch, self.filecheck))
            print('Epoch %05d: saving histories to %s' % (epoch, self.filehist))
        self.model.save(filepath+".h5", overwrite=True)
        
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.energy_losses.append(logs.get('energy_loss'))    
        self.val_energy_losses.append(logs.get('val_energy_loss'))
        self.plabel_losses.append(logs.get('particle label_loss'))
        self.val_plabel_losses.append(logs.get('val_particle label_loss'))
        
        f = h5py.File(self.filehist,'w')
        f.create_dataset('loss',data=np.array(self.losses))
        f.create_dataset('val_loss',data=np.array(self.val_losses))
        f.create_dataset('energy_losses',data=np.array(self.energy_losses))
        f.create_dataset('val_energy_losses',data=np.array(self.val_energy_losses))
        f.create_dataset('plabel_losses',data=np.array(self.plabel_losses))
        f.create_dataset('val_plabel_losses',data=np.array(self.val_plabel_losses))
        #filepath = self.filepath.format(epoch=epoch, **logs)
        

In [23]:
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []
        self.energy_losses = []
        self.val_energy_losses = []
        self.plabel_losses = []
        self.val_plabel_losses = []
    
    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.energy_losses.append(logs.get('energy_loss'))    
        self.val_energy_losses.append(logs.get('val_energy_loss'))
        self.plabel_losses.append(logs.get('particle label_loss'))
        self.val_plabel_losses.append(logs.get('val_particle label_loss'))
        

In [24]:
input1 = Input(shape = (1, 24, 24, 25))
model1 = Convolution3D(10, 4, 4, 5, input_shape = (1, 24, 24, 25), activation='relu') (input1)
model1 = MaxPooling3D()(model1)
model1 = Flatten()(model1)

input2 = Input(shape = (1, 4, 4, 60))
model2 = Convolution3D(10, 2, 2, 6, input_shape = (1, 4, 4, 60), activation='relu')(input2)
model2 = MaxPooling3D()(model2)
model2 = Flatten()(model2)

## join the two
bmodel = merge([model1,model2], mode='concat')

## fully connected ending
bmodel = (Dense(10000, activation='sigmoid')) (bmodel)
bmodel = (Dropout(0.5)) (bmodel)

bmodel = (Dense(100, activation='sigmoid')) (bmodel)
bmodel = (Dropout(0.5)) (bmodel)

bmodel = (Dense(10, activation='sigmoid')) (bmodel)
#bmodel = (Dropout(0.25)) (bmodel)

oc = Dense(1,activation='sigmoid', name='particle label')(bmodel)
oe = Dense(1,activation='linear', name='energy')(bmodel)

model = Model(input=[input1,input2], output=[oc,oe])
model.compile(loss=['binary_crossentropy','mse'], optimizer='sgd')
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_5 (InputLayer)             (None, 1, 24, 24, 25) 0                                            
____________________________________________________________________________________________________
input_6 (InputLayer)             (None, 1, 4, 4, 60)   0                                            
____________________________________________________________________________________________________
convolution3d_5 (Convolution3D)  (None, 10, 21, 21, 21)810         input_5[0][0]                    
____________________________________________________________________________________________________
convolution3d_6 (Convolution3D)  (None, 10, 3, 3, 55)  250         input_6[0][0]                    
___________________________________________________________________________________________

In [25]:
class RegGen:
    #Data generator for regression over energy 
    def __init__( self, batch_size):
        self.batch_size = batch_size
        self.filelist=[]
        for i in xrange(1,6):
            for j in xrange(1,11):
                self.filelist.append('/scratch/daint/vlimant/LCD/New_Data_Shuffled/New_Data_Shuffled/GammaEscan_%d_%d.h5'%(i,j)) 
        self.train_split = 0.6 
        self.test_split = 0.2 
        self.validation_split = 0.2
        self.fileindex = 0
        self.filesize = 0
        self.position = 0
    #function to call when generating data for training  
    def train(self,modeltype=3):
        length = len(self.filelist)
        #deleting the validation and test set filenames from the filelist
        del self.filelist[np.floor((1-(self.train_split))*length).astype(int):]
        return self.batches(modeltype)
    #function to call when generating data for testing
    def test(self, modeltype=3):
        length = len(self.filelist)
        #deleting the train and validation set filenames from the filelist
        del self.filelist[:np.floor((1-self.test_split)*length).astype(int)+1]
        return self.batches(modeltype)
    #function to call when generating data for validating
    def validation(self, modeltype=3):
        length = len(self.filelist)
        #modifying the filename list to only include files for validation set
        self.filelist = self.filelist[np.floor(self.train_split*length+1).astype(int):np.floor((self.train_split+self.validation_split)*length+1).astype(int)]
        return self.batches(modeltype)
        
    #The function which reads files to gather data until batch size is satisfied
    def batch_helper(self, fileindex, position, batch_size):
        '''
        Yields batches of data of size N
        '''
        f = h5py.File(self.filelist[fileindex],'r')
        self.filesize = np.array(f['ECAL']).shape[0]
        #print(self.filelist[fileindex],'first')
        if (position + batch_size < self.filesize):
            data_ECAL = np.array(f['ECAL'][position : position + batch_size])
            data_HCAL = np.array(f['HCAL'][position : position + batch_size])
            target = np.array(f['target'][position : position + batch_size][:,:,1])
            #target = np.delete(target,0,1)

            position += batch_size
            f.close()
            #print('first position',position)
            return data_ECAL,data_HCAL, target, fileindex, position
        
        else:
            data_ECAL = np.array(f['ECAL'][position : position + batch_size])
            data_HCAL = np.array(f['HCAL'][position : position + batch_size])
            target = np.array(f['target'][position:][:,:,1])
            #target = np.delete(target,0,1)
            f.close()
            
            if (fileindex+1 < len(self.filelist)):
                if(self.batch_size-data_ECAL.shape[0]>0):
                    while(self.batch_size-data_ECAL.shape[0]>0):
                        if(int(np.floor((self.batch_size-data_ECAL.shape[0])/self.filesize))==0):
                            number_of_files=1
                        else:
                            number_of_files=int(np.ceil((self.batch_size-data_ECAL.shape[0])/self.filesize))
                        for i in xrange(0,number_of_files):
                            if(fileindex+i+1>len(self.filelist)):
                                fileindex=0
                                number_of_files=number_of_files-i
                                i=0
                            f = h5py.File(self.filelist[fileindex+i+1],'r')
                            #print(self.filelist[fileindex+i+1],'second')
                            if (self.batch_size-data_ECAL.shape[0]<self.filesize):
                                position = self.batch_size-data_ECAL.shape[0]
                                data_temp_ECAL = np.array(f['ECAL'][position : position + batch_size])
                                data_temp_HCAL = np.array(f['HCAL'][position : position + batch_size])
                                target_temp = np.array(f['target'][:position][:,:,1])
                            else:
                                data_temp_ECAL = np.array(f['ECAL'][position : position + batch_size])
                                data_temp_HCAL = np.array(f['HCAL'][position : position + batch_size])
                                target_temp = np.array(f['target'][:,:,1])
                            f.close()
                    #data_, target_, fileindex, position = self.batch_helper(fileindex + 1, 0, batch_size - self.filesize+position)
                            #print( data.shape,data_.shape)
                            #print( target.shape,target_.shape)
                            data_ECAL = np.concatenate((data_ECAL, data_temp_ECAL), axis=0)
                            data_HCAL = np.concatenate((data_HCAL, data_temp_HCAL), axis=0)
                            target = np.concatenate((target, target_temp), axis=0)
                    if (fileindex +i+1<len(self.filelist)):
                        fileindex = fileindex +i+1
                    else:
                        fileindex = 0
                else:
                    position = 0
                    fileindex=fileindex+1
            else:
                fileindex = 0
                position = 0
            
            return data_ECAL,data_HCAL, target, fileindex, position
    #The function which loops indefinitely and continues to return data of the specified batch size
    def batches(self, modeltype):
        while (self.fileindex < len(self.filelist)):
            data_ECAL,data_HCAL, target, self.fileindex, self.position = self.batch_helper(self.fileindex, self.position, self.batch_size)
            if data_ECAL.shape[0]!=self.batch_size:
                continue
            if modeltype==3:
                data_ECAL = data_ECAL.reshape((data_ECAL.shape[0],)+(1, 24, 24, 25))
                data_HCAL = data_HCAL.reshape((data_HCAL.shape[0],)+(1, 4, 4, 60))
                #data = np.swapaxes(data, 1, 3)
                #data = np.swapaxes(data, 1, 2)
                #data = np.swapaxes(data, 0, 1)
                #data=data.reshape((data.shape[0],1,20,20,25))
            elif modeltype==2:
                data_ECAL = data_ECAL.reshape((data_ECAL.shape[0],)+(24, 24, 25))
                data_ECAL = np.swapaxes(data_ECAL, 1, 3)
                data_HCAL = data_HCAL.reshape((data_HCAL.shape[0],)+(4, 4, 60))
                data_HCAL = np.swapaxes(data_HCAL, 1, 3)
            elif modeltype==1:
                data_ECAL= np.reshape(data_ECAL,(self.batch_size,-1))
                data_HCAL= np.reshape(data_HCAL,(self.batch_size,-1))
            yield ([data_ECAL,data_HCAL],target/500.)
        self.fileindex = 0


In [26]:


ds1 = RegGen(1000)
vs1 = RegGen(1000)
#early1 = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto')
check = My_ModelCheckpoint(filepath ='trial_bcnn', verbose=1)
hist = model.fit_generator(ds1.train(cnn=True), samples_per_epoch=5000, nb_epoch=1, validation_data= vs1.validation(cnn=True), nb_val_samples=5000, verbose=1, callbacks=[check])

TypeError: unbound method __init__() must be called with ModelCheckpoint instance as first argument (got My_ModelCheckpoint instance instead)

In [None]:
data = json.load("/SmartCheckpoint/trial_bcnn1_regcls_history.json")