#### 1. Import Libraries 

In [2]:
#%reload_ext autoreload
#%autoreload 2
%matplotlib notebook

In [3]:
PATH = "/home/khan74/scratch/new_DL_DES/"
sz=224
batch_size=64

In [4]:
import numpy as np
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.layers import Dropout, Flatten, Dense
from keras.applications import ResNet50, Xception
from keras.models import Model, Sequential
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras import optimizers
from keras.applications.resnet50 import preprocess_input
from keras.models import load_model

Using TensorFlow backend.


In [5]:
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd 
import numpy as np

In [6]:
import os
from os import listdir, makedirs
from os.path import isfile, join, exists

#### 2. Load Data / Create data_generators

In [7]:
train_df = pd.read_csv(PATH + 'deeplearning/data/training_set.csv')
val_df = pd.read_csv(PATH + 'deeplearning/data/validation_set.csv')
HP_crossmatch_df = pd.read_csv(PATH + 'deeplearning/data/high_prob_crossmatch_test_set.csv')
FO_crossmatch_df = pd.read_csv(PATH + 'deeplearning/data/full_overlap_crossmatch_test_set.csv')

##### flow_from_dir

In [8]:
train_data_dir = f'{PATH}deeplearning/data/train/'
validation_data_dir = f'{PATH}deeplearning/data/valid/'

HP_SDSS_test_data_dir = f'{PATH}deeplearning/data/HP_crossmatch_test/sdss/'
HP_DES_test_data_dir = f'{PATH}deeplearning/data/HP_crossmatch_test/des/'

FO_SDSS_test_data_dir = f'{PATH}deeplearning/data/FO_crossmatch_test/sdss/'
FO_DES_test_data_dir = f'{PATH}deeplearning/data/FO_crossmatch_test/des/'

In [9]:
train_datagen = ImageDataGenerator(
rescale = 1./255,
horizontal_flip = True,
vertical_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range=0.3,
rotation_range=45)

valid_datagen = ImageDataGenerator(rescale = 1./255)

test_datagen = ImageDataGenerator(
rescale = 1./255,
horizontal_flip = True,
vertical_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range=0.3,
rotation_range=45)




train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size = (sz, sz),
batch_size = batch_size, 
class_mode = "categorical",
shuffle = True,
interpolation = 'nearest')

validation_generator = valid_datagen.flow_from_directory(
validation_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = "categorical",
shuffle = False,
interpolation = 'nearest')




HP_SDSS_test_generator = test_datagen.flow_from_directory(
HP_SDSS_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

HP_DES_test_generator = test_datagen.flow_from_directory(
HP_DES_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)




FO_SDSS_test_generator = test_datagen.flow_from_directory(
FO_SDSS_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

FO_DES_test_generator = test_datagen.flow_from_directory(
FO_DES_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

Found 36620 images belonging to 2 classes.
Found 963 images belonging to 2 classes.
Found 1066 images belonging to 1 classes.
Found 1066 images belonging to 1 classes.
Found 12596 images belonging to 1 classes.
Found 12596 images belonging to 1 classes.


#### 3. Load Model 

In [10]:
model_final = load_model(PATH + 'deeplearning/weights/Xception_Final.h5')

#### 4. Load sklearn / Def metrics 

In [11]:
import itertools
from sklearn import metrics
from sklearn.metrics import confusion_matrix

In [12]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          figure_size = (11, 6),
                          save=0,
                          save_path='/home/khan74'):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    #if normalize:
        #cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        #print("Normalized confusion matrix")
    #else:
        #print('Confusion matrix, without normalization')

    #print(cm)

    plt.figure(figsize=figure_size)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    #plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    
    if save:
        plt.savefig(save_path)
        #plt.savefig('confusion_matrix.png')

In [13]:
# Define probability threshold mask

def threshold_mask(pred_array, prob_threshold = 0.9999):
    ''' 
    returns the positions in the array where the probability for each class is greater than prob_threshold
    
    pred_array: A one-hot encoded array of softmax probability outputs 
    prob_threshold: Float b/w 0 and 1 to use as a threshold mask
    '''
    pred_class_indices = np.argmax(pred_array, axis=1)
    pos = np.where( pred_array[ range( pred_array.shape[0] ), pred_class_indices ] > prob_threshold )[0]
    return pos


In [14]:
# Define top n predictions mask

def top_pred_mask(pred_array, n_top_predictions = 5000 ):
    '''
    reversedurn the positions of top n most confident predictions
    
    pred_array: A one-reversedencoded array of softmax probability outputs
    n_top_predictions: Num of top predictions
    '''
    pred_class_indices = np.argmax(pred_array, axis=1)
    pos = np.argsort( pred_array[ range( pred_array.shape[0] ), pred_class_indices ] ) 
    mask = np.flip( pos )
    return mask[: n_top_predictions]

# Tensorboard 

## 1. Validation

### A.  Tensorboard Metadata  

In [11]:
from keras.callbacks import TensorBoard

In [12]:
tsne_datagen = ImageDataGenerator(rescale = 1./255)

tsne_generator = tsne_datagen.flow_from_directory(
validation_data_dir,
target_size = (sz, sz),
batch_size = 4506,
class_mode = "categorical",
shuffle = False,
interpolation = 'nearest')

Found 963 images belonging to 2 classes.


In [13]:
x_test, y_test = tsne_generator[0]

In [14]:
labels = np.argmax(y_test, axis=1)

In [19]:
log_dir = PATH + '/deeplearning/tensorboard/metadata/validation/'

if not exists(log_dir):
    makedirs(log_dir)

In [20]:
# save class labels to disk to color data points in TensorBoard accordingly
with open(join(log_dir, 'metadata.tsv'), 'w') as f:
    np.savetxt(f, np.argmax(y_test, axis=1))

In [21]:
tensorboard = TensorBoard(log_dir=log_dir,
                          batch_size=batch_size,
                          embeddings_freq=1,
                          embeddings_layer_names=['second_last_layer'],
                          embeddings_metadata='metadata.tsv',
                          embeddings_data=x_test)

### B.  Calling Tensorboard without fit 

In [22]:
tensorboard.set_model(model_final)

In [23]:
pred = model_final.predict(x_test, verbose=1)



In [24]:
tensorboard.on_epoch_end(epoch=1)

In [25]:
tensorboard.on_train_end('_')

In [26]:
score = model_final.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.004898180560559432
Test accuracy: 0.9979231568016614


In [28]:
log_dir

'/home/khan74/scratch/new_DL_DES//deeplearning/tensorboard/metadata/validation/'

## 2. FO DES 

### Masking 

In [15]:
FO_DES_test_generator_NEW = valid_datagen.flow_from_directory(
FO_DES_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

Found 12596 images belonging to 1 classes.


In [16]:
FO_DES_predictions = model_final.predict_generator(FO_DES_test_generator_NEW, verbose=1)



In [17]:
mask = top_pred_mask(FO_DES_predictions, 5000)

In [18]:
FO_DES_predicted_class_indices=np.argmax(FO_DES_predictions,axis=1)


labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = np.array( [labels[k] for k in FO_DES_predicted_class_indices] )

filenames= np.array( FO_DES_test_generator.filenames )

results=pd.DataFrame({"Filename":filenames[mask],
                      "Pred_Labels":predictions[mask],
                     "Predictions":FO_DES_predicted_class_indices[mask]})

In [19]:
results['OBJID'] = results['Filename'].apply(lambda x: x.split('/')[-1][:-4]).astype('int64')
final = pd.merge(results, FO_crossmatch_df, left_on='OBJID', right_on='DES_COADD_OBJECT_ID')
final['Actuals'] = final['P_CS_DEBIASED'] > final['P_EL_DEBIASED']
final['Actual_Labels'] = final.apply(lambda row: 'Spiral' \
                                        if row.P_CS_DEBIASED > row.P_EL_DEBIASED \
                                        else 'Elliptical', axis=1)
final.Filename = final['Filename'].apply(lambda x: x.split('/')[-1] )

In [20]:
final[['OBJID', 'Filename', 'Actuals', 'Predictions', 'Actual_Labels', 'Pred_Labels']].head()

Unnamed: 0,OBJID,Filename,Actuals,Predictions,Actual_Labels,Pred_Labels
0,100005001,100005001.png,True,1,Spiral,spiral
1,138459097,138459097.png,True,1,Spiral,spiral
2,260728929,260728929.png,True,1,Spiral,spiral
3,260775373,260775373.png,True,1,Spiral,spiral
4,260775390,260775390.png,False,0,Elliptical,elliptical


### A.  Tensorboard Metadata  

In [59]:
from keras.callbacks import TensorBoard

In [60]:
tsne_generator = valid_datagen.flow_from_dataframe(
dataframe = final,
directory = FO_DES_test_data_dir+ 'test/',
x_col = 'Filename',
y_col = 'Actual_Labels',
has_ext = True,
target_size = (sz, sz),
batch_size = len(mask),
class_mode = None,
shuffle = False)

Found 5000 images.


In [61]:
x_test = tsne_generator[0]

In [62]:
labels=[]

for file in tsne_generator.filenames:
    labels.append( list(final.Actual_Labels[ final.Filename == file]) )

In [63]:
labels = np.squeeze(labels)

In [64]:
log_dir = PATH + '/deeplearning/tensorboard/metadata/FO_DES/'

if not exists(log_dir):
    makedirs(log_dir)

In [65]:
labels = np.array(labels)

# save class labels to disk to color data points in TensorBoard accordingly
with open(join(log_dir, 'metadata.tsv'), 'w') as f:
    np.savetxt(f, labels, fmt="%s")

In [66]:
tensorboard = TensorBoard(log_dir=log_dir,
                          batch_size=batch_size,
                          embeddings_freq=1,
                          embeddings_layer_names=['second_last_layer'],
                          embeddings_metadata='metadata.tsv',
                          embeddings_data=x_test)

### B.  Calling Tensorboard without fit 

In [70]:
tensorboard.set_model(model_final)

In [71]:
pred = model_final.predict(x_test, verbose=1)



In [72]:
tensorboard.on_epoch_end(epoch=1)

In [73]:
tensorboard.on_train_end('_')

In [74]:
y_test = keras.utils.to_categorical(labels, num_classes=2, dtype='float32')

ValueError: invalid literal for int() with base 10: 'Spiral'

In [31]:
score = model_final.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.39517958889008525
Test accuracy: 0.9754


In [55]:
log_dir

'/home/khan74/scratch/new_DL_DES//deeplearning/tensorboard/metadata/FO_DES/'