In [None]:
import os
import numpy as np
np.random.seed(69)
import pandas as pd
import random
import pickle as pkl
import matplotlib.pyplot as plt
import matplotlib.image as img
import seaborn as sns
import tensorflow as tf
from tqdm.notebook import tqdm
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,concatenate, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, ZeroPadding2D, LeakyReLU, ReLU, AveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import load_model
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# import kerastuner as kt
# from kerastuner import HyperModel
import time

In [None]:
# Object catalogue; the first CSV column becomes the row index.
df = pd.read_csv("../input/sdss-project-v10/SDSS_Query_v1.0_DF.csv", index_col=0)

# Saved Keras model whose predictions are stored below as the "redshift" column.
regressor = load_model("../input/sdss-project-v10/DNNRegressor.h5")

# Photometric columns handed to the regressor, in this exact order.
# NOTE(review): the order presumably has to match what the regressor was
# trained on -- confirm before reordering.
photo_columns = [
    'dered_u', 'deVRad_u', 'psffwhm_u', 'extinction_u',
    'dered_g', 'deVRad_g', 'psffwhm_g', 'extinction_g',
    'dered_r', 'deVRad_r', 'psffwhm_r', 'extinction_r',
    'dered_i', 'deVRad_i', 'psffwhm_i', 'extinction_i',
    'dered_z', 'deVRad_z', 'psffwhm_z', 'extinction_z',
    'u_g', 'g_r', 'r_i', 'i_z',
]

# Take an explicit copy: a column is added right below, and writing to a frame
# that pandas still tracks as a slice of `df` raises SettingWithCopyWarning.
photodf = df.loc[:, photo_columns].copy()

# Append the regressor's prediction as an additional feature column.
photodf["redshift"] = regressor.predict(photodf.values)

In [None]:
# Pre-exported arrays, loaded in a fixed order:
#   X       -> later passed as the first input of the stacked model
#   y       -> class labels (checked against df["class"] further down)
#   objlist -> index labels used to look rows up in `df` / `photodf`
X, y, objlist = (
    np.load(npy_path)
    for npy_path in (
        "../input/sdss-project-v10/X_v1.0.npy",
        "../input/sdss-project-v10/y_v1.0.npy",
        "../input/sdss-project-v10/objlist_v1.0.npy",
    )
)

In [None]:
# Sanity check: the labels stored with X must agree, object by object and in
# the same order, with the catalogue's "class" column.
assert np.array_equal(df.loc[objlist, "class"].to_numpy(), y), \
    "labels in y do not match df['class'] for the objects in objlist"

# Tabular features for every object in `objlist`, row-aligned with X.
# One label-based lookup replaces a Python loop of per-row `.loc` calls.
dnnx = photodf.loc[objlist].to_numpy()
assert len(dnnx) == len(objlist)

# Kept for parity with the original cell: a copy of the labels, row-aligned with dnnx.
dnny = np.array(y)
assert np.array_equal(dnny, y)

In [None]:
# Map each class label to an integer code. `label_strings[k]` is the original
# label for code k; codes follow the order in which labels first appear in y.
# NOTE(review): the pre-trained classifiers presumably expect this same
# code order -- confirm against their training notebooks.
class_codes, label_strings = pd.factorize(y)

# One-hot encode the integer codes for use with categorical cross-entropy.
y = to_categorical(class_codes)

In [None]:
# train_test_split accepts any number of equally long arrays and applies the
# same row permutation to all of them, so the image tensor, the tabular
# features and the labels can be split together without zipping them into a
# list of tuples first. The resulting partition depends only on the number of
# rows and `random_state`, so it is the same 60/20/20 split as before.

# Step 1: hold out 20% as the test set.
(X_trainval, X_test,
 dnnx_trainval, dnnx_test,
 y_trainval, y_test) = train_test_split(X, dnnx, y, test_size=0.2, random_state=69)

# Step 2: 25% of the remaining 80% (i.e. 20% overall) becomes the validation set.
(X_train, X_val,
 dnnx_train, dnnx_val,
 y_train, y_val) = train_test_split(X_trainval, dnnx_trainval, y_trainval,
                                    test_size=0.25, random_state=69)

# Release the unsplit arrays and the intermediate train+val partition.
del X, dnnx, X_trainval, dnnx_trainval, y_trainval

In [None]:
# The training inputs are not referenced again in the cells below (the stacked
# model is fitted on the validation split), so drop them to release memory.
del X_train, dnnx_train

In [None]:
def get_metrics(y_pred, y_test, labels, to_print=True):
    """Score predicted class indices against the true ones.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_test : array
        True labels; must expose ``.shape`` because the printed summary
        reads ``y_test.shape[0]``.
    labels : any
        Only echoed in the printed summary ("Labels are: ...").
    to_print : bool, default True
        When true, print every computed metric.

    Returns
    -------
    tuple
        ``(correct_indices, accuracy, precision, recall, confusion_matrix,
        classification_report)``. Precision and recall are macro-averaged.
        The macro F1 score is computed and printed but is not part of the
        returned tuple.
    """
    # Positions at which prediction and ground truth agree.
    hit_positions = np.where(y_pred == y_test)[0]

    macro = {"average": "macro"}
    acc = metrics.accuracy_score(y_test, y_pred)
    prec = metrics.precision_score(y_test, y_pred, **macro)
    rec = metrics.recall_score(y_test, y_pred, **macro)
    f1 = metrics.f1_score(y_test, y_pred, **macro)
    conf_mat = metrics.confusion_matrix(y_test, y_pred)
    report = metrics.classification_report(y_test, y_pred)

    if not to_print:
        return (hit_positions, acc, prec, rec, conf_mat, report)

    print("Identified {} correct labels out of {} labels".format(len(hit_positions), y_test.shape[0]))
    for caption, value in (
        ("Accuracy:", acc),
        ("Precision:", prec),
        ("Recall:", rec),
        ("F1 Score:", f1),
    ):
        print(caption, value)
    print(f"Labels are: {labels}")
    print("Confusion Matrix:\n", conf_mat)
    print("Classification_Report:\n", report)

    return (hit_positions, acc, prec, rec, conf_mat, report)

def plot_model_change(history):
    """Plot accuracy and loss per epoch from a Keras training history.

    Two figures are shown, one for ``accuracy`` and one for ``loss``. The
    matching validation curve (``val_accuracy`` / ``val_loss``) is added
    only when it is present in ``history.history``; Keras records those keys
    only when ``fit`` is given validation data, and indexing them
    unconditionally raises ``KeyError`` otherwise.

    Parameters
    ----------
    history : object
        Anything exposing a ``history`` mapping of metric name to per-epoch
        values, e.g. the object returned by ``Model.fit``.

    Raises
    ------
    KeyError
        If ``accuracy`` or ``loss`` is missing from ``history.history``.
    """
    curves = history.history
    panels = (
        # (history key, training legend label, validation legend label)
        ('accuracy', "Training Acc", "Val Acc"),
        ('loss', "Training Loss", "Val Loss"),
    )
    for metric, train_label, val_label in panels:
        plt.plot(curves[metric], label=train_label)
        val_key = 'val_' + metric
        # Validation metrics exist only if fit() was run with validation data.
        if val_key in curves:
            plt.plot(curves[val_key], label=val_label)
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend()
        plt.show()

# Ensemble

In [None]:
# Saved base classifiers that become the members of the stacked ensemble,
# loaded in the order (CNN, DNN).
cnnmodel, dnnmodel = map(
    load_model,
    (
        "../input/sdss-project-v10/CNNClassifier.h5",
        "../input/sdss-project-v10/DNNClassifier.h5",
    ),
)

In [None]:
def define_stacked_model(members, n_classes=3, hidden_units=10, graph_file='model_graph.png'):
    """Build and compile a trainable head on top of frozen member models.

    Every layer of every member is frozen and renamed in place, the members'
    outputs are concatenated, and a small dense head (one ReLU layer followed
    by a softmax layer) is stacked on top. Only the head has trainable
    weights.

    Parameters
    ----------
    members : sequence of Keras models
        Base models. They are MODIFIED IN PLACE: each layer gets
        ``trainable = False`` and its name is prefixed with
        ``ensemble_<k>_`` (k = 1-based position in ``members``).
    n_classes : int, default 3
        Width of the final softmax layer.
    hidden_units : int, default 10
        Width of the intermediate ReLU layer.
    graph_file : str or None, default 'model_graph.png'
        Where ``plot_model`` writes the architecture diagram. Pass ``None``
        to skip plotting.

    Returns
    -------
    Keras model
        Compiled with categorical cross-entropy, the 'adam' optimizer and an
        'accuracy' metric. Its inputs are the members' inputs, in order.
    """
    for position, member in enumerate(members, start=1):
        for layer in member.layers:
            # Freeze: the base models keep their learned weights.
            layer.trainable = False
            # Prefix the name so layers from different members cannot clash
            # inside the combined graph.
            # NOTE(review): this writes the private `_name` attribute and so
            # depends on the installed Keras version honouring it.
            layer._name = 'ensemble_' + str(position) + '_' + layer.name

    # Multi-headed input: one input per member.
    ensemble_visible = [member.input for member in members]
    # Concatenate the members' outputs into a single feature vector.
    ensemble_outputs = [member.output for member in members]
    merge = tf.keras.layers.concatenate(ensemble_outputs)

    hidden = Dense(hidden_units, activation='relu')(merge)
    output = Dense(n_classes, activation='softmax')(hidden)
    model = tf.keras.Model(inputs=ensemble_visible, outputs=output)

    # Optional architecture diagram.
    if graph_file is not None:
        plot_model(model, show_shapes=True, to_file=graph_file)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Define the ensemble model: the CNN is member 1 and the DNN is member 2, so
# the stacked model expects its inputs in the order [CNN input, DNN input].
# define_stacked_model freezes and renames the layers of both members in place.
members = [cnnmodel,dnnmodel]
stacked_model = define_stacked_model(members)

In [None]:
# Destination file for the best stacked model seen during training.
filepath = "BestEnsemble.h5"

# Save the full model (architecture + weights) every time the monitored
# 'accuracy' metric reaches a new maximum. The monitored name has no `val_`
# prefix, i.e. it is the metric computed on the data passed to fit().
checkpointcb = tf.keras.callbacks.ModelCheckpoint(
    filepath=filepath,
    monitor='accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Callback list handed to fit().
cb = [checkpointcb]

In [None]:
# Train the stacking head on the validation split. Inputs are passed in the
# same order as the ensemble members: [image input, tabular input].
# No validation data is supplied here, so the returned history only carries
# training metrics.
history = stacked_model.fit(
    [X_val, dnnx_val],
    y_val,
    batch_size=512,
    epochs=100,
    verbose=1,
    callbacks=cb,
)

In [None]:
# Show the accuracy and loss curves recorded while fitting the stacked model.
plot_model_change(history)

In [None]:
# Reload the best checkpoint written by the ModelCheckpoint callback. Reuse
# `filepath` (the path the callback was configured with) instead of repeating
# the file name, so the two can never drift apart.
model = load_model(filepath)

In [None]:
# Class probabilities of the reloaded ensemble on the held-out test split.
preds_test = model.predict([X_test, dnnx_test], batch_size=512, verbose=0)

# get_metrics already prints the full report (to_print defaults to True).
# Keep the returned tuple in a variable instead of wrapping the call in
# print(), which would dump the same arrays and report a second time as a raw
# tuple. argmax turns one-hot / probability rows into class indices.
test_metrics = get_metrics(preds_test.argmax(axis=1), y_test.argmax(axis=1), label_strings)