In [None]:
import os
import numpy as np
np.random.seed(69)
import pandas as pd
import random
import pickle as pkl
import matplotlib.pyplot as plt
import matplotlib.image as img
import seaborn as sns
import tensorflow as tf
from tqdm.notebook import tqdm
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, ZeroPadding2D, LeakyReLU
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import load_model
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# import kerastuner as kt
# from kerastuner import HyperModel
import time

In [None]:
# Read the pre-split arrays from the ../input dataset directory: network input
# features (dnnx_*), redshift targets (y_*) and class labels (class_*).
# Files are loaded in the same order as the names on the left-hand side.
(
    dnnx_train,
    dnnx_val,
    y_train,
    y_val,
    class_train,
    class_val,
) = [
    np.load(npy_path)
    for npy_path in (
        "../input/newregressiondatasetsdss/regdnnx_train.npy",
        "../input/newregressiondatasetsdss/regdnnx_val.npy",
        "../input/newregressiondatasetsdss/regy_train.npy",
        "../input/newregressiondatasetsdss/regy_val.npy",
        "../input/newregressiondatasetsdss/regclass_train.npy",
        "../input/newregressiondatasetsdss/regclass_val.npy",
    )
]

In [None]:
# Replace the raw class labels with integer codes. sort=True orders the unique
# labels, so code k always refers to label_strings_*[k].
train_factorized = pd.factorize(class_train, sort=True)
val_factorized = pd.factorize(class_val, sort=True)
class_train, label_strings_train = train_factorized
class_val, label_strings_val = val_factorized

# Both splits must agree on the label ordering; otherwise the same integer
# code would stand for different classes in train and validation.
assert np.array_equal(label_strings_train, label_strings_val)
print(label_strings_train)

In [None]:
def _redshift_by_class(class_codes, redshifts, n_classes):
    """Place each object's redshift in the column of its class.

    Parameters
    ----------
    class_codes : integer class code per object (as produced by pd.factorize).
    redshifts : one redshift per object; flattened to 1-D before use.
    n_classes : number of columns in the result.

    Returns
    -------
    float64 array of shape (len(class_codes), n_classes) that is zero
    everywhere except at [i, class_codes[i]], which holds redshifts[i].
    """
    class_codes = np.asarray(class_codes)
    redshifts = np.asarray(redshifts).ravel()
    per_class = np.zeros((len(class_codes), n_classes), dtype=np.float64)
    per_class[np.arange(len(class_codes)), class_codes] = redshifts
    return per_class


# class_train / class_val hold integer codes at this point (the raw labels were
# factorized in the previous cell), so they select the target column directly.
# The earlier version compared those integer codes with the label *strings*;
# that comparison never matched, so every redshift landed in the last column.
train_newparams = _redshift_by_class(class_train, y_train, len(label_strings_train))
val_newparams = _redshift_by_class(class_val, y_val, len(label_strings_train))

In [None]:
# One-hot encode the integer class codes for the softmax classification head.
class_train, class_val = to_categorical(class_train), to_categorical(class_val)

In [None]:
def _dense_tower(inputs, widths=(1024, 512, 256, 128, 64, 32), dropout_rate=0.25):
    """Stack one sigmoid Dense layer plus one Dropout layer per entry in `widths`.

    Parameters
    ----------
    inputs : Keras tensor the first Dense layer is applied to.
    widths : number of units of each successive Dense layer.
    dropout_rate : rate of the Dropout layer that follows every Dense layer.

    Returns
    -------
    The Keras tensor produced by the last Dropout layer.
    """
    branch = inputs
    for width in widths:
        branch = Dense(width, activation="sigmoid")(branch)
        branch = Dropout(dropout_rate)(branch)
    return branch


# Pass the input shape as a tuple: a bare int is not accepted by every Keras version.
inp_layer = tf.keras.Input(shape=(dnnx_train.shape[1],))

# Two independent towers share the same input: one per task.
photozbranch = _dense_tower(inp_layer)
classbranch = _dense_tower(inp_layer)

# Redshift head: one non-negative (relu) value per class column.
photoz_out = Dense(3, activation="relu", name="photoz_output")(photozbranch)
# Classification head. It must sit on classbranch; it was previously attached
# to photozbranch, which left the whole class tower disconnected from the model.
class_out = Dense(3, activation="softmax", name="class_output")(classbranch)

model = tf.keras.Model(inputs=inp_layer, outputs=[photoz_out, class_out])

In [None]:
# Per-output configuration, keyed by the names given to the two output layers:
# squared error for the redshift head, cross-entropy for the class head,
# both weighted equally in the total loss.
lossWeights = {"photoz_output": 1.0, "class_output": 1.0}
losses = {"photoz_output": "mean_squared_error", "class_output": "categorical_crossentropy"}

model.compile(
    loss=losses,
    loss_weights=lossWeights,
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Sanity check: shape of the per-class redshift targets (one row per training object).
train_newparams.shape

In [None]:
# Sanity check: shape of the one-hot class targets produced by to_categorical.
class_train.shape

In [None]:
# Stop once val_loss has gone 100 epochs without improving, and restore the
# weights from the best epoch seen.
es = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True,
    verbose=1,
)
cb = [es]

# Targets are listed in the same order as the model's outputs:
# redshift head first, class head second.
train_targets = [train_newparams, class_train]
val_targets = [val_newparams, class_val]

history = model.fit(
    x=dnnx_train,
    y=train_targets,
    validation_data=(dnnx_val, val_targets),
    epochs=4000,
    batch_size=2048,
    callbacks=cb,
    verbose=2,
)

In [None]:
# Persist the trained model to NewDNNRegressor.h5 in the current working directory.
model.save("NewDNNRegressor.h5")

In [None]:
# Run the model on the validation features. The two returned arrays follow the
# model's output order: redshift head first, then class head.
photoz_pred,class_pred = model.predict(dnnx_val)
# Matching ground-truth arrays for these predictions: val_newparams, class_val

In [None]:
# Show the class names; position k is the label for class index k.
label_strings_train

In [None]:
# Spot-check one validation object (hard-coded index): compare its true class
# and redshift targets with what the model predicted.
num = 3421

true_label = label_strings_train[class_val[num].argmax()]
pred_label = label_strings_train[class_pred[num].argmax()]

print(f"True Class = {true_label} and True Redshift = {val_newparams[num]}")
print(f"Pred Class = {pred_label} and Pred Redshift = {photoz_pred[num]}")

In [None]:
# Split the validation objects by their *predicted* class. For every object
# keep its feature row and the predicted redshift taken from the column of the
# predicted class.
star_X, star_redshift = [], []
gal_X, gal_redshift = [], []
qso_X, qso_redshift = [], []

# Anything that is neither STAR nor GALAXY goes to the QSO lists.
pred_buckets = {
    "STAR": (star_X, star_redshift),
    "GALAXY": (gal_X, gal_redshift),
}
fallback_bucket = (qso_X, qso_redshift)

for features, class_probs, redshifts in zip(dnnx_val, class_pred, photoz_pred):
    winner = class_probs.argmax()
    X_bucket, z_bucket = pred_buckets.get(label_strings_train[winner], fallback_bucket)
    X_bucket.append(features)
    z_bucket.append(redshifts[winner])

star_X, star_redshift = np.array(star_X), np.array(star_redshift)
gal_X, gal_redshift = np.array(gal_X), np.array(gal_redshift)
qso_X, qso_redshift = np.array(qso_X), np.array(qso_redshift)


In [None]:
# Collect the true redshift of every validation object, grouped by the class
# the model *predicted* for it (not its true class), so that each list lines up
# element-for-element with the predicted-redshift arrays of the same class.
true_star_redshift, true_gal_redshift, true_qso_redshift = [], [], []

# Anything predicted as neither STAR nor GALAXY goes to the QSO list.
truth_buckets = {"STAR": true_star_redshift, "GALAXY": true_gal_redshift}

for true_onehot, pred_probs, true_params in zip(class_val, class_pred, val_newparams):
    predicted_label = label_strings_train[pred_probs.argmax()]
    # The true redshift is read from the column of the object's true class.
    object_redshift = true_params[true_onehot.argmax()]
    truth_buckets.get(predicted_label, true_qso_redshift).append(object_redshift)

true_star_redshift = np.array(true_star_redshift)
true_gal_redshift = np.array(true_gal_redshift)
true_qso_redshift = np.array(true_qso_redshift)


In [None]:
# Mean squared error between predicted and true redshift, reported separately
# for the objects predicted as each class.
star_mse = metrics.mean_squared_error(star_redshift, true_star_redshift)
print(f"Star Redshift MSE Error on Test is {star_mse}")

gal_mse = metrics.mean_squared_error(gal_redshift, true_gal_redshift)
print(f"Gal Redshift MSE Error on Test is {gal_mse}")

qso_mse = metrics.mean_squared_error(qso_redshift, true_qso_redshift)
print(f"Qso Redshift MSE Error on Test is {qso_mse}")


In [None]:
def get_metrics(y_pred, y_test, labels, to_print=True):
    """Score predicted class labels against the true ones.

    Parameters
    ----------
    y_pred : predicted class label per sample.
    y_test : true class label per sample.
    labels : class names; only echoed in the printed summary.
    to_print : when True, print every computed score.

    Returns
    -------
    Tuple ``(correct_labels, accuracy, precision, recall, confusion_matrix,
    classification_report)``. Precision and recall are macro-averaged. The
    macro F1 score is computed and printed but is not part of the tuple.
    """
    macro = {"average": 'macro'}

    # Indices at which prediction and truth agree.
    correct_labels = np.where(y_pred == y_test)[0]
    acc = metrics.accuracy_score(y_test, y_pred)
    prec = metrics.precision_score(y_test, y_pred, **macro)
    rec = metrics.recall_score(y_test, y_pred, **macro)
    f1 = metrics.f1_score(y_test, y_pred, **macro)
    conf_mat = metrics.confusion_matrix(y_test, y_pred)
    report = metrics.classification_report(y_test, y_pred)

    results = (correct_labels, acc, prec, rec, conf_mat, report)
    if not to_print:
        return results

    print("Identified {} correct labels out of {} labels".format(len(correct_labels), y_test.shape[0]))
    print("Accuracy:", acc)
    print("Precision:", prec)
    print("Recall:", rec)
    print("F1 Score:", f1)
    print(f"Labels are: {labels}")
    print("Confusion Matrix:\n", conf_mat)
    print("Classification_Report:\n", report)
    return results

def plot_model_change(history):
    """Plot accuracy and loss curves recorded during training.

    Parameters
    ----------
    history : object whose ``history`` attribute maps metric names to
        per-epoch value lists (what ``model.fit`` returns).

    Two figures are shown: accuracy, then loss. Validation curves are drawn
    only when the corresponding ``val_*`` series was recorded. The accuracy
    figure is skipped when no accuracy series exists at all.
    """
    logs = history.history

    # Single-output models log a plain 'accuracy' series. Multi-output models
    # log one '<output_name>_accuracy' series per output instead, so fall back
    # to those rather than failing with a KeyError.
    if 'accuracy' in logs:
        acc_keys = ['accuracy']
    else:
        acc_keys = sorted(
            key for key in logs
            if key.endswith('_accuracy') and not key.startswith('val_')
        )

    # summarize history for accuracy
    if acc_keys:
        for key in acc_keys:
            # Name the output in the legend only when there are per-output series.
            suffix = "" if key == 'accuracy' else f" ({key})"
            plt.plot(logs[key], label="Training Acc" + suffix)
            val_key = 'val_' + key
            if val_key in logs:
                plt.plot(logs[val_key], label="Val Acc" + suffix)
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend()
        plt.show()

    # summarize history for loss
    plt.plot(logs['loss'], label="Training Loss")
    if 'val_loss' in logs:
        plt.plot(logs['val_loss'], label="Val Loss")
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.show()

In [None]:
# get_metrics already prints the full report. Keep its return value in a name
# instead of printing it: printing the raw tuple would dump the whole array of
# correctly-classified indices into the cell output.
val_class_metrics = get_metrics(
    class_pred.argmax(axis=1),
    class_val.argmax(axis=1),
    label_strings_train,
)