

<font size="5"> Galaxy Morphology Classification in S-PLUS Images with Deep Learning</font>
    
<font size="2">
    
This notebook was developed for the XI La Plata International School (LAPIS) on Astronomy and Geophysics.
The approach is based on <a href="https://academic.oup.com/mnras/article/507/2/1937/6328504">De Bom et al. 2021</a>
<!-- <a href="www.clearnightsrthebest.com">clearnightsrthebest.com</a> -->

Notebook Author: Gabriel Teixeira (CBPF - Brazil)

**Contact:** gteixeira@cbpf.br</font>

<font size="2">

Please do not remove this disclaimer.</font>

# 1. Setup & Imports


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
import gc
from google.colab import drive



# 2. Load Preprocessed Dataset


In [None]:
# Connect to your Google Drive (mounted under /content/drive)
drive.mount('/content/drive')

# Use the absolute path of the mount point so loading does not depend on the
# notebook's current working directory.
DATASET_PATH = '/content/drive/MyDrive/dataset_morph.npz'

# np.load on an .npz returns a lazy archive object that keeps the file open;
# the context manager closes it once both arrays have been read into memory.
with np.load(DATASET_PATH) as data:
    x_data = data['x']  # images
    y_data = data['y']  # labels



# 3. Utilities


In [None]:
# 0 - Elliptical
# 1 - Spiral

In [None]:
# Cropping the image around the center
def center_crop(image, target_height=128, target_width=128):
    """Return the central ``target_height`` x ``target_width`` window of an image.

    Parameters
    ----------
    image : numpy.ndarray
        Array whose first two axes are (height, width). Any trailing axes
        (e.g. channels) are kept unchanged.
    target_height : int, optional
        Number of rows of the crop (default 128).
    target_width : int, optional
        Number of columns of the crop (default 128).

    Returns
    -------
    numpy.ndarray
        The centered slice of ``image`` along its first two axes.

    Raises
    ------
    ValueError
        If a target size is negative or larger than the corresponding image
        dimension. Without this check the start offset becomes negative and
        the slice silently returns a wrong (smaller, off-center) region.
    """
    h, w = image.shape[:2]
    if not (0 <= target_height <= h and 0 <= target_width <= w):
        raise ValueError(
            f"Cannot crop a {target_height}x{target_width} window "
            f"from an image of size {h}x{w}."
        )
    start_y = (h - target_height) // 2
    start_x = (w - target_width) // 2
    return image[start_y:start_y + target_height, start_x:start_x + target_width]

# Sanity check of center_crop on the first image of the dataset.
# To crop every image at once: np.array([center_crop(img) for img in x_data])
img = x_data[0]
img_crop = center_crop(img)
print(f"Original shape: {img.shape}")
print(f"Cropped shape:  {img_crop.shape}")

plt.imshow(img)
plt.title("Original")
plt.show()

# Close the second figure with plt.show() as well, so the cell does not end on
# a bare AxesImage repr.
plt.imshow(img_crop)
plt.title("Center crop")
plt.show()

In [None]:
# Rotations and Flips

def rotate_180(image):
    """Rotate an image by 180 degrees in the plane of its first two axes."""
    # Reversing both the row axis and the column axis is a half-turn rotation.
    return np.flip(image, axis=(0, 1))

def flip_vertical(image):
    """Flip an image upside down (reverse the order of its rows)."""
    return np.flip(image, axis=0)

def flip_horizontal(image):
    """Mirror an image left to right (reverse the order of its columns)."""
    return np.flip(image, axis=1)

# Draw one random example from the dataset
# (assumed to be an (H, W, C) NumPy array — TODO confirm)
img = x_data[np.random.randint(len(x_data))]

rotated = rotate_180(img)
vflip = flip_vertical(img)
hflip = flip_horizontal(img)

# One (title, image) pair per subplot, in display order
panels = [
    ("Original", img),
    ("Rotated 180°", rotated),
    ("Vertical Flip", vflip),
    ("Horizontal Flip", hflip),
]

# Show the original next to its three transformed versions
fig, axs = plt.subplots(1, 4, figsize=(12, 3))
for ax, (panel_title, panel_img) in zip(axs, panels):
    ax.imshow(panel_img)
    ax.set_title(panel_title)
    ax.axis('off')

plt.tight_layout()
plt.show()


# Preparing the training, test and validation datasets for DL

In [None]:
from sklearn.model_selection import train_test_split

# Step 1: hold out a test set that is never used during training.
test_percentual = 0.2
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data,            # your full dataset
    test_size=test_percentual, # fraction of the full dataset used for testing
    random_state=42,           # for reproducibility
    shuffle=True,              # shuffle before splitting
    stratify=y_data            # keep the class proportions equal in both parts
)

# Step 2: carve a validation set out of the remaining training data.
val_percentual = 0.1
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train,          # your train dataset
    test_size=val_percentual,  # fraction of the train data used for validation
    random_state=42,           # for reproducibility
    shuffle=True,              # shuffle before splitting
    stratify=y_train           # keep the class proportions equal in both parts
)

# Defining our Neural Network

In [None]:
# `layers` and `models` are already imported in the "Setup & Imports" cell.

# The network below ends in a single sigmoid unit trained with binary
# cross-entropy, which is only meaningful for exactly two classes.
# Fail early with a clear message if the labels say otherwise.
num_classes = len(np.unique(y_train))
if num_classes != 2:
    raise ValueError(
        f"Expected binary labels, but found {num_classes} distinct classes in y_train."
    )

# Simple CNN: two convolution / dropout / pooling stages and a small dense head
model = models.Sequential([
    layers.Input(shape=x_train.shape[1:]),  # per-sample shape, batch axis dropped
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.Dropout(0.3),
    layers.MaxPooling2D(),

    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.Dropout(0.3),
    layers.MaxPooling2D(),

    layers.Flatten(),
    layers.Dense(8, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')  # score between 0 and 1
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:
# Train the model; the validation split is evaluated at the end of every epoch
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(x_val, y_val),
)


# Loss Curve

In [None]:

# Training and validation loss per epoch, as recorded by model.fit
fig, ax = plt.subplots(figsize=(8, 4))
for history_key, curve_label in (('loss', 'Train Loss'),
                                 ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Loss Curve')
ax.legend()
ax.grid(True)
plt.show()


# Confusion Matrix

In [None]:
# `confusion_matrix` and `ConfusionMatrixDisplay` are already imported in the
# "Setup & Imports" cell.

# Predicted probabilities for the test set, flattened to a 1D array
y_score = model.predict(x_test).ravel()

# Convert probabilities to class labels using a 0.5 threshold
y_pred_labels = (y_score >= 0.5).astype(int)

# Pin the label order explicitly so the tick names always match the rows and
# columns of the matrix: 0 - Elliptical, 1 - Spiral
cm = confusion_matrix(y_test, y_pred_labels, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=['Elliptical', 'Spiral'])
disp.plot(cmap='Blues')
plt.title("Confusion Matrix (Sigmoid Output)")
plt.grid(False)
plt.show()


# ROC Curve

In [None]:
from sklearn.metrics import roc_curve, auc

# Scores for the validation split, flattened to a 1D vector of probabilities
y_score = model.predict(x_val).ravel()

# False/true positive rates over all decision thresholds, and the area under
# the resulting curve
fpr, tpr, _ = roc_curve(y_val, y_score)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve together with the chance-level diagonal
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], 'k--')  # diagonal line for random guessing
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - Binary Classification')
ax.legend(loc='lower right')
ax.grid(True)
plt.show()


# Questions
1) Are there any issues with your current model? If so, what are they?

2) If there are issues, what could you do to address them?

# Task
Improve the quality of your model based on your observations.
