In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Function to extract features from the largest white blob in the image
def extract_features(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply binary thresholding (black & white)
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    
    # Find contours of all blobs
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    if len(contours) == 0:
        return [0, 0, 0]  # If no contour is detected, return zeroed features
    
    # Find the largest contour
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Bounding box of the largest blob
    x, y, w, h = cv2.boundingRect(largest_contour)
    
    # Aspect ratio of the largest blob
    aspect_ratio = float(w) / h
    
    # Create a mask for the largest blob
    mask = np.zeros_like(binary)
    cv2.drawContours(mask, [largest_contour], -1, 255, thickness=cv2.FILLED)
    
    # Calculate white and black pixel percentages
    white_pixels = np.sum(mask == 255)
    total_pixels = mask.size
    black_pixels = total_pixels - white_pixels
    
    white_pixels = np.sum(mask == 255)
    total_pixels = mask.size
    white_percentage = (white_pixels / total_pixels) * 100
    
    return [aspect_ratio, 100 - white_percentage, white_percentage]

In [None]:
# Function to detect largest white blob, draw a bounding box, and classify
def predict_and_visualize(image_path):
    # Load the image
    image = cv2.imread(image_path)
    
    # Extract features from the image for classification
    features = extract_features(image)
    
    # Standardize the features (using the same scaler from training)
    features_scaled = scaler.transform([features])
    
    # Predict the class (returns an index: 0, 1, or 2)
    prediction = model.predict(features_scaled)
    
    # Map index to class name
    predicted_class = categories[prediction[0]]
    
    # Convert to grayscale and apply binary threshold for visualization
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    
    # Find contours and get the largest contour
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) > 0:
        largest_contour = max(contours, key=cv2.contourArea)
        # Draw bounding box around the largest blob
        x, y, w, h = cv2.boundingRect(largest_contour)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    
    # Display the result with bounding box and predicted class
    plt.figure(figsize=(6, 6))
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(f"Predicted Class: {predicted_class}")
    plt.axis('off')
    plt.show()


In [None]:
# Path to the folders containing the galaxy images
data_dir = "/kaggle/input/galaxies/DataSet"
categories = ["Circular", "Elliptical", "Others"]

# Prepare dataset
X = []
y = []

for category in categories:
    folder_path = os.path.join(data_dir, category)
    label = categories.index(category)  # Assigning numeric label: 0, 1, 2
    
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path)
        
        # Extract features and append to dataset
        features = extract_features(image)
        X.append(features)
        y.append(label)

In [None]:
# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ANN Model (Single hidden layer with 10 neurons)
model = MLPClassifier(hidden_layer_sizes=(50,), activation='relu', max_iter=1000, learning_rate_init= 0.001)

# Train the model
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate model accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Example usage for a new galaxy image
image_path = "/kaggle/input/galaxies/DataSet/Elliptical/100623.jpg"  # Replace with actual path
predict_and_visualize(image_path)


In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'hidden_layer_sizes': [(50,), (50, 30), (100,)],
    'activation': ['logistic', 'relu'],
    'learning_rate_init': [0.001, 0.0005]
}

grid_search = GridSearchCV(MLPClassifier(max_iter=1000), param_grid, cv=5)
grid_search.fit(X_train, y_train)

print(f"Best Parameters: {grid_search.best_params_}")
