In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/galaxy-zoo-the-galaxy-challenge/all_ones_benchmark.zip
/kaggle/input/galaxy-zoo-the-galaxy-challenge/images_test_rev1.zip
/kaggle/input/galaxy-zoo-the-galaxy-challenge/central_pixel_benchmark.zip
/kaggle/input/galaxy-zoo-the-galaxy-challenge/images_training_rev1.zip
/kaggle/input/galaxy-zoo-the-galaxy-challenge/all_zeros_benchmark.zip
/kaggle/input/galaxy-zoo-the-galaxy-challenge/training_solutions_rev1.zip


In [2]:
import pandas as pd
import numpy as np
import zipfile
import os
import cv2

In [19]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2,InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam


In [40]:
import random
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ["TF_DETERMINISTIC_OPS"] = "1"

In [41]:
with zipfile.ZipFile("../input/galaxy-zoo-the-galaxy-challenge/training_solutions_rev1.zip", 'r') as zip_ref:
    zip_ref.extractall()

In [42]:
data = pd.read_csv("training_solutions_rev1.csv")
data["GalaxyID"] = data["GalaxyID"].astype(str) + ".jpg"

In [43]:
new_data = data[((data["Class1.1"] >= .8) | (data["Class1.2"] >= .8))].reset_index(drop=True)
new_data.drop_duplicates(inplace=True)


In [44]:
conditions = [new_data["Class1.1"] >= 0.8, new_data["Class1.2"] >= 0.8]
choices = ["elliptical", "spiral"]
new_data["class"] = np.select(conditions, choices, default="Tie")


In [45]:
df_spiral = new_data[new_data["class"] == "spiral"].sample(n=8000, random_state=42)
df_elliptical = new_data[new_data["class"] == "elliptical"].sample(n=8000, random_state=42)
frames = [df_spiral, df_elliptical]
df_galaxies = pd.concat(frames)[["GalaxyID", "class"]]


In [46]:
with zipfile.ZipFile("../input/galaxy-zoo-the-galaxy-challenge/images_training_rev1.zip", 'r') as zip_ref:
    zip_ref.extractall()

In [47]:
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.2)


In [48]:
train_generator = datagen.flow_from_dataframe(
    dataframe=df_galaxies,
    target_size=(224, 224),
    directory="./images_training_rev1",
    batch_size=32,
    x_col="GalaxyID",
    y_col="class",
    color_mode="rgb",
    class_mode="categorical",
    subset="training",
    shuffle=True,
    seed=42
)

Found 12800 validated image filenames belonging to 2 classes.


In [49]:
valid_generator = datagen.flow_from_dataframe(
    dataframe=df_galaxies,
    target_size=(224, 224),
    directory="./images_training_rev1",
    batch_size=32,
    x_col="GalaxyID",
    y_col="class",
    color_mode="rgb",
    class_mode="categorical",
    subset="validation",
    shuffle=True,
    seed=42
)

Found 3200 validated image filenames belonging to 2 classes.


In [50]:
def evaluate_pretrained_model(base_model):
    # Freeze layers
    for layer in base_model.layers:
        layer.trainable = False

    # Add classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(2, activation='softmax')(x)

    # Build and compile model
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

    # Evaluate on validation (used as test split)
    loss, accuracy = model.evaluate(valid_generator)
    return accuracy

In [53]:
print(" Evaluating MobileNetV2 ...")
mobilenet_base = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
acc_mobilenet = evaluate_pretrained_model(mobilenet_base)
print(f" - MobileNetV2 Accuracy: {acc_mobilenet:.4f}")


 Evaluating MobileNetV2 ...
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 55ms/step - accuracy: 0.5659 - loss: 0.6976
 - MobileNetV2 Accuracy: 0.5775


In [54]:
print(" Evaluating InceptionV3...")
inception_base = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
acc_inception = evaluate_pretrained_model(inception_base)
print(f"  - InceptionV3 Accuracy: {acc_inception:.4f}")

 Evaluating InceptionV3...
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 54ms/step - accuracy: 0.0168 - loss: 1.6494
  - InceptionV3 Accuracy: 0.0137
