In [9]:
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers

In [10]:
# Bind the shorter name `df` to the binary-labelled table used by the rest of
# the notebook. This is a plain name binding, not a copy: `df` and
# `df_binary_labeled` refer to the same object.
# NOTE(review): `df_binary_labeled` is not created in any cell visible here —
# confirm an earlier cell defines it, otherwise Restart & Run All fails here.
df = df_binary_labeled

In [11]:
# Split the table into model inputs and target: every column other than
# 'label' is a feature, and 'label' itself is the value to predict.
feature_frame = df.drop('label', axis=1)
label_series = df['label']

X = feature_frame.values
y = label_series.values

In [12]:
# Two-stage stratified split with a fixed seed (42) at both stages.
#   Stage 1: hold out 20% of all rows as the test set.
#   Stage 2: take 25% of the remaining 80% as validation.
# Net effect is a 60/20/20 train/validation/test partition, each stage
# stratified on the labels it is splitting.
X_dev, X_test, y_dev, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_dev, y_dev,
    test_size=0.25,
    random_state=42,
    stratify=y_dev,
)


In [13]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to validation and test, so preprocessing never
# sees statistics from held-out rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# Report split sizes and the label balance of the training split.
train_class_counts = np.bincount(y_train.astype(int))

print(f"Training set shape: {X_train_scaled.shape}")
print(f"Validation set shape: {X_val_scaled.shape}")
print(f"Test set shape: {X_test_scaled.shape}")
print(f"Class distribution - Training: {train_class_counts}")

Training set shape: (906, 15)
Validation set shape: (302, 15)
Test set shape: (302, 15)
Class distribution - Training: [262 644]


In [14]:
# Seed Python's `random`, NumPy and the Keras backend so weight initialisation,
# dropout masks and batch shuffling are repeatable across kernel restarts.
# 42 matches the `random_state` used for the data splits.
keras.utils.set_random_seed(42)

# Number of input features, taken from the scaled training matrix.
n_features = X_train_scaled.shape[1]

# 1D CNN for binary classification on tabular features.
# The flat feature vector is reshaped to (n_features, 1) so Conv1D slides over
# the feature axis with a single channel.
model = keras.Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape=`
    # to the first layer instead makes Keras emit a UserWarning.
    keras.Input(shape=(n_features,)),

    # Reshape input for CNN (treating features as 1D sequence)
    layers.Reshape((n_features, 1)),

    # First convolutional block
    layers.Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),
    layers.Dropout(0.2),

    # Second convolutional block
    layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),
    layers.Dropout(0.3),

    # Third convolutional block; global max pooling collapses the sequence
    # axis so the dense head receives one 128-value vector per sample.
    layers.Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.GlobalMaxPooling1D(),
    layers.Dropout(0.4),

    # Dense layers
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.3),

    # Output layer for binary classification: one sigmoid unit
    layers.Dense(1, activation='sigmoid'),
])

  super().__init__(**kwargs)


In [15]:
# Training configuration: Adam at a 1e-3 learning rate, binary cross-entropy
# loss for the single sigmoid output, and three tracked metrics.
# The order of `metrics` matters: the evaluation cell unpacks
# `model.evaluate(...)` positionally as loss, accuracy, precision, recall.
adam_optimizer = keras.optimizers.Adam(learning_rate=1e-3)

model.compile(
    loss='binary_crossentropy',
    optimizer=adam_optimizer,
    metrics=['accuracy', 'precision', 'recall'],
)

In [16]:
# Print the model's layer-by-layer summary as a quick architecture check
# before training.
model.summary()


In [17]:
# Both callbacks watch validation loss.
# The learning-rate scheduler has the shorter patience (5 epochs vs 10), so
# the rate is scaled by 0.2 (never below 1e-4) before early stopping can
# trigger.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=5, min_lr=1e-4
)

# Stop after 10 epochs without improvement and roll the model back to the
# weights from the best epoch seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

In [18]:
# Train for at most 40 epochs in batches of 64, validating on the held-out
# validation split after every epoch. Early stopping may end training sooner.
# NOTE(review): the saved output beneath this cell shows "Epoch 1/100" and 29
# steps per epoch, which does not match epochs=40 / batch_size=64 on the 906
# training rows (that would be 15 steps). The log appears to come from an
# earlier run with different settings — re-run before trusting the recorded
# metrics.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=64,
    epochs=40,
    validation_data=(X_val_scaled, y_val),
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)

Epoch 1/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - accuracy: 0.4428 - loss: 1.1309 - precision: 0.7170 - recall: 0.3300 - val_accuracy: 0.7682 - val_loss: 0.6110 - val_precision: 0.8000 - val_recall: 0.8972 - learning_rate: 0.0010
Epoch 2/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7428 - loss: 0.6262 - precision: 0.8808 - recall: 0.7332 - val_accuracy: 0.7715 - val_loss: 0.5152 - val_precision: 0.7599 - val_recall: 0.9907 - learning_rate: 0.0010
Epoch 3/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7695 - loss: 0.5079 - precision: 0.8596 - recall: 0.8096 - val_accuracy: 0.7715 - val_loss: 0.4937 - val_precision: 0.7599 - val_recall: 0.9907 - learning_rate: 0.0010
Epoch 4/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7909 - loss: 0.5047 - precision: 0.8353 - recall: 0.8623 - val_accuracy: 0.7715 - val_loss: 0.486

In [19]:
# Score the trained model on the held-out test split. `evaluate` returns the
# loss followed by the metrics in the order they were given to `compile`:
# accuracy, precision, recall.
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(
    X_test_scaled, y_test, verbose=0
)

# F1 is the harmonic mean of precision and recall. If the model predicts no
# positives correctly both are 0 and the formula would divide by zero, so
# F1 is reported as 0.0 in that case.
precision_recall_sum = test_precision + test_recall
if precision_recall_sum > 0:
    test_f1 = 2 * (test_precision * test_recall) / precision_recall_sum
else:
    test_f1 = 0.0

print(f"\nTest Results:")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f}")


Test Results:
Test Accuracy: 0.9073
Test Precision: 0.9043
Test Recall: 0.9720
Test F1-Score: 0.9369


In [27]:
# Sigmoid outputs for the test split, then hard 0/1 labels using a 0.5 cut-off
# (strictly greater than 0.5 counts as the positive class).
# The label array keeps the same shape as the probability array returned by
# `predict` — presumably (n_samples, 1) given the single output unit.
y_pred_proba = model.predict(X_test_scaled)
is_positive = np.greater(y_pred_proba, 0.5)
y_pred = is_positive.astype(int)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


In [29]:
# Persist the trained model to the current user's Downloads folder.
# The location is derived from the home directory rather than a hard-coded
# user-specific absolute path, so the cell runs on any machine/account.
model_path = Path.home() / "Downloads" / "exoplanet_cnn_model.h5"

# Create the target directory if it does not exist yet so saving cannot fail
# on a missing folder.
model_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): the '.h5' suffix selects the legacy HDF5 format; consider the
# native '.keras' format if nothing downstream depends on this filename.
model.save(str(model_path))
print("\nModel saved successfully!")




Model saved successfully!
