In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files under that path can be read and written by this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

# --- Configuration ---------------------------------------------------------
# Drive folders: BATCH_DIR holds the input .npz batches, OUTPUT_DIR receives
# the train/test .npy files written at the end of this cell.
BATCH_DIR = "/content/drive/MyDrive/planet_batches"
OUTPUT_DIR = "/content/drive/MyDrive/exoplanet_data"

BATCH_0 = os.path.join(BATCH_DIR, "batch_0.npz")
BATCH_1 = os.path.join(BATCH_DIR, "batch_1.npz")

# Name of the array that is read out of each .npz archive.
KEY = "X"

# Fraction of rows kept for training; the remainder becomes the test set.
TRAIN_RATIO = 0.8
# Fixed seed so the shuffled split is reproducible across runs.
RANDOM_STATE = 42


def _load_batch(path, key):
    """Read one array out of an .npz archive and close the archive.

    Args:
        path: Path to the .npz file.
        key: Name of the array inside the archive.

    Returns:
        The array stored under ``key``.

    Raises:
        KeyError: If ``key`` is not one of the arrays in the archive.
    """
    # np.load on an .npz returns a lazily-read NpzFile that holds an open file
    # handle; the context manager closes it once the array has been read.
    with np.load(path) as archive:
        if key not in archive.files:
            raise KeyError(
                f"{key!r} not found in {path}; available arrays: {archive.files}"
            )
        return archive[key]


#make output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

#load batches
batch0 = _load_batch(BATCH_0, KEY)
batch1 = _load_batch(BATCH_1, KEY)

print("Batch 0 shape:", batch0.shape)
print("Batch 1 shape:", batch1.shape)

#combine data
# Stack the batches along the first axis and store everything as float32.
X = np.concatenate([batch0, batch1], axis=0).astype(np.float32)
print("Combined shape:", X.shape)

#train/test split
X_train, X_test = train_test_split(
    X,
    test_size=1 - TRAIN_RATIO,
    random_state=RANDOM_STATE,
    shuffle=True
)

# Sanity check: every combined row must land in exactly one of the two splits.
assert len(X_train) + len(X_test) == len(X), "split sizes do not add up"

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

#save outputs
np.save(os.path.join(OUTPUT_DIR, "planets_train.npy"), X_train)
np.save(os.path.join(OUTPUT_DIR, "planets_test.npy"), X_test)

print("Saved:")
print(" - planets_train.npy")
print(" - planets_test.npy")

Batch 0 shape: (4, 1)
Batch 1 shape: (4, 1)
Combined shape: (8, 1)
Train shape: (6, 1)
Test shape: (2, 1)
Saved:
 - planets_train.npy
 - planets_test.npy
