In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from lightkurve import LightCurve
from torch.utils.data import Dataset, DataLoader
import json

# ---------- GENERAL CONFIGURATION ----------
# Filesystem locations.
DATA_DIR = "../data"
MODEL_PATH = "../models/transit_cnn.pt"
META_PATH = "../models/transit_cnn_meta.json"

# Fixed length every light curve is padded or truncated to.
INPUT_LENGTH = 2000

# Training hyperparameters.
EPOCHS, BATCH_SIZE = 20, 4
LEARNING_RATE = 1e-3

# ---------- FUNCIONES DE PREPROCESAMIENTO ----------

def load_and_preprocess(filepath, input_length=None, pad_value=0.0):
    """Load a light-curve CSV and return a fixed-length, detrended flux vector.

    The CSV is read with pandas and must contain ``time`` and ``flux``
    columns. The curve is detrended with ``LightCurve.flatten`` (window of
    401 samples), non-finite samples are discarded, and the result is
    right-padded or truncated to ``input_length`` samples.

    Args:
        filepath: Path of the CSV file to read.
        input_length: Number of samples in the returned vector. ``None``
            (the default) uses the module-level ``INPUT_LENGTH``.
        pad_value: Fill value appended to curves shorter than
            ``input_length``. Defaults to 0.0, the value this function has
            always used.
            NOTE(review): flattened flux is typically normalized around 1,
            so 1.0 may be a more neutral fill - confirm before changing the
            default, since any inference code must pad the same way.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``float32`` with ``input_length``
        elements.
    """
    if input_length is None:
        input_length = INPUT_LENGTH

    df = pd.read_csv(filepath)
    lc = LightCurve(time=df["time"], flux=df["flux"])
    flattened = lc.flatten(window_length=401)
    flux = flattened.flux.value

    # Drop NaN/inf samples: a single non-finite value would propagate through
    # the network and turn the training loss into NaN.
    flux = flux[np.isfinite(flux)]

    # Length normalization: pad on the right or cut the tail.
    missing = input_length - len(flux)
    if missing > 0:
        flux = np.pad(flux, (0, missing), mode="constant", constant_values=pad_value)
    else:
        flux = flux[:input_length]

    return flux.astype(np.float32)

# ---------- DATASET CUSTOM ----------

class ExoplanetDataset(Dataset):
    """Map-style dataset that yields one preprocessed light curve per file.

    Each item is a ``(flux, label)`` pair: ``flux`` is the output of
    ``load_and_preprocess`` with a leading channel axis added, and ``label``
    is a one-element ``float32`` tensor built from ``labels[idx]``.
    """

    def __init__(self, filepaths, labels):
        # Stored as given; files are only read when an item is requested.
        self.filepaths, self.labels = filepaths, labels

    def __len__(self):
        # The dataset size is driven by the file list alone.
        return len(self.filepaths)

    def __getitem__(self, idx):
        series = load_and_preprocess(self.filepaths[idx])
        target = self.labels[idx]
        return (
            torch.tensor(series).unsqueeze(0),  # (1, length)
            torch.tensor([target], dtype=torch.float32),
        )

# ---------- MODELO CNN ----------

class TransitCNN(nn.Module):
    """Small 1-D CNN that maps a single-channel sequence to one probability.

    Architecture: Conv1d(1 -> 8, kernel 5) -> ReLU -> MaxPool1d(2) ->
    flatten -> Linear(64) -> ReLU -> Linear(1) -> Sigmoid.

    Args:
        input_length: Number of samples in each input sequence; it fixes the
            size of the first fully connected layer.
    """

    def __init__(self, input_length):
        super().__init__()
        kernel, pool_width = 5, 2
        self.conv1 = nn.Conv1d(1, 8, kernel_size=kernel)
        self.pool = nn.MaxPool1d(pool_width)
        # An unpadded convolution shortens the sequence by (kernel - 1);
        # pooling then floor-divides the length by the pool width.
        pooled_length = (input_length - (kernel - 1)) // pool_width
        self.fc1 = nn.Linear(self.conv1.out_channels * pooled_length, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs of shape (batch, 1) for input ``x``."""
        features = self.pool(torch.relu(self.conv1(x)))
        flat = features.view(features.size(0), -1)
        hidden = torch.relu(self.fc1(flat))
        return self.sigmoid(self.fc2(hidden))

# ---------- DATA LOADING ----------
# Each sample is declared as a (file name, label) pair so the file list and
# the label list cannot drift out of sync. Previously there was one file but
# two labels; the extra 0 label had no matching file and was never used.
# TODO: add at least one negative (label 0) light curve; with only a positive
# example the training set contains a single class.
samples = [
    ("TIC_307210830.csv", 1),  # assumed positive
]
files = [os.path.join(DATA_DIR, name) for name, _ in samples]
labels = [label for _, label in samples]

dataset = ExoplanetDataset(files, labels)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# ---------- TRAINING ----------
# Binary cross-entropy on the model's sigmoid outputs, optimized with Adam.
model = TransitCNN(INPUT_LENGTH)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(EPOCHS):
    batch_losses = []
    for batch_flux, batch_labels in dataloader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_flux), batch_labels)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    # Report the mean loss per batch for this epoch.
    running_loss = sum(batch_losses)
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {running_loss/len(dataloader):.4f}")

# ---------- SAVING ----------
# Create the parent directory of each output file instead of a hard-coded
# folder, so changing MODEL_PATH or META_PATH keeps working. A bare file name
# has an empty dirname, which is mapped to the current directory.
for out_path in (MODEL_PATH, META_PATH):
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

# Only the weights are stored; the input length needed to rebuild the
# network is written alongside as JSON metadata.
torch.save(model.state_dict(), MODEL_PATH)
with open(META_PATH, "w", encoding="utf-8") as f:
    json.dump({"input_length": INPUT_LENGTH}, f)

print(f"Modelo guardado en: {MODEL_PATH}")
print(f"Metadatos guardados en: {META_PATH}")


Epoch 1/20 - Loss: 0.6738
Epoch 2/20 - Loss: 0.0798
Epoch 3/20 - Loss: 0.0129
Epoch 4/20 - Loss: 0.0027
Epoch 5/20 - Loss: 0.0007
Epoch 6/20 - Loss: 0.0002
Epoch 7/20 - Loss: 0.0001
Epoch 8/20 - Loss: 0.0000
Epoch 9/20 - Loss: 0.0000
Epoch 10/20 - Loss: 0.0000
Epoch 11/20 - Loss: 0.0000
Epoch 12/20 - Loss: 0.0000
Epoch 13/20 - Loss: 0.0000
Epoch 14/20 - Loss: 0.0000
Epoch 15/20 - Loss: 0.0000
Epoch 16/20 - Loss: 0.0000
Epoch 17/20 - Loss: 0.0000
Epoch 18/20 - Loss: 0.0000
Epoch 19/20 - Loss: 0.0000
Epoch 20/20 - Loss: 0.0000
Modelo guardado en: ../models/transit_cnn.pt
Metadatos guardados en: ../models/transit_cnn_meta.json
