In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1. Load and clean data
df = pd.read_csv("../data/clean/synthetic_training_data.csv")
df = df[df["score"].isin([0, 1])]  # asegurar binariedad
df.fillna(df.mean(numeric_only=True), inplace=True)  # rellenar NaN con la media

X = df[[
    "budget_weight", "safety_weight", "youth_weight", "centrality_weight",
    "situation_student", "situation_young_professional", "situation_professional", "situation_other",
    "stay_short_term", "stay_medium_term", "stay_long_term",
    "norm_rent", "norm_crime", "norm_youth", "norm_centrality"
]].values.astype(np.float32)


y = df["score"].values.astype(np.float32).reshape(-1, 1)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Dataset definition
class ScoreDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X)
        self.y = torch.tensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_loader = DataLoader(ScoreDataset(X_train, y_train), batch_size=16, shuffle=True)
val_loader = DataLoader(ScoreDataset(X_val, y_val), batch_size=16)

In [2]:
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

import sys
import os
sys.path.append(os.path.abspath(os.path.join("..")))
from models.model_architecture import ScoreModel

space = [
    Real(1e-4, 1e-1, name='lr', prior='log-uniform'),
    Integer(8, 64, name='hidden_dim_1'),
    Integer(4, 32, name='hidden_dim_2'),
]

@use_named_args(space)
def objective(**params):
    model = ScoreModel(
        hidden_dim_1=params['hidden_dim_1'],
        hidden_dim_2=params['hidden_dim_2']
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])
    loss_fn = nn.BCELoss()

    for epoch in range(20):
        model.train()
        for X_batch, y_batch in train_loader:
            pred = model(X_batch)
            loss = loss_fn(pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            pred = model(X_batch)
            val_loss += loss_fn(pred, y_batch).item()

    return val_loss

# Run optimization
results = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=20,
    random_state=42,
    verbose=True
)

print(f"\n✅ Best validation loss: {results.fun:.4f}")
print(f"Best parameters: lr={results.x[0]}, hidden_dim_1={results.x[1]}, hidden_dim_2={results.x[2]}")

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1.1286
Function value obtained: 2.7309
Current minimum: 2.7309
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.5097
Function value obtained: 0.5515
Current minimum: 0.5515
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.5026
Function value obtained: 0.6440
Current minimum: 0.5515
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.4901
Function value obtained: 1.0589
Current minimum: 0.5515
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.4870
Function value obtained: 5.1749
Current minimum: 0.5515
Iteration No: 6 started. Evaluating

In [3]:
import torch
import json
import os
from datetime import datetime

def save_model_version(model, val_loss, hyperparameters, notes="", folder="../models", registry_file="model_registry.json"):
    # Ensure folder exists
    os.makedirs(folder, exist_ok=True)

    registry_path = os.path.join(folder, registry_file)

    # Load existing versions
    registry = load_model_registry(registry_path)
    version_number = len(registry) + 1
    version = f"v{version_number}"
    today = datetime.today().strftime("%Y-%m-%d")
    filename = f"score_model_{version}_{today}.pth"
    filepath = os.path.join(folder, filename)

    # Save model
    torch.save(model.state_dict(), filepath)
    print(f"✅ Model saved as {filename}")

    # Append new registry entry
    new_entry = {
        "version": version,
        "filename": filename,
        "date": today,
        "val_loss": float(val_loss),
        "hyperparameters": {k: float(v) if isinstance(v, (float, int)) else v for k, v in hyperparameters.items()},
        "notes": notes
    }

    registry.append(new_entry)

    # Write updated registry
    with open(registry_path, "w") as f:
        json.dump(registry, f, indent=2)
    print(f"📚 Registry updated: {registry_path}")

def load_model_registry(registry_path):
    if os.path.exists(registry_path):
        try:
            with open(registry_path, "r") as f:
                content = f.read().strip()
                return json.loads(content) if content else []
        except json.JSONDecodeError:
            print("Registry file is corrupt. Starting fresh.")
            return []
    return []


In [4]:
# Save final model after tuning
best_params = {
    "lr": results.x[0],
    "hidden_dim_1": results.x[1],
    "hidden_dim_2": results.x[2]
}

final_model = ScoreModel(
    hidden_dim_1=best_params['hidden_dim_1'],
    hidden_dim_2=best_params['hidden_dim_2']
)
optimizer = torch.optim.Adam(final_model.parameters(), lr=best_params["lr"])
loss_fn = nn.BCELoss()

# Retrain full model
for epoch in range(30):
    final_model.train()
    for X_batch, y_batch in train_loader:
        pred = final_model(X_batch)
        loss = loss_fn(pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Evaluate final val loss
final_model.eval()
val_loss = 0
with torch.no_grad():
    for X_batch, y_batch in val_loader:
        pred = final_model(X_batch)
        val_loss += loss_fn(pred, y_batch).item()

# Save with metadata
save_model_version(
    final_model,
    val_loss=val_loss,
    hyperparameters={
        "lr": float(best_params["lr"]),
        "hidden_dim_1": int(best_params["hidden_dim_1"]),
        "hidden_dim_2": int(best_params["hidden_dim_2"])
    },
    notes="-"
)


✅ Model saved as score_model_v2_2025-04-14.pth
📚 Registry updated: ../models/model_registry.json
