# 00_setup.ipynb - Configuración Inicial del Proyecto

**Propósito**:
- Validar estructura del proyecto
- Verificar dependencias instaladas
- Configurar paths usando `utils_shared.py`

**Cuándo ejecutar**:
- Primera vez que clonas el repositorio
- Después de cambios en la estructura de carpetas
- Para verificar que todo está correctamente instalado

In [1]:
import importlib
import pathlib
import sys
from pathlib import Path

# Make `utils_shared` importable whether the kernel was started in the project
# root or inside notebooks/.
current_dir = Path.cwd()
project_root = current_dir.parent if current_dir.name == "notebooks" else current_dir

# Append only when absent so re-running this cell does not keep growing sys.path
# with duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

try:
    from notebooks.utils_shared import setup_paths, validate_file_exists
except ImportError:
    # Fallback when the package-style import is unavailable (e.g. running from
    # notebooks/): expose the current directory and import the module directly.
    if str(current_dir) not in sys.path:
        sys.path.append(str(current_dir))
    from utils_shared import setup_paths, validate_file_exists

# Resolve the project directories; the result is used as a name -> path mapping.
paths = setup_paths()

print("[INFO] Paths configurados:")
for key, value in paths.items():
    print(f"  {key}: {value}")

[INFO] Paths configurados:
  BASE_PATH: /Users/manuelnunez/Projects/psych-phenotyping-paraguay
  DATA_PATH: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/data
  FORK_PATH: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/Spanish_Psych_Phenotyping_PY
  SPLITS_PATH: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/data/splits
  FIGS_PATH: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/data/figs


In [2]:
# Check that the key dependencies can actually be imported
print("\n[INFO] Verificando dependencias...")

dependencies = [
    'pandas', 'numpy', 'scipy', 'sklearn', 'transformers', 'torch',
    'matplotlib', 'seaborn', 'nltk', 'spacy', 'tqdm'
]


def check_dependencies(names):
    """Try to import every module in ``names`` and print one status line each.

    Args:
        names: Iterable of importable module names.

    Returns:
        list: The names whose import failed, in input order. This includes
        modules that are not installed as well as modules that are installed
        but raise some other exception while being imported.
    """
    failed = []
    for lib in names:
        try:
            importlib.import_module(lib)
        except ImportError:
            print(f"  [ERROR] {lib} (NO INSTALADO)")
            failed.append(lib)
        except Exception as exc:
            # Installed but broken: report it and keep checking the remaining
            # packages instead of aborting the whole cell with a traceback.
            print(f"  [ERROR] {lib} (FALLO AL IMPORTAR: {type(exc).__name__}: {exc})")
            failed.append(lib)
        else:
            print(f"  [OK] {lib}")
    return failed


missing = check_dependencies(dependencies)

if missing:
    print(f"\n[WARNING] Faltan dependencias: {', '.join(missing)}")
    print("  Instalar con: pip install -r requirements.txt")
else:
    print("\n[INFO] Todas las dependencias instaladas correctamente")


[INFO] Verificando dependencias...
  [OK] pandas
  [OK] numpy
  [OK] scipy
  [OK] sklearn


  from .autonotebook import tqdm as notebook_tqdm


  [OK] transformers
  [OK] torch
  [OK] matplotlib
  [OK] seaborn
  [OK] nltk
  [OK] spacy
  [OK] tqdm

[INFO] Todas las dependencias instaladas correctamente


In [3]:
# Report which expected data files are present on disk
print("\n[INFO] Verificando archivos de datos...")

# (path, description) pairs: the raw dataset is looked up under DATA_PATH,
# the remaining files under SPLITS_PATH.
files_to_check = [(paths['DATA_PATH'] / 'ips_raw.csv', "Archivo original del dataset")]
files_to_check += [
    (paths['SPLITS_PATH'] / filename, description)
    for filename, description in (
        ('dataset_base.csv', "Dataset base (generado por 02_create_splits)"),
        ('train_indices.csv', "Indices train (generado por 02_create_splits)"),
        ('dev_indices.csv', "Indices dev (generado por 02_create_splits)"),
        ('test_indices.csv', "Indices test (generado por 02_create_splits)"),
    )
]

for filepath, desc in files_to_check:
    try:
        validate_file_exists(filepath)
    except FileNotFoundError:
        # A missing file is only a warning here; the loop continues with the next one.
        print(f"  [WARNING] {filepath.name} NO ENCONTRADO ({desc})")
    else:
        print(f"  [OK] {filepath.name} ({desc})")


[INFO] Verificando archivos de datos...
  [OK] ips_raw.csv (Archivo original del dataset)
