In [19]:
# ===============================================================
# 00 - Setup inicial del proyecto
# Proyecto: Psych Phenotyping Paraguay 🇵🇾
# Autor: Manuel Núñez
# Maestría en Ciencia de Datos - Universidad Comunera (UCOM)
# 2025
# ===============================================================

import os, sys, pathlib, yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------------------------------------------------
# 1. Environment detection
# ---------------------------------------------------------------
# IN_COLAB is True only when the `google.colab` package can be imported;
# any ImportError is taken to mean the notebook is running locally.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# Human-readable name of the detected environment, used in the report line.
_env_label = 'Google Colab' if IN_COLAB else 'Local'
print(f"üì¶ Entorno detectado: {_env_label}")


üì¶ Entorno detectado: Local


In [20]:
# ---------------------------------------------------------------
# 2. Path configuration (local / Colab)
# ---------------------------------------------------------------

# The project root is the current working directory, unless the kernel was
# started from inside a folder named "notebooks" -- then it is one level up.
_cwd = pathlib.Path.cwd()
BASE_PATH = _cwd.parent if _cwd.name == "notebooks" else _cwd

# Project locations derived from the root and from the fork checkout.
FORK_PATH = BASE_PATH.joinpath("Spanish_Psych_Phenotyping_PY")
DATA_PATH = BASE_PATH.joinpath("data")
LEX_PATH = FORK_PATH.joinpath("assets", "lexicons", "depression_anxiety")
CONFIG_PATH_MAIN = BASE_PATH.joinpath("configs", "config_PY.yml")
CONFIG_PATH_FORK = FORK_PATH.joinpath("config_PY.yml")

# On Colab, mount Google Drive and switch DATA_PATH to the Drive data folder,
# but only when that folder actually exists; otherwise DATA_PATH is unchanged.
if IN_COLAB:
    from google.colab import drive

    drive.mount('/content/drive', force_remount=False)
    DRIVE_DATA_PATH = pathlib.Path("/content/drive/MyDrive/psych-data")
    if DRIVE_DATA_PATH.exists():
        DATA_PATH = DRIVE_DATA_PATH
        print("üìÅ Usando datos desde Google Drive.")

# Report the resolved locations, one per line.
for _label, _location in (
    ("BASE_PATH", BASE_PATH),
    ("DATA_PATH", DATA_PATH),
    ("FORK_PATH", FORK_PATH),
):
    print(f"{_label} : {_location}")


BASE_PATH : /Users/manuelnunez/Projects/psych-phenotyping-paraguay
DATA_PATH : /Users/manuelnunez/Projects/psych-phenotyping-paraguay/data
FORK_PATH : /Users/manuelnunez/Projects/psych-phenotyping-paraguay/Spanish_Psych_Phenotyping_PY


In [21]:
# ---------------------------------------------------------------
# 3. Project structure check
# ---------------------------------------------------------------
# Folders the project expects to find; each one is reported as either
# present or missing. Nothing is created and nothing is raised here.
required_dirs = [
    DATA_PATH,
    FORK_PATH,
    BASE_PATH / "notebooks",
    BASE_PATH / "configs",
]
for folder in required_dirs:
    if folder.exists():
        print(f"‚úÖ Carpeta detectada: {folder}")
    else:
        print(f"‚ö†Ô∏è Falta carpeta: {folder}")


‚úÖ Carpeta detectada: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/data
‚úÖ Carpeta detectada: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/Spanish_Psych_Phenotyping_PY
‚úÖ Carpeta detectada: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/notebooks
‚úÖ Carpeta detectada: /Users/manuelnunez/Projects/psych-phenotyping-paraguay/configs


In [22]:
# ---------------------------------------------------------------
# 4. Configuration loading (config_PY.yml)
# ---------------------------------------------------------------
# `config` stays None when no configuration file is found.
config = None

# Candidates are tried in priority order: the project's own /configs copy
# first, then the copy shipped inside the fork. The first one that exists wins.
for _config_file in (CONFIG_PATH_MAIN, CONFIG_PATH_FORK):
    if not _config_file.exists():
        continue
    with open(_config_file, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    print(f"‚úÖ Configuraci√≥n cargada desde {_config_file}")
    break
else:
    # Reached only when the loop ended without `break`, i.e. neither file exists.
    print("‚ö†Ô∏è No se encontr√≥ config_PY.yml ni en /configs ni en el fork.")

# List the top-level keys only when something non-empty was loaded.
if config:
    print("   Claves:", list(config.keys()))


‚úÖ Configuraci√≥n cargada desde /Users/manuelnunez/Projects/psych-phenotyping-paraguay/configs/config_PY.yml
   Claves: ['text_column', 'lexicon_path', 'context_rules_path', 'output_dir']


In [23]:
# ---------------------------------------------------------------
# 5. Load the cleaned dataset (ips_clean.csv)
# ---------------------------------------------------------------
CLEAN_FILE = DATA_PATH / "ips_clean.csv"

# Start from a known state. Without this, a missing file left `df` unassigned,
# so the class-distribution check below raised NameError on a fresh kernel
# (or silently reported on a stale `df` left over from an earlier run).
df = None

if CLEAN_FILE.exists():
    df = pd.read_csv(CLEAN_FILE)
    print(f"‚úÖ Dataset cargado: {CLEAN_FILE.name}")
    print(f"   Filas: {df.shape[0]} | Columnas: {df.shape[1]}")
else:
    # Best-effort by design: warn and point to the notebook that generates
    # the file instead of aborting the setup.
    print("‚ö†Ô∏è No se encontr√≥ ips_clean.csv en /data.")
    print("   Ejecuta 01_eda_preprocessing.ipynb para generarlo primero.")

# Show the label distribution only when a dataset was actually loaded and it
# carries both the 'etiqueta' and 'texto' columns.
if df is not None and 'etiqueta' in df.columns and 'texto' in df.columns:
    print("\nDistribuci√≥n de clases:")
    print(df['etiqueta'].value_counts())


‚úÖ Dataset cargado: ips_clean.csv
   Filas: 3126 | Columnas: 4

Distribuci√≥n de clases:
etiqueta
depresion    2201
ansiedad      925
Name: count, dtype: int64


In [24]:
# ---------------------------------------------------------------
# 6. Final environment summary
# ---------------------------------------------------------------
# Values that need a decision are resolved first, then the whole summary
# is emitted in a single write.
_env_name = 'Google Colab' if IN_COLAB else 'Local'
_dataset_status = 'ips_clean.csv' if CLEAN_FILE.exists() else '‚ö†Ô∏è No encontrado'

_summary_lines = [
    "\nüìò Configuraci√≥n activa:",
    f"   Entorno   : {_env_name}",
    f"   Base path : {BASE_PATH}",
    f"   Data path : {DATA_PATH}",
    f"   Fork path : {FORK_PATH}",
    f"   Dataset   : {_dataset_status}",
]
print("\n".join(_summary_lines))

# NOTE(review): this closing message is printed unconditionally, even when
# the dataset status above reports that the file was not found.
print("\n‚úÖ Setup completado correctamente.")



üìò Configuraci√≥n activa:
   Entorno   : Local
   Base path : /Users/manuelnunez/Projects/psych-phenotyping-paraguay
   Data path : /Users/manuelnunez/Projects/psych-phenotyping-paraguay/data
   Fork path : /Users/manuelnunez/Projects/psych-phenotyping-paraguay/Spanish_Psych_Phenotyping_PY
   Dataset   : ips_clean.csv

‚úÖ Setup completado correctamente.
