In [4]:
cd ..

/home/jorge/DocumentsWLS/Data_Science_Projects/Geospatial-similarity-combining-TDA-LLM-agents-and-MLOps-workflows


In [5]:
import pandas as pd
import geopandas as gpd
import json

In [6]:
df = pd.read_parquet("data/processed/cultivos/evaluacion_cultivos.parquet")
suelo = gpd.read_parquet("data/processed/suelo/suelo_merged.parquet")
df_similarity = pd.read_parquet("flask_app/results/similarity_matrix.parquet")
df_confidence = pd.read_parquet("flask_app/results/confianza_matrix.parquet")

In [7]:
cols_a_conservar = suelo.columns[[0, 3, 4]]  # 1, 4, 5 (índices 0-based)
cols_a_conservar = cols_a_conservar.append(suelo.columns[119:]) 
suelo = suelo[cols_a_conservar].dropna()

In [8]:
comunes = df['CVEGEO'].isin(suelo['CVEGEO'])
df = df[comunes]
suelo = suelo[suelo['CVEGEO'].isin(df['CVEGEO'])]

In [9]:
print(suelo.shape)
print(df.shape)

(2101, 34)
(2101, 293)


In [10]:
import pandas as pd
import json

# Supongamos que tu DataFrame se llama df
# df = pd.read_csv("tu_archivo.csv") o ya lo tienes cargado

# Columnas fijas que quieres mantener siempre
fixed_columns = ["CVEGEO", "Idestado", "Idmunicipio"]

municipios_json = []

for _, row in df.iterrows():
    municipio_data = {col: row[col] for col in fixed_columns}
    
    # Solo agregar cultivos con valor > 0
    cultivos = {col: row[col] for col in df.columns if col not in fixed_columns and row[col] > 0}
    
    municipio_data["cultivos"] = cultivos
    municipios_json.append(municipio_data)
del municipio_data
# Convertimos a JSON
json_output_municipios = json.dumps(municipios_json, indent=2, ensure_ascii=False)

In [11]:
import pandas as pd
import json
from collections import defaultdict

# Columnas fijas
fixed_columns = ["CVEGEO", "Idestado", "Idmunicipio"]

# Diccionario para agrupar por cultivo
cultivos_dict = defaultdict(list)

for _, row in df.iterrows():
    for col in df.columns:
        if col not in fixed_columns and row[col] > 0:
            cultivos_dict[col].append({
                "CVEGEO": row["CVEGEO"],
                "Idestado": row["Idestado"],
                "Idmunicipio": row["Idmunicipio"],
                "valor": row[col]
            })

# Convertir a JSON
json_output_cultivos = json.dumps(cultivos_dict, indent=2, ensure_ascii=False)

In [None]:
import json
import numpy as np
# Suponiendo que json_output es un string JSON
municipios_data = json.loads(json_output_municipios)

In [None]:
json.load

In [9]:
def cultivos_comunes_mapeados(municipios_data, cvegeo1: str, cvegeo2: str) -> dict:
    """
    Devuelve los cultivos comunes entre dos municipios y sus valores, 
    usando un mapeo personalizado de valores a etiquetas.
    
    Args:
        municipios_data: lista de diccionarios de municipios.
        cvegeo1: CVEGEO del primer municipio.
        cvegeo2: CVEGEO del segundo municipio.
        mapeo: diccionario de valor numérico -> etiqueta.
    
    Returns:
        dict con cultivos comunes y valores mapeados.
    """
    mapeo = {
    1: 'Muy Malo',
    2: 'Malo',
    3: 'Regular',
    4: 'Bueno',
    5: 'Excelente'
    }
    # Buscar municipios
    m1 = next((m for m in municipios_data if m["CVEGEO"] == cvegeo1), None)
    m2 = next((m for m in municipios_data if m["CVEGEO"] == cvegeo2), None)
    
    if not m1 or not m2:
        return {"error": "Uno o ambos municipios no fueron encontrados."}
    
    cultivos1 = m1.get("cultivos", {})
    cultivos2 = m2.get("cultivos", {})
    
    comunes = set(cultivos1.keys()) & set(cultivos2.keys())
    
    if not comunes:
        return {"mensaje": f"No hay cultivos compartidos entre {cvegeo1} y {cvegeo2}."}
    
    resultado = {
        cultivo: {
            f"{cvegeo1}": f"{cultivos1[cultivo]} ({mapeo.get(cultivos1[cultivo], 'desconocido')})",
            f"{cvegeo2}": f"{cultivos2[cultivo]} ({mapeo.get(cultivos2[cultivo], 'desconocido')})",
            "diferencia": f"{abs(cultivos1[cultivo]-cultivos2[cultivo])}"
        }
        for cultivo in comunes
    }
    
    return resultado

def cultivos_comunes_df(municipios_data, cvegeo1: str, cvegeo2: str) -> pd.DataFrame:
    # Usamos tu función existente para obtener los cultivos comunes
    resultado = cultivos_comunes_mapeados(municipios_data, cvegeo1, cvegeo2)
    
    if "error" in resultado or "mensaje" in resultado:
        # Devuelve un DataFrame vacío si no hay resultados
        return pd.DataFrame()
    
    # Inicializar diccionario para filas
    data = {f"{cvegeo1}": {}, f"{cvegeo2}": {}, "diferencia": {}}
    
    for cultivo, valores in resultado.items():
        data[f"{cvegeo1}"][cultivo] = valores[f"{cvegeo1}"]
        data[f"{cvegeo2}"][cultivo] = valores[f"{cvegeo2}"]
        data["diferencia"][cultivo] =  np.float32(valores["diferencia"])
    
    # Crear DataFrame
    df = pd.DataFrame(data)
    
    return df

# Ejemplo de uso
df_comunes = cultivos_comunes_df(municipios_data, "01001", "01002")

In [10]:
df_comunes

Unnamed: 0,01001,01002,diferencia
Alfalfa,4.0 (Bueno),4.0 (Bueno),0.0
Nopalitos,2.0 (Malo),2.0 (Malo),0.0
Tuna,1.0 (Muy Malo),1.0 (Muy Malo),0.0
Maíz forrajero en verde,3.0 (Regular),3.0 (Regular),0.0
Brócoli,2.0 (Malo),2.0 (Malo),0.0
Chile verde,2.0 (Malo),1.0 (Muy Malo),1.0
Durazno,5.0 (Excelente),5.0 (Excelente),0.0
Frijol,1.0 (Muy Malo),4.0 (Bueno),3.0
Avena forrajera en verde,3.0 (Regular),3.0 (Regular),0.0
Maíz grano,2.0 (Malo),2.0 (Malo),0.0


In [11]:
def obtener_nombres(df: pd.DataFrame, cvegeo1: str, cvegeo2: str) -> dict:
    suelo_df = df[['CVEGEO', 'NOMGEO', 'NOM_ENT']].copy()
    """
    Devuelve los valores de suelo para dos municipios dados sus CVEGEO.

    Args:
        suelo_df: DataFrame con columnas de suelo y 'CVEGEO'.
        cvegeo1: CVEGEO del primer municipio.
        cvegeo2: CVEGEO del segundo municipio.

    Returns:
        dict con la estructura:
        {
            "01001": {columna1: valor, columna2: valor, ...},
            "01002": {columna1: valor, columna2: valor, ...}
        }
        Si un CVEGEO no se encuentra, devuelve None en su lugar.
    """
    resultados = {}
    for cvegeo in [cvegeo1, cvegeo2]:
        fila = suelo_df[suelo_df['CVEGEO'] == cvegeo]
        if not fila.empty:
            # Convertimos la fila a diccionario de columna -> valor
            resultados[cvegeo] = fila.iloc[0].to_dict()
        else:
            resultados[cvegeo] = None
    return resultados


In [12]:
valores_suelo = obtener_nombres(suelo, "01001", "01002")
print(valores_suelo)


{'01001': {'CVEGEO': '01001', 'NOMGEO': 'Aguascalientes', 'NOM_ENT': 'Aguascalientes'}, '01002': {'CVEGEO': '01002', 'NOMGEO': 'Asientos', 'NOM_ENT': 'Aguascalientes'}}


In [13]:
def rename_df(df: pd.DataFrame, dictionary: dict):
    rename_dict = {}
    for col in df.columns:
        if col in dictionary:
            nombre = dictionary[col]["NOMGEO"] + ', ' + dictionary[col]["NOM_ENT"]
            rename_dict[col] = nombre
        else:
            rename_dict[col] = col  # deja el nombre original si no está en el diccionario
    df_renamed = df.rename(columns=rename_dict)
    return df_renamed

In [14]:
rename_df(df_comunes, valores_suelo)

Unnamed: 0,"Aguascalientes, Aguascalientes","Asientos, Aguascalientes",diferencia
Alfalfa,4.0 (Bueno),4.0 (Bueno),0.0
Nopalitos,2.0 (Malo),2.0 (Malo),0.0
Tuna,1.0 (Muy Malo),1.0 (Muy Malo),0.0
Maíz forrajero en verde,3.0 (Regular),3.0 (Regular),0.0
Brócoli,2.0 (Malo),2.0 (Malo),0.0
Chile verde,2.0 (Malo),1.0 (Muy Malo),1.0
Durazno,5.0 (Excelente),5.0 (Excelente),0.0
Frijol,1.0 (Muy Malo),4.0 (Bueno),3.0
Avena forrajera en verde,3.0 (Regular),3.0 (Regular),0.0
Maíz grano,2.0 (Malo),2.0 (Malo),0.0


# Si le preguntan sobre 01001 y 01002

In [321]:
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import InMemorySaver
from langchain_openai import ChatOpenAI
from langchain.agents import create_agent
from langchain.tools import tool

In [322]:
import json
import numpy as np
# Suponiendo que json_output es un string JSON
municipios_data = json.loads(json_output_municipios)
cultivo_data = json.loads(json_output_cultivos)

In [417]:
cultivo_data

{'Avena forrajera en verde': [{'CVEGEO': '01001',
   'Idestado': 1,
   'Idmunicipio': 1,
   'valor': 3.0},
  {'CVEGEO': '01002', 'Idestado': 1, 'Idmunicipio': 2, 'valor': 3.0},
  {'CVEGEO': '01004', 'Idestado': 1, 'Idmunicipio': 4, 'valor': 3.0},
  {'CVEGEO': '01005', 'Idestado': 1, 'Idmunicipio': 5, 'valor': 3.0},
  {'CVEGEO': '01006', 'Idestado': 1, 'Idmunicipio': 6, 'valor': 3.0},
  {'CVEGEO': '01007', 'Idestado': 1, 'Idmunicipio': 7, 'valor': 2.0},
  {'CVEGEO': '01008', 'Idestado': 1, 'Idmunicipio': 8, 'valor': 1.0},
  {'CVEGEO': '01009', 'Idestado': 1, 'Idmunicipio': 9, 'valor': 2.0},
  {'CVEGEO': '01010', 'Idestado': 1, 'Idmunicipio': 10, 'valor': 2.0},
  {'CVEGEO': '02001', 'Idestado': 2, 'Idmunicipio': 1, 'valor': 1.0},
  {'CVEGEO': '02002', 'Idestado': 2, 'Idmunicipio': 2, 'valor': 5.0},
  {'CVEGEO': '02003', 'Idestado': 2, 'Idmunicipio': 3, 'valor': 1.0},
  {'CVEGEO': '02004', 'Idestado': 2, 'Idmunicipio': 4, 'valor': 1.0},
  {'CVEGEO': '02005', 'Idestado': 2, 'Idmunicipio': 

In [323]:
columns_for_agent = [
    "CVEGEO", "NOMGEO", "NOM_ENT",
    "ALTITUD",
    "PH", "CE", "CO", "CIC", "SB", "SNA",
    "K", "CA", "NA", "MG", "CACO3", "CASO4",
    "DREN_EXT", "DREN_INT",
    "R", "L", "A"
]
suelo = suelo[columns_for_agent]

In [406]:
suelo

Unnamed: 0,CVEGEO,NOMGEO,NOM_ENT,ALTITUD,PH,CE,CO,CIC,SB,SNA,...,CA,NA,MG,CACO3,CASO4,DREN_EXT,DREN_INT,R,L,A
0,01008,San José de Gracia,Aguascalientes,2300.000000,6.087500,1.750000,0.762500,9.425000,81.824997,2.987500,...,7.262500,0.312500,4.412500,0.000000,0.0,4.500000,3.625000,19.750000,19.000000,48.750000
1,01009,Tepezalá,Aguascalientes,2189.333252,5.125000,0.716667,0.283333,10.166667,66.666664,3.900000,...,10.891666,0.491667,1.791667,0.000000,0.0,3.333333,3.333333,10.833333,14.833333,41.000000
2,01010,El Llano,Aguascalientes,2037.500000,5.425000,0.100000,0.675000,10.175000,71.074997,4.275000,...,4.900000,0.437500,1.450000,0.000000,0.0,2.500000,3.250000,16.625000,19.375000,51.500000
4,01001,Aguascalientes,Aguascalientes,2000.000000,3.100000,1.000000,0.600000,5.500000,45.900002,3.200000,...,3.200000,0.350000,0.850000,0.000000,0.0,4.000000,2.500000,4.000000,13.000000,33.000000
5,01002,Asientos,Aguascalientes,2029.666626,4.395555,0.532778,0.275000,7.955555,56.042221,4.400000,...,6.183333,0.553333,1.236667,0.000000,0.0,3.666667,2.966667,14.011111,11.088889,32.122223
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2469,32036,Ojocaliente,Zacatecas,2221.500000,3.016667,0.058333,0.658333,9.466666,39.083332,0.900000,...,12.633333,0.183333,0.816667,0.000000,0.0,3.500000,2.750000,10.666667,11.166667,19.833334
2470,32014,General Francisco R. Murguía,Zacatecas,1866.555542,6.842778,3.518889,0.594815,20.939444,80.762962,18.122963,...,27.555000,5.050926,2.313704,7.508889,0.0,3.222222,2.944444,24.785185,16.129629,41.492592
2472,32045,Tepechitlán,Zacatecas,1860.000000,5.366667,1.000000,0.400000,10.300000,87.566666,8.800000,...,3.766667,0.900000,5.666667,0.000000,0.0,3.000000,4.000000,24.000000,14.000000,62.000000
2473,32046,Tepetongo,Zacatecas,1950.000000,6.475000,2.000000,0.800000,32.549999,78.275002,1.625000,...,14.750000,0.500000,8.600000,0.000000,0.0,3.000000,3.250000,48.500000,28.500000,23.000000


## Dado un cultivo, dar sugerencias nuevas de donde plantar

In [324]:
def municipios_similares_iniciales(cultivos_dict, similarity, confidence, cultivo, min_valor=4, top_k=5, min_score=0.70):
    """
    Devuelve municipios similares distintos para un cultivo, ordenados por score descendente:
    - Solo originales con valor >= min_valor
    - Calcula score = similarity * confidence
    - Considera hasta top_k similares por cada original
    - Mantiene solo el mejor score por municipio similar
    - Indica a qué municipio original se parece
    - Excluye similares que ya tienen valor registrado (solo N/A)
    - Filtra los similares con score < min_score
    """
    if cultivo not in cultivos_dict:
         return {
        "originales": [],
        "similares_distintos": [],
        "mensaje": f"El cultivo '{cultivo}' no se encontró en los datos disponibles."
    }
    
    valores_cultivo = {m["CVEGEO"]: m["valor"] for m in cultivos_dict[cultivo]}

    # Filtrar originales válidos
    originales = [
        {"CVEGEO": m, "value": v}
        for m, v in valores_cultivo.items()
        if v >= min_valor and m in similarity.columns
    ]
    if not originales:
        return {
            "originales": [],
            "similares_distintos": [],
            "mensaje": (
                f"El cultivo '{cultivo}' existe en los datos, pero ningún municipio "
                f"cumple con el valor mínimo requerido (≥ {min_valor})."
            )
        }

    orig_cvegeos = [o["CVEGEO"] for o in originales]
    similares_map = {}

    for m_orig in orig_cvegeos:
        if m_orig not in similarity.index:
            continue

        # Calcular score = similarity * confidence
        sim_scores = (similarity.loc[m_orig] * confidence.loc[m_orig]).drop(m_orig)
        sim_scores_sorted = sim_scores.sort_values(ascending=False).head(top_k)

        for m_sim, score_val in sim_scores_sorted.items():
            sim_val = similarity.loc[m_orig, m_sim]
            conf_val = confidence.loc[m_orig, m_sim]
            valor_sim = valores_cultivo.get(m_sim, "N/A")

            # Solo mantener similares sin valor registrado (N/A)
            if valor_sim != "N/A":
                continue

            # Guardar solo el mejor score para cada similar
            if m_sim not in similares_map or score_val > similares_map[m_sim]["score"]:
                similares_map[m_sim] = {
                    "CVEGEO": m_sim,
                    "similarity": sim_val,
                    "confidence": conf_val,
                    "score": score_val,
                    "value": valor_sim,
                    "similar_to": m_orig
                }

    # Convertir a lista y filtrar por min_score
    similares_distintos = [
        v for v in similares_map.values() if v["score"] >= min_score *100
    ]

    # Ordenar por score descendente
    similares_distintos = sorted(similares_distintos, key=lambda x: x["score"], reverse=True)

    return {
        "originales": originales,
        "similares_distintos": similares_distintos
    }
def recomendar_municipios_por_cultivo_1(cultivos_dict, similarity, confidence, cultivo,
                                        suelo: pd.DataFrame, min_valor=4, top_k=3, min_score=0.70) -> dict:
    """
    Finds distinct municipalities with agro-environmental conditions suitable for a given crop.
    - Considers only original municipalities with value ≥ min_valor.
    - Computes a composite score = similarity * confidence.
    - Keeps only the best match per similar municipality and filters by min_score.
    - Excludes municipalities that already cultivate the crop (only "N/A" values are considered similar).
    - Adds soil feature vectors for both originals and similar municipalities, excluding the redundant 'CVEGEO' column.
    Focuses primarily on identifying **new potential areas for cultivation** while also listing
    **current producing municipalities** and how well they perform.
    """
    
    resultado_cultivo = municipios_similares_iniciales(cultivos_dict, similarity, confidence, cultivo, min_valor, top_k, min_score)

    # Función interna para obtener features de un solo municipio
    def obtener_features(cvegeo: str) -> dict:
        fila = suelo[suelo['CVEGEO'] == cvegeo]
        if fila.empty:
            return None
        features = fila.iloc[0].to_dict()
        features.pop("CVEGEO", None)  # Eliminar CVEGEO si existe
        return features

    # Agregar features a originales
    for orig in resultado_cultivo.get("originales", []):
        cve = orig["CVEGEO"]
        orig["features_suelo"] = obtener_features(cve)

    # Agregar features a similares
    for sim in resultado_cultivo.get("similares_distintos", []):
        cve = sim["CVEGEO"]
        sim["features_suelo"] = obtener_features(cve)

    return resultado_cultivo


In [325]:
recomendar_municipios_por_cultivo_1(cultivo_data, df_similarity, df_confidence, "Manzana", suelo, min_valor=4, min_score=0.7)

{'originales': [{'CVEGEO': '01002',
   'value': 5.0,
   'features_suelo': {'NOMGEO': 'Asientos',
    'NOM_ENT': 'Aguascalientes',
    'ALTITUD': 2029.6666259765625,
    'PH': 4.39555549621582,
    'CE': 0.5327777862548828,
    'CO': 0.2750000059604645,
    'CIC': 7.955555438995361,
    'SB': 56.04222106933594,
    'SNA': 4.400000095367432,
    'K': 1.2644444704055786,
    'CA': 6.183333396911621,
    'NA': 0.5533333420753479,
    'MG': 1.2366666793823242,
    'CACO3': 0.0,
    'CASO4': 0.0,
    'DREN_EXT': 3.6666667461395264,
    'DREN_INT': 2.9666666984558105,
    'R': 14.01111125946045,
    'L': 11.088889122009277,
    'A': 32.122222900390625}},
  {'CVEGEO': '08031',
   'value': 5.0,
   'features_suelo': {'NOMGEO': 'Guerrero',
    'NOM_ENT': 'Chihuahua',
    'ALTITUD': 2152.142822265625,
    'PH': 6.023928642272949,
    'CE': 1.1659523248672485,
    'CO': 0.6935714483261108,
    'CIC': 18.163095474243164,
    'SB': 71.96023559570312,
    'SNA': 3.081904649734497,
    'K': 0.605476200

## Dado un municipio, dar sugerencia de donde hay otro similar y que plantar en este

In [326]:
def top_cultivos_recomendados_iniciales(cvegeo, similarity, confidence, municipios_data, top_n=3, min_valor=4.0):
    """
    Devuelve:
    - original: CVEGEO dado
    - similares: lista de los top_n municipios más parecidos con similarity, confidence y score
    - cultivos_recomendados: lista de cultivos con valor en el municipio original >= min_valor
    """
    if cvegeo not in similarity.columns:
        return {"error": f"{cvegeo} no está en similarity columns"}
    
    # Calcular score
    sim_scores = similarity.loc[cvegeo] * confidence.loc[cvegeo]
    sim_scores[cvegeo] = -1  # no contarse a sí mismo
    
    # Obtener los top_n más similares
    top_similares_cvegeos = sim_scores.nlargest(top_n).index.tolist()
    
    # Construir lista de similares con similarity, confidence y score
    similares_list = []
    for sim_cvegeo in top_similares_cvegeos:
        sim_val = similarity.loc[cvegeo, sim_cvegeo]
        conf_val = confidence.loc[cvegeo, sim_cvegeo]
        score_val = sim_val * conf_val
        similares_list.append({
            "CVEGEO": sim_cvegeo,
            "similarity": sim_val,
            "confidence": conf_val,
            "score": score_val
        })
    
    # Obtener cultivos recomendados según el municipio original
    orig_data = next((m for m in municipios_data if m["CVEGEO"] == cvegeo), None)
    cultivos_recomendados = []
    if orig_data:
        for cultivo, valor in orig_data["cultivos"].items():
            if valor >= min_valor:
                cultivos_recomendados.append({"cultivo": cultivo, "value_original": valor})
    
    return {
        "original": cvegeo,
        "similares": similares_list,
        "cultivos_recomendados_inicialmente": cultivos_recomendados
    }

def recomendar_cultivos_por_municipio_1(cvegeo, similarity, confidence, municipios_data, suelo: pd.DataFrame, top_n=3, min_valor=4.0) -> dict:
    """
    Devuelve:
    - original: CVEGEO dado
    - similares: lista de los top_n municipios más parecidos con similarity, confidence y score
    - Para cada similar:
        * cultivos_nuevos: cultivos recomendados que no cultiva actualmente
        * cultivos_compartidos: cultivos comunes con valor_original, valor_similar y diferencia
    Añade los features de suelo al original y a los similares de un resultado de cultivo,
    eliminando la columna 'CVEGEO' de los features para evitar redundancia.
    """
    resultado_cultivo = top_cultivos_recomendados_iniciales(
        cvegeo, similarity, confidence, municipios_data, top_n, min_valor
    )

    # Función interna para obtener features de un solo municipio
    def obtener_features(cvegeo: str) -> dict:
        fila = suelo[suelo['CVEGEO'] == cvegeo]
        if fila.empty:
            return None
        features = fila.iloc[0].to_dict()
        features.pop("CVEGEO", None)  # Eliminar CVEGEO si existe
        return features

    # Agregar features al original
    if "original" in resultado_cultivo:
        cve = resultado_cultivo["original"]
        resultado_cultivo["features_suelo_original"] = obtener_features(cve)

    # Obtener cultivos del original
    orig_data = next((m for m in municipios_data if m["CVEGEO"] == cvegeo), None)
    cultivos_original = orig_data["cultivos"] if orig_data else {}

    # Agregar features y cultivos nuevos/compartidos a los similares
    for sim in resultado_cultivo.get("similares", []):
        cve = sim["CVEGEO"]
        sim["features_suelo"] = obtener_features(cve)

        # Buscar cultivos del similar
        sim_data = next((m for m in municipios_data if m["CVEGEO"] == cve), None)
        if sim_data:
            cultivos_similar = sim_data["cultivos"]
            cultivos_existentes = set(cultivos_similar.keys())

            cultivos_nuevos = []
            cultivos_compartidos = []

            for cultivo, valor_orig in cultivos_original.items():
                if valor_orig >= min_valor:
                    if cultivo in cultivos_existentes:
                        valor_sim = cultivos_similar[cultivo]
                        diferencia = float(valor_sim) - float(valor_orig)
                        cultivos_compartidos.append({
                            "cultivo": cultivo,
                            "value_original": valor_orig,
                            "value_similar": valor_sim,
                            "difference": abs(diferencia)
                        })
                    else:
                        cultivos_nuevos.append({
                            "cultivo": cultivo,
                            "value_original": valor_orig
                        })

            sim["cultivos_nuevos"] = cultivos_nuevos
            sim["cultivos_compartidos"] = cultivos_compartidos

    # Quitar lista intermedia del resultado
    resultado_cultivo.pop("cultivos_recomendados_inicialmente", None)

    return resultado_cultivo


In [327]:
recomendar_cultivos_por_municipio_1("01001", df_similarity, df_confidence, municipios_data, suelo, top_n=1, min_valor=4)

{'error': '01001 no está en similarity columns'}

In [328]:
def cultivos_comunes_mapeados(municipios_data, cvegeo1: str, cvegeo2: str) -> dict:
    """
    Returns the common crops between two municipalities and their values,
    using a custom mapping of values to labels.
    
    Args:
        cvegeo1: CVEGEO del primer municipio.
        cvegeo2: CVEGEO del segundo municipio.
    
    Returns:
        dict con cultivos comunes y valores mapeados.
    """
    mapeo = {
    1: 'Muy Malo',
    2: 'Malo',
    3: 'Regular',
    4: 'Bueno',
    5: 'Excelente'
    }
    # Buscar municipios
    m1 = next((m for m in municipios_data if m["CVEGEO"] == cvegeo1), None)
    m2 = next((m for m in municipios_data if m["CVEGEO"] == cvegeo2), None)
    
    if not m1 or not m2:
        return {"error": "Uno o ambos municipios no fueron encontrados."}
    
    cultivos1 = m1.get("cultivos", {})
    cultivos2 = m2.get("cultivos", {})
    
    comunes = set(cultivos1.keys()) & set(cultivos2.keys())
    
    if not comunes:
        return {"mensaje": f"No hay cultivos compartidos entre {cvegeo1} y {cvegeo2}."}
    
    resultado = {
        cultivo: {
            f"{cvegeo1}": f"{cultivos1[cultivo]} ({mapeo.get(cultivos1[cultivo], 'desconocido')})",
            f"{cvegeo2}": f"{cultivos2[cultivo]} ({mapeo.get(cultivos2[cultivo], 'desconocido')})",
            "diferencia": f"{abs(cultivos1[cultivo]-cultivos2[cultivo])}"
        }
        for cultivo in comunes
    }
    
    return resultado

def cultivos_comunes_dict_1(municipios_data, cvegeo1: str, cvegeo2: str) -> dict:
    """
    Devuelve un diccionario con los cultivos comunes entre dos municipios (cvegeo1 y cvegeo2),
    sus valores y la diferencia.

    Args:
        municipios_data: lista de diccionarios con información de los municipios
        cvegeo1: CVEGEO del primer municipio
        cvegeo2: CVEGEO del segundo municipio

    Returns:
        dict con estructura:
        {
            "cultivo1": {cvegeo1: valor1, cvegeo2: valor2, "diferencia": diff},
            "cultivo2": {...},
            ...
        }
    """
    # Usamos tu función existente para obtener los cultivos comunes
    resultado = cultivos_comunes_mapeados(municipios_data, cvegeo1, cvegeo2)
    
    if "error" in resultado or "mensaje" in resultado:
        return {}  # Devuelve dict vacío si no hay resultados

    data = {}
    for cultivo, valores in resultado.items():
        data[cultivo] = {
            cvegeo1: valores.get(cvegeo1, None),
            cvegeo2: valores.get(cvegeo2, None),
            "diferencia": np.float32(valores.get("diferencia", 0.0))
        }
    
    return data



In [329]:
cultivos_comunes_dict_1(municipios_data, "01001", "01002")

{'Maíz forrajero en verde': {'01001': '3.0 (Regular)',
  '01002': '3.0 (Regular)',
  'diferencia': 0.0},
 'Uva': {'01001': '3.0 (Regular)', '01002': '4.0 (Bueno)', 'diferencia': 1.0},
 'Tomate verde': {'01001': '2.0 (Malo)',
  '01002': '3.0 (Regular)',
  'diferencia': 1.0},
 'Alfalfa': {'01001': '4.0 (Bueno)',
  '01002': '4.0 (Bueno)',
  'diferencia': 0.0},
 'Frijol': {'01001': '1.0 (Muy Malo)',
  '01002': '4.0 (Bueno)',
  'diferencia': 3.0},
 'Durazno': {'01001': '5.0 (Excelente)',
  '01002': '5.0 (Excelente)',
  'diferencia': 0.0},
 'Avena forrajera en verde': {'01001': '3.0 (Regular)',
  '01002': '3.0 (Regular)',
  'diferencia': 0.0},
 'Brócoli': {'01001': '2.0 (Malo)', '01002': '2.0 (Malo)', 'diferencia': 0.0},
 'Nopalitos': {'01001': '2.0 (Malo)',
  '01002': '2.0 (Malo)',
  'diferencia': 0.0},
 'Pastos y praderas': {'01001': '2.0 (Malo)',
  '01002': '3.0 (Regular)',
  'diferencia': 1.0},
 'Maíz grano': {'01001': '2.0 (Malo)',
  '01002': '2.0 (Malo)',
  'diferencia': 0.0},
 'Tuna':

In [330]:
from langchain.tools import tool
@tool 
def recomendar_municipios_por_cultivo(cultivo: str, min_value: float = 4.0, top_k=3, min_score=0.70) -> dict:
    """
    Given a crop, identifies municipalities with similar agro-environmental conditions 
    suitable for its cultivation.  
    Focuses on finding **new potential areas (similares)** while also listing 
    **current producing municipalities (originales)** and how well they perform.  
    Uses a score = similarity*confidence and includes detailed soil feature vectors for each.

    Parameters:
      • cultivo — Crop name to analyze and find suitable municipalities for.  
      • min_value — Minimum cultivation value to consider a municipality as an “original” producer (default: 4.0).  
      • top_k — Number of top similar municipalities to include in the recommendation (default: 3).  
                 Helps prevent cases where no similar municipality meets the conditions.  
      • min_score — Minimum similarity-confidence score to keep a municipality as valid (default: 0.70).  
    """
    return recomendar_municipios_por_cultivo_1(cultivo_data, df_similarity, df_confidence, cultivo, suelo, min_value, top_k, min_score)

@tool
def recomendar_cultivos_por_municipio(cvegeo: str, top_n: int = 1, min_value: float = 4.0) -> dict:
    """
    Given a municipality (CVEGEO), identifies the most similar municipalities based on 
    similarity*confidence scores.  
    For each similar municipality, it provides:
      • **New crops** that are promising but not currently cultivated.  
      • **Shared crops** with their respective values in both municipalities and the difference.  
    Also includes detailed soil feature vectors for both the original and similar municipalities.

    Parameters:
      • cvegeo — Unique municipality identifier (CVEGEO) to use as the reference.  
      • top_n — Number of top similar municipalities to include in the analysis (default: 1).  
      • min_value — Minimum cultivation value required for a crop to be considered significant 
                    in the similarity and recommendation process (default: 4.0).  
    """
    return recomendar_cultivos_por_municipio_1(cvegeo, df_similarity, df_confidence, municipios_data, suelo, top_n, min_value)
@tool
def cultivos_comunes(cvegeo1: str, cvegeo2: str) -> dict:
    """
    Compares two municipalities (CVEGEO1 and CVEGEO2) and returns the crops they share in common,
    including their respective values and the performance difference.
    Numeric suitability values are mapped to qualitative labels (e.g., “Excelente”, “Regular”, “Malo”) for easier interpretation.
    """
    return cultivos_comunes_dict_1(municipios_data, cvegeo1, cvegeo2)

In [331]:
lista_cultivos = list(df.columns)
lista_cultivos = lista_cultivos[3:]

In [332]:
lista_municipios = list(df_similarity.columns)

In [333]:
df_filtrado = suelo[suelo["CVEGEO"].isin(lista_municipios)]

# Crear diccionario en el formato deseado
dict_cvegeo = {
    row["CVEGEO"]: f"{row['NOMGEO']}, {row['NOM_ENT']}"
    for _, row in df_filtrado.iterrows()
}

In [340]:
selector_prompt = (
    "You are an expert agronomy assistant specialized in crop suitability and soil analysis for Mexican municipalities. "
    "You have access to three analytical tools that use similarity, confidence, and soil characteristics:\n\n"
    "Important: Each crop has a 'value' score (1-5) indicating suitability, which can be mapped to qualitative levels "
    "such as: 5=Excelente, 4=Bueno, 3=Regular, 2=Malo, 1=Muy Malo, and 'N/A' meaning the crop is not currently cultivated "
    "or lacks recorded data in that municipality.\n\n"
    "1. recomendar_municipios_por_cultivo(cultivo, min_value=4.0, top_k=3, min_score=0.70):\n"
    "   Finds municipalities with similar agro-environmental conditions for a given crop.\n"
    "   - Returns current producers (originales) and potential new areas (similares_distintos) with soil features.\n"
    "   - Use results to argue where the crop would likely thrive and why, referencing soil, similarity, confidence, and score.\n\n"
    "2. recomendar_cultivos_por_municipio(cvegeo, top_n=1, min_value=4.0):\n"
    "   For a municipality, identifies top similar regions and recommends promising or shared crops.\n"
    "   - Use shared crops as a baseline and reason which new crops could succeed based on data and agronomic logic.\n"
    "   - You may propose additional crops if justified by soil or environmental similarity "
    "(take into account the average difference in shared crops).\n\n"
    "3. cultivos_comunes(cvegeo1, cvegeo2):\n"
    "   Compares two municipalities and lists shared crops with performance differences and qualitative ratings.\n"
    "   - When used, provide a concise summary of shared crops and what that implies about their conditions.\n\n"
    "Always select the most relevant tool, interpret the output critically, and craft conclusions that combine data-driven reasoning with expert judgment."
)

# Inicializa el LLM
selector_model = ChatOpenAI(
    model="gpt-5-nano",
    reasoning_effort='medium')

selector_agent = create_agent(
    model=selector_model,
    tools=[recomendar_municipios_por_cultivo, recomendar_cultivos_por_municipio, cultivos_comunes],
    system_prompt=selector_prompt
)

In [341]:
query = "Where is the best place to plant Manzana"

for step in selector_agent.stream(
    {"messages": [{"role": "user", "content": query}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()

Tool Calls:
  recomendar_municipios_por_cultivo (call_w4P6vnewAIO9dAnTCsOvjsAy)
 Call ID: call_w4P6vnewAIO9dAnTCsOvjsAy
  Args:
    cultivo: Manzana
    min_value: 4.0
    top_k: 3
    min_score: 0.7
Name: recomendar_municipios_por_cultivo

{'originales': [{'CVEGEO': '01002', 'value': 5.0, 'features_suelo': {'NOMGEO': 'Asientos', 'NOM_ENT': 'Aguascalientes', 'ALTITUD': 2029.6666259765625, 'PH': 4.39555549621582, 'CE': 0.5327777862548828, 'CO': 0.2750000059604645, 'CIC': 7.955555438995361, 'SB': 56.04222106933594, 'SNA': 4.400000095367432, 'K': 1.2644444704055786, 'CA': 6.183333396911621, 'NA': 0.5533333420753479, 'MG': 1.2366666793823242, 'CACO3': 0.0, 'CASO4': 0.0, 'DREN_EXT': 3.6666667461395264, 'DREN_INT': 2.9666666984558105, 'R': 14.01111125946045, 'L': 11.088889122009277, 'A': 32.122222900390625}}, {'CVEGEO': '08031', 'value': 5.0, 'features_suelo': {'NOMGEO': 'Guerrero', 'NOM_ENT': 'Chihuahua', 'ALTITUD': 2152.142822265625, 'PH': 6.023928642272949, 'CE': 1.1659523248672485, 'CO':

In [241]:
query = "What places are similar to 01002?"

for step in selector_agent.stream(
    {"messages": [{"role": "user", "content": query}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()

Tool Calls:
  recomendar_cultivos_por_municipio (call_UgsOHK9q7UAdd0H0hcaISTIU)
 Call ID: call_UgsOHK9q7UAdd0H0hcaISTIU
  Args:
    cvegeo: 01002
    top_n: 3
    min_value: 4.0
Name: recomendar_cultivos_por_municipio

{'original': '01002', 'similares': [{'CVEGEO': '16085', 'similarity': 0.8067035, 'confidence': 99.42035, 'score': 80.20274, 'features_suelo': {'NOMGEO': 'Tangancícuaro', 'NOM_ENT': 'Michoacán de Ocampo', 'ALTITUD': 2075.0, 'PH': 6.860000133514404, 'CE': 0.6000000238418579, 'CO': 0.8299999833106995, 'CIC': 41.84000015258789, 'SB': 65.9800033569336, 'SNA': 1.399999976158142, 'K': 0.8399999737739563, 'CA': 6.050000190734863, 'NA': 0.6000000238418579, 'MG': 5.840000152587891, 'CACO3': 0.0, 'CASO4': 0.0, 'DREN_EXT': 4.0, 'DREN_INT': 4.0, 'R': 26.0, 'L': 29.0, 'A': 45.0}, 'cultivos_nuevos': [{'cultivo': 'Frijol', 'value_original': 4.0}, {'cultivo': 'Alfalfa', 'value_original': 4.0}, {'cultivo': 'Durazno', 'value_original': 5.0}, {'cultivo': 'Uva', 'value_original': 4.0}, {'cul

### Agente que reescribe para evitar problemas

In [None]:
dict_cvegeo
lista_cultivos
lista_municipios

In [50]:
len(dict_cvegeo)

123

In [274]:
import re

def clean_text(text: str) -> str:
    """
    Normaliza un string:
    - Convierte a minúsculas
    - Quita símbolos innecesarios (excepto ? y !)
    - Elimina espacios múltiples y caracteres innecesarios
    """
    # Convertir a minúsculas
        
    # Quitar símbolos que no sean letras, números, espacios, ?, !
    text = re.sub(r"[^a-z0-9áéíóúüñ\s?!.,;:%+*'\"()\[\]-]", "", text, flags=re.IGNORECASE)
    
    # Reemplazar múltiples espacios por uno solo
    text = re.sub(r'\s+', ' ', text).strip()
    
    return text

In [356]:
rewriter_prompt = (
    "You are a preprocessing agent for normalizing user queries about crops and municipalities.\n"
    "You are given the full list of crops and the full dictionary of municipalities with CVEGEO:\n"
    f"Cultivos: {lista_cultivos}\n"
    f"Municipios: {dict_cvegeo}\n\n"
    "Step 1: Detect all words in the input that could be crops or municipalities, even if slightly misspelled.\n"
    "Step 2: For each candidate word:\n"
    "        - If it's a crop, select the closest match from the given crops list.\n"
    "        - If it's a municipality, select the closest match from the given municipios dictionary and use ONLY THE CVEGEO key.\n"
    "Step 3: If no REASONABLE match is found for a crop, use 'N/A'. If no reasonable match is found for a municipality, use 'N/A'.\n"
    "Step 4: Output JSON with keys: rephrased_text, original_text, note.\n"
    "Step 5: Keep the note short and concise, mentioning which DB part was used.\n\n"
    
    "Example input: 'Where is the best place to plant manzina in Aldma?'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'Where is the best place to plant manzina in Aldma?',\n"
    "  'rephrased_text': 'Where is the best place to plant Manzana in 08002?',\n"
    "  'note': 'crop corrected using crop list; municipality corrected using municipios dictionary'\n"
    "}\n\n"
    
    "Example input: 'Where can I plant Manzana?'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'Where can I plant Manzana?',\n"
    "  'rephrased_text': 'Where can I plant Manzana?',\n"
    "  'note': 'crop matched using crop list; municipality is not mentioned'\n"
    "}\n\n"
    
    "Example input: 'I want to grow xyzabc in Asientos'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'I want to grow xyzabc in Asientos',\n"
    "  'rephrased_text': 'I want to grow N/A in 01002',\n"
    "  'note': 'crop cannot be corrected using crop list; municipality corrected using municipios dictionary'\n"
    "}\n"
    "Example input: 'I want to grow manzina in shajshkh'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'I want to grow manzina in shajshkh',\n"
    "  'rephrased_text': 'I want to grow Manzana in N/A',\n"
    "  'note': 'crop corrected using crop list; municipality cannot be corrected using municipios dictionary'\n"
    "}\n"
    "EXAMPLES OF WRONG ANSWERS: ['Asientos (01002)', 'Asientos'], CORRECT ONE: '01002'"
)

# Inicializa el LLM
rewriter_model = ChatOpenAI(
    model="gpt-5-nano",
    reasoning_effort='low')


rewriter_agent = create_agent(
    model=rewriter_model,
    system_prompt=rewriter_prompt
)

In [353]:
query = "Tell me similar municipalities to asientos and what crops are the best to plant"
clean_text(query)

'Tell me similar municipalities to asientos and what crops are the best to plant'

In [354]:
query = clean_text(query)

for step in rewriter_agent.stream(
    {"messages": [{"role": "user", "content": query}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()


{
  "original_text": "Tell me similar municipalities to asientos and what crops are the best to plant",
  "rephrased_text": "Tell me similar municipalities to 01002 and what crops are the best to plant",
  "note": "municipality corrected using municipios dictionary; similarity data not available in DB; crops suggested from crop list"
}


In [345]:
query = "Where is the best place to plant manzina"
clean_text(query)

'Where is the best place to plant manzina'

In [346]:
query = clean_text(query)

for step in rewriter_agent.stream(
    {"messages": [{"role": "user", "content": query}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()


{
  "original_text": "Where is the best place to plant manzina",
  "rephrased_text": "Where is the best place to plant Manzana",
  "note": "crop corrected using crop list; municipality is not mentioned"
}


In [347]:
query = "Where is the best place to plant manzina in tgsaush"
clean_text(query)

'Where is the best place to plant manzina in tgsaush'

In [348]:
query = clean_text(query)

for step in rewriter_agent.stream(
    {"messages": [{"role": "user", "content": query}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()


{
  "original_text": "Where is the best place to plant manzina in tgsaush",
  "rephrased_text": "Where is the best place to plant Manzana in N/A",
  "note": "crop corrected using crop list; municipality cannot be corrected using municipios dictionary"
}


## adding now both rewrite and recommender

In [366]:
rewriter_prompt = (
    "You are a preprocessing agent for normalizing user queries about crops and municipalities.\n"
    "You are given the full list of crops and the full dictionary of municipalities with CVEGEO:\n"
    f"Cultivos: {lista_cultivos}\n"
    f"Municipios: {dict_cvegeo}\n\n"
    "Step 1: Detect all words in the input that could be crops or municipalities, even if slightly misspelled.\n"
    "Step 2: For each candidate word:\n"
    "        - If it's a crop, select the closest match from the given crops list.\n"
    "        - If it's a municipality, select the closest match from the given municipios dictionary and use ONLY THE CVEGEO key.\n"
    "Step 3: If no REASONABLE match is found for a crop, use 'N/A'. If no reasonable match is found for a municipality, use 'N/A'.\n"
    "Step 4: Output JSON with keys: rephrased_text, original_text, note.\n"
    "Step 5: Keep the note short and concise, mentioning which DB part was used.\n\n"
    
    "Example input: 'Where is the best place to plant manzina in Aldma?'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'Where is the best place to plant manzina in Aldma?',\n"
    "  'rephrased_text': 'Where is the best place to plant Manzana in 08002?',\n"
    "  'note': 'crop corrected using crop list; municipality corrected using municipios dictionary'\n"
    "}\n\n"
    
    "Example input: 'Where can I plant Manzana?'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'Where can I plant Manzana?',\n"
    "  'rephrased_text': 'Where can I plant Manzana?',\n"
    "  'note': 'crop matched using crop list; municipality is not mentioned'\n"
    "}\n\n"
    
    "Example input: 'I want to grow xyzabc in Asientos'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'I want to grow xyzabc in Asientos',\n"
    "  'rephrased_text': 'I want to grow N/A in 01002',\n"
    "  'note': 'crop cannot be corrected using crop list; municipality corrected using municipios dictionary'\n"
    "}\n"
    "Example input: 'I want to grow manzina in shajshkh'\n"
    "Example output:\n"
    "{\n"
    "  'original_text': 'I want to grow manzina in shajshkh',\n"
    "  'rephrased_text': 'I want to grow Manzana in N/A',\n"
    "  'note': 'crop corrected using crop list; municipality cannot be corrected using municipios dictionary'\n"
    "}\n"
    "EXAMPLES OF WRONG ANSWERS: ['Asientos (01002)', 'Asientos'], CORRECT ONE: '01002'"
)

# Inicializa el LLM
rewriter_model = ChatOpenAI(
    model="gpt-5-mini",
    reasoning_effort='minimal')


rewriter_agent = create_agent(
    model=rewriter_model,
    system_prompt=rewriter_prompt
)

In [369]:
query = "Tell me similar municipalities to asientos and what crops are the best to plant"
query = clean_text(query)
input_data = {"messages": [{"role": "user", "content": query}]}

response = rewriter_agent.invoke(input_data)
response

{'messages': [HumanMessage(content='Tell me similar municipalities to asientos and what crops are the best to plant', additional_kwargs={}, response_metadata={}, id='a36defdd-7950-40a7-89e7-58b8454ee816'),
  AIMessage(content='{\n  "original_text": "Tell me similar municipalities to asientos and what crops are the best to plant",\n  "rephrased_text": "Tell me similar municipalities to 01002 and what crops are the best to plant",\n  "note": "municipality corrected using municipios dictionary; crop not mentioned"\n}', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 74, 'prompt_tokens': 3858, 'total_tokens': 3932, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 2816}}, 'model_provider': 'openai', 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CaU50E6zPS5RtYsA

In [383]:
selector_prompt = (
    "You are an expert agronomy assistant specialized in crop suitability and soil analysis for Mexican municipalities. "
    "All user queries have been preprocessed by a middleware agent which:\n"
    "- Corrects misspelled municipalities to their CVEGEO codes.\n"
    "- Corrects misspelled crops using a reference crop list.\n"
    "- Outputs a JSON with 'rephrased_text', 'original_text', and a short 'note'.\n\n"
    "Important: Each crop has a 'value' score (1-5) indicating suitability, mapped to qualitative levels: "
    "5=Excelente, 4=Bueno, 3=Regular, 2=Malo, 1=Muy Malo, and 'N/A' meaning the crop is not cultivated or lacks data in that municipality.\n\n"
    "Available analytical tools:\n\n"
    "1. recomendar_municipios_por_cultivo(cultivo, min_value=4.0, top_k=3, min_score=0.70):\n"
    "   - Finds municipalities with similar agro-environmental conditions for a given crop.\n"
    "   - Returns current producers (originales) and potential new areas (similares_distintos) with soil features.\n"
    "   - Use results to argue where the crop would likely thrive, referencing soil, similarity, confidence, and score.\n\n"
    "2. recomendar_cultivos_por_municipio(cvegeo, top_n=1, min_value=4.0):\n"
    "   - Identifies top similar regions for a municipality and recommends promising or shared crops.\n"
    "   - Use shared crops as a baseline and reason which new crops could succeed based on soil and environmental similarity.\n"
    "   - You may propose additional crops if justified by soil or environmental similarity, considering the average difference in shared crops.\n\n"
    "3. cultivos_comunes(cvegeo1, cvegeo2):\n"
    "   - Compares two municipalities and lists shared crops with performance differences and qualitative ratings.\n"
    "   - Provide a concise summary of shared crops and implications for their conditions.\n\n"
    "Guidelines:\n"
    "- Always use the preprocessed inputs from the middleware (CVEGEO codes and validated crops).\n"
    "- Select the most relevant tool and interpret outputs critically.\n"
    "- Base your conclusions on both data-driven reasoning and expert agronomic judgment.\n"
    "- When a crop or municipality is marked 'N/A', explain why it cannot be evaluated.\n"
    "- Keep recommendations and reasoning clear and concise, referencing soil, similarity, and suitability scores.\n\n"
    "- Add the 'note' from the input, dont change anything from it"
    "Example middleware output:\n"
    "{\n"
    "  'original_text': 'Tell me similar municipalities to asientos and what crops are the best to plant',\n"
    "  'rephrased_text': 'Tell me similar municipalities to 01002 and what crops are the best to plant',\n"
    "  'note': 'municipality corrected using municipios dictionary; similarity data not available in DB; crops suggested from crop list'\n"
    "}"
)


# Inicializa el LLM
selector_model = ChatOpenAI(
    model="gpt-5-nano",
    reasoning_effort='medium')

selector_agent = create_agent(
    model=selector_model,
    tools=[recomendar_municipios_por_cultivo, recomendar_cultivos_por_municipio, cultivos_comunes],
    system_prompt=selector_prompt
)

In [384]:
response

{'messages': [HumanMessage(content='Tell me similar municipalities to asientos and what crops are the best to plant', additional_kwargs={}, response_metadata={}, id='a36defdd-7950-40a7-89e7-58b8454ee816'),
  AIMessage(content='{\n  "original_text": "Tell me similar municipalities to asientos and what crops are the best to plant",\n  "rephrased_text": "Tell me similar municipalities to 01002 and what crops are the best to plant",\n  "note": "municipality corrected using municipios dictionary; crop not mentioned"\n}', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 74, 'prompt_tokens': 3858, 'total_tokens': 3932, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 2816}}, 'model_provider': 'openai', 'model_name': 'gpt-5-mini-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CaU50E6zPS5RtYsA

In [385]:
from langchain.messages import HumanMessage, AIMessage

# Extraer solo el contenido del AIMessage
ai_content = next(
    (msg.content for msg in response['messages'] if isinstance(msg, AIMessage)),
    None
)

print(ai_content)

{
  "original_text": "Tell me similar municipalities to asientos and what crops are the best to plant",
  "rephrased_text": "Tell me similar municipalities to 01002 and what crops are the best to plant",
  "note": "municipality corrected using municipios dictionary; crop not mentioned"
}


In [386]:
response_rewriter = ai_content

for step in selector_agent.stream(
    {"messages": [{"role": "user", "content": response_rewriter}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()

Tool Calls:
  recomendar_cultivos_por_municipio (call_8eIw8Iixlc1ORB0lhYbCn0j5)
 Call ID: call_8eIw8Iixlc1ORB0lhYbCn0j5
  Args:
    cvegeo: 01002
    top_n: 3
    min_value: 4.0
Name: recomendar_cultivos_por_municipio

{'original': '01002', 'similares': [{'CVEGEO': '16085', 'similarity': 0.8067035, 'confidence': 99.42035, 'score': 80.20274, 'features_suelo': {'NOMGEO': 'Tangancícuaro', 'NOM_ENT': 'Michoacán de Ocampo', 'ALTITUD': 2075.0, 'PH': 6.860000133514404, 'CE': 0.6000000238418579, 'CO': 0.8299999833106995, 'CIC': 41.84000015258789, 'SB': 65.9800033569336, 'SNA': 1.399999976158142, 'K': 0.8399999737739563, 'CA': 6.050000190734863, 'NA': 0.6000000238418579, 'MG': 5.840000152587891, 'CACO3': 0.0, 'CASO4': 0.0, 'DREN_EXT': 4.0, 'DREN_INT': 4.0, 'R': 26.0, 'L': 29.0, 'A': 45.0}, 'cultivos_nuevos': [{'cultivo': 'Frijol', 'value_original': 4.0}, {'cultivo': 'Alfalfa', 'value_original': 4.0}, {'cultivo': 'Durazno', 'value_original': 5.0}, {'cultivo': 'Uva', 'value_original': 4.0}, {'cul

## Add more questions

In [391]:
def top_municipios_por_cultivo(cultivo_data, cultivo_name, N=5):
    """
    Devuelve los top N municipios para un cultivo específico según su valor.
    
    Args:
        cultivo_data (dict): Datos tipo cultivo -> lista de dicts de municipios
        cultivo_name (str): Nombre del cultivo
        N (int): Número de top resultados a devolver
    
    Returns:
        list of dict: Top N municipios con su información y valor
    """
    if cultivo_name not in cultivo_data:
        return []
    
    # Ordenamos por 'valor' descendente
    sorted_list = sorted(cultivo_data[cultivo_name], key=lambda x: x['valor'], reverse=True)
    return sorted_list[:N]


def top_cultivos_por_municipio(municipios_data, municipio_id, N=5):
    """
    Devuelve los top N cultivos para un municipio específico según su valor.
    
    Args:
        municipios_data (list of dict): Lista de municipios con sus cultivos
        municipio_id (str): CVEGEO del municipio
        N (int): Número de top cultivos a devolver
    
    Returns:
        list of tuples: [(nombre_cultivo, valor), ...] ordenados de mayor a menor
    """
    # Buscamos el municipio
    municipio = next((m for m in municipios_data if m['CVEGEO'] == municipio_id), None)
    if municipio is None:
        return []
    
    # Obtenemos y ordenamos los cultivos por valor
    sorted_cultivos = sorted(municipio['cultivos'].items(), key=lambda x: x[1], reverse=True)
    return sorted_cultivos[:N]


In [393]:
# Top 3 municipios para "Avena forrajera en verde"
top_municipios_por_cultivo(cultivo_data, "Avena forrajera en verde", N=3)


[{'CVEGEO': '02002', 'Idestado': 2, 'Idmunicipio': 2, 'valor': 5.0},
 {'CVEGEO': '10036', 'Idestado': 10, 'Idmunicipio': 36, 'valor': 5.0},
 {'CVEGEO': '13070', 'Idestado': 13, 'Idmunicipio': 70, 'valor': 5.0}]

In [394]:
# Top 3 cultivos para el municipio con CVEGEO = '01001'
top_cultivos_por_municipio(municipios_data, '01001', N=3)

[('Durazno', 5.0),
 ('Sorgo forrajero en verde', 4.0),
 ('Triticale forrajero en verde', 4.0)]

In [396]:
from typing import List, Dict, Tuple

# ===========================
# Tool 1: Top municipios por cultivo
# ===========================
@tool
def top_municipios_cultivo(cultivo_name: str, N: int = 5) -> List[Dict]:
    """
    Devuelve los top N municipios para un cultivo específico según su valor.
    Nota: Solo devuelve datos de cultivo, no incluye interpretación ni inferencia del modelo.
    
    Args:
        cultivo_data (dict): Datos tipo cultivo -> lista de dicts de municipios
        cultivo_name (str): Nombre del cultivo
        N (int): Número de top resultados a devolver
    
    Returns:
        list of dict: Top N municipios con su información y valor
    """
    return top_municipios_por_cultivo(cultivo_data, cultivo_name, N)
    


# ===========================
# Tool 2: Top cultivos por municipio
# ===========================
@tool
def top_cultivos_municipio(municipio_id: str, N: int = 5) -> List[Tuple[str, float]]:
    """
    Tool: Devuelve los top N cultivos para un municipio específico según su valor.
    Nota: Solo devuelve datos de cultivo, no incluye interpretación ni inferencia del modelo.
    
    Args:
        municipios_data (list of dict): Lista de municipios con sus cultivos
        municipio_id (str): CVEGEO del municipio
        N (int): Número de top cultivos a devolver
    
    Returns:
        list of tuples: [(nombre_cultivo, valor), ...] ordenados de mayor a menor
    """
    return top_cultivos_por_municipio(municipios_data, municipio_id, N)

In [404]:
selector_prompt = (
    "You are an expert agronomy assistant specialized in crop suitability and soil analysis for Mexican municipalities. "
    "All user queries have been preprocessed by a middleware agent which:\n"
    "- Corrects misspelled municipalities to their CVEGEO codes.\n"
    "- Corrects misspelled crops using a reference crop list.\n"
    "- Outputs a JSON with 'rephrased_text', 'original_text', and a short 'note'.\n\n"
    "Important: Each crop has a 'value' score (1-5) indicating suitability, mapped to qualitative levels: "
    "5=Excelente, 4=Bueno, 3=Regular, 2=Malo, 1=Muy Malo, and 'N/A' meaning the crop is not cultivated or lacks data in that municipality.\n\n"
    "Available analytical tools:\n\n"
    "1. recomendar_municipios_por_cultivo(cultivo, min_value=4.0, top_k=3, min_score=0.70):\n"
    "   - Finds municipalities with similar agro-environmental conditions for a given crop.\n"
    "   - Returns current producers (originales) and potential new areas (similares_distintos) with soil features.\n"
    "   - Use results to argue where the crop would likely thrive, referencing soil, similarity, confidence, and score.\n\n"
    "2. recomendar_cultivos_por_municipio(cvegeo, top_n=1, min_value=4.0):\n"
    "   - Identifies top similar regions for a municipality and recommends promising or shared crops.\n"
    "   - Use shared crops as a baseline and reason which new crops could succeed based on soil and environmental similarity.\n"
    "   - You may propose additional crops if justified by soil or environmental similarity, considering the average difference in shared crops.\n\n"
    "3. cultivos_comunes(cvegeo1, cvegeo2):\n"
    "   - Compares two municipalities and lists shared crops with performance differences and qualitative ratings.\n"
    "   - Provide a concise summary of shared crops and implications for their conditions.\n\n"
    "4. top_municipios_cultivo(cultivo_name: str, N: int = 5) -> List[Dict]:\n"
    "   - Returns the top N municipalities for a given crop based purely on data values.\n"
    "   - Does not include model reasoning or predictions; only uses recorded cultivation data.\n\n"
    "5. top_cultivos_municipio(municipio_id: str, N: int = 5) -> List[Tuple[str, float]]:\n"
    "   - Returns the top N crops for a given municipality based purely on data values.\n"
    "   - Does not include model reasoning or predictions; only uses recorded cultivation data.\n\n"
    "Guidelines:\n"
    "- Always use the preprocessed inputs from the middleware (CVEGEO codes and validated crops).\n"
    "- Select the most relevant tool and interpret outputs critically.\n"
    "- Base your conclusions on both data-driven reasoning and expert agronomic judgment.\n"
    "- When a crop or municipality is marked 'N/A', explain why it cannot be evaluated.\n"
    "- Keep recommendations highly detailed and reasoning clear and concise, referencing soil, similarity, and suitability scores.\n\n"
    "- Add the 'note' from the input, dont change anything from it"
    "Example middleware output:\n"
    "{\n"
    "  'original_text': 'Tell me similar municipalities to asientos and what crops are the best to plant',\n"
    "  'rephrased_text': 'Tell me similar municipalities to 01002 and what crops are the best to plant',\n"
    "  'note': 'municipality corrected using municipios dictionary; similarity data not available in DB; crops suggested from crop list'\n"
    "}"
)

# Inicializa el LLM
selector_model = ChatOpenAI(
    model="gpt-5-nano",
    reasoning_effort='medium')

selector_agent = create_agent(
    model=selector_model,
    tools=[recomendar_municipios_por_cultivo, recomendar_cultivos_por_municipio, cultivos_comunes,top_municipios_cultivo ,top_cultivos_municipio],
    system_prompt=selector_prompt
)

In [405]:
response_rewriter = "recommend municipalities that could plant Manzana"

for step in selector_agent.stream(
    {"messages": [{"role": "user", "content": response_rewriter}]}
):
    for update in step.values():
        if not update:  # 👈 Evita errores con None
            continue
        for message in update.get("messages", []):
            message.pretty_print()

Tool Calls:
  recomendar_municipios_por_cultivo (call_9266jKrZ0MNyQ489UnLZhFGV)
 Call ID: call_9266jKrZ0MNyQ489UnLZhFGV
  Args:
    cultivo: Manzana
    min_value: 4.0
    top_k: 3
    min_score: 0.7
Name: recomendar_municipios_por_cultivo

{'originales': [{'CVEGEO': '01002', 'value': 5.0, 'features_suelo': {'NOMGEO': 'Asientos', 'NOM_ENT': 'Aguascalientes', 'ALTITUD': 2029.6666259765625, 'PH': 4.39555549621582, 'CE': 0.5327777862548828, 'CO': 0.2750000059604645, 'CIC': 7.955555438995361, 'SB': 56.04222106933594, 'SNA': 4.400000095367432, 'K': 1.2644444704055786, 'CA': 6.183333396911621, 'NA': 0.5533333420753479, 'MG': 1.2366666793823242, 'CACO3': 0.0, 'CASO4': 0.0, 'DREN_EXT': 3.6666667461395264, 'DREN_INT': 2.9666666984558105, 'R': 14.01111125946045, 'L': 11.088889122009277, 'A': 32.122222900390625}}, {'CVEGEO': '08031', 'value': 5.0, 'features_suelo': {'NOMGEO': 'Guerrero', 'NOM_ENT': 'Chihuahua', 'ALTITUD': 2152.142822265625, 'PH': 6.023928642272949, 'CE': 1.1659523248672485, 'CO':

In [415]:
response_rewriter = "recommend municipalities that could plant Manzana"
for token, metadata in selector_agent.stream(
    {"messages": [{"role": "user", "content": response_rewriter}]},
    stream_mode="messages"
):
    print(token.content, end="", flush=True)

Para Manzana (Manzana/Apple), puedo identificar los municipios con mayor afinidad agro-ambiental para su cultivo usando el motor de recomendaciones. Antes de generar la lista, ¿prefieres que cubra todo México o quieres limitar la búsqueda a una región específica (Norte, Bajío, Centro, Centro-Sur)? Si te parece bien, procedo de inmediato y te devuelvo los 3 CVEGEO con mayor adecuación (valor de Manzana ≥ 4.0 y similitud ≥ 0.70), con explicación de suelos, clima y razonamiento agronómico.

Qué voy a entregar si procedo:
- 3 municipios CVEGEO con mayor adecuación para Manzana.
- Detalles: código CVEGEO, nombre del municipio, puntuación de Manzana, rasgos de suelo (drenaje, textura, pH, profundidad), rasgos climáticos relevantes (altitud, temperatura, horas de frío) y notas sobre la viabilidad.
- Interpretación agronómica: por qué estas localidades serían adecuadas y qué consideraciones de manejo serían necesarias (riego, composición del suelo, protección contra heladas, etc.).

Nota: 
not