In [154]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd
import json
import time

# Fail fast when the API key is not available as an environment variable
if os.environ.get("OPENAI_API_KEY") is None:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Minimal smoke test for the OpenAI API connection
def test_openai_connection():
    """Send one small chat-completion request and print the reply.

    Any exception raised while creating the client or performing the request
    is caught and printed instead of being propagated.
    """
    request = dict(
        model="gpt-4",
        messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
        temperature=0.0,
        max_tokens=50,
    )
    try:
        reply = OpenAI().chat.completions.create(**request)
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", reply.choices[0].message.content.strip())
    except Exception as err:
        print("❌ Error al conectar con OpenAI:")
        print(str(err))

class SASParser:
    """Line-oriented SAS parser that builds a lightweight intermediate structure."""

    # Leading word of a line (optionally prefixed by the macro sigil '%'),
    # plus an optional '=' right after it. A trailing '=' means the word is an
    # option/assignment such as `data=foo` or `datafile="x.csv"`, not a statement.
    _FIRST_WORD = re.compile(r"(%?[A-Za-z_]\w*)\s*(=)?")

    @staticmethod
    def _second_token(line: str) -> str:
        """Return the second whitespace-separated token without ';', or '' if absent."""
        tokens = line.split()
        return tokens[1].strip(';') if len(tokens) > 1 else ""

    def parse(self, sas_code: str) -> Dict[str, Any]:
        """
        Parse SAS code and extract a structured intermediate representation.

        The parser works one physical line at a time and classifies each line
        by its leading keyword (DATA, SET, WHERE, KEEP, DROP, IF, PROC, RUN,
        %MACRO, %MEND). Several statements on a single line are not split.

        Args:
            sas_code: SAS source text.

        Returns:
            A dict with keys "language", "code", "purpose", "structure" (list of
            step dicts) and "macros" (list of macro dicts holding the raw lines
            of each macro body).

        Differences from naive prefix matching:
            * Keywords are matched as whole words, so `datafile=...`,
              `data=...` or `dropped = 1` are kept as plain statements instead
              of being mistaken for DATA/DROP statements (which used to raise
              IndexError).
            * A pending step is stored when a new DATA/PROC step begins or the
              input ends, so statements not followed by `run;` are not lost.
            * An unterminated macro is still reported.
            * Empty steps (a stray `run;`) are not emitted.
        """
        blocks: List[Dict[str, Any]] = []
        current_block: Dict[str, Any] = {}
        macros: List[Dict[str, Any]] = []
        in_macro = False
        macro_block: Dict[str, Any] = {}

        for raw_line in sas_code.strip().splitlines():
            line = raw_line.strip()
            word = self._FIRST_WORD.match(line)
            # Only treat the leading word as a keyword when it is not `word=...`
            keyword = word.group(1).lower() if word and not word.group(2) else ""

            if keyword == "%macro":
                in_macro = True
                macro_block = {
                    "type": "macro",
                    # strip ';' first so `%macro foo;` yields "foo", then drop the parameter list
                    "name": self._second_token(line).split('(')[0],
                    "definition": []
                }
            elif in_macro and keyword == "%mend":
                in_macro = False
                macros.append(macro_block)
                macro_block = {}
            elif in_macro:
                # Macro bodies are kept verbatim (including blank lines)
                macro_block["definition"].append(line)
            elif keyword == "data":
                if current_block:
                    # Previous step had no explicit `run;` — keep it
                    blocks.append(current_block)
                current_block = {
                    "type": "data_step",
                    "output_table": self._second_token(line),
                    "statements": []
                }
            elif keyword == "set":
                current_block["input_table"] = self._second_token(line)
            elif keyword == "where":
                current_block["filter"] = line[len("where"):].strip(' ;')
            elif keyword == "keep":
                current_block["keep"] = line[len("keep"):].strip(' ;')
            elif keyword == "drop":
                current_block["drop"] = line[len("drop"):].strip(' ;')
            elif keyword == "if":
                current_block.setdefault("conditions", []).append(line.strip(' ;'))
            elif keyword == "proc":
                if "proc" in current_block or "type" in current_block:
                    # A new PROC starts while another step is still open
                    blocks.append(current_block)
                    current_block = {}
                current_block["proc"] = line.strip(' ;')
            elif keyword == "run":
                if current_block:
                    blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(' ;'))

        # Do not lose whatever was still open when the input ended
        if current_block:
            blocks.append(current_block)
        if in_macro and macro_block:
            macros.append(macro_block)

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros
        }

class IntermediateRepresentation:
    """Attribute-style view over a parsed-SAS dictionary."""

    def __init__(self, parsed_data: Dict[str, Any]):
        """Copy the known keys of ``parsed_data`` onto the instance.

        Keys that are absent from ``parsed_data`` become ``None``.
        """
        for field in ("language", "code", "purpose", "structure", "macros"):
            setattr(self, field, parsed_data.get(field))

def compare_runtime():
    """Run the translated Python and R scripts and report their wall-clock time.

    For each of the ``python`` and ``r`` sub-directories of the current working
    directory, the script ``translated_code.py`` / ``translated_code.r`` is
    executed (if it exists) with that sub-directory as its working directory,
    so relative paths used by the script resolve there. Output, stderr and
    duration are printed. When both scripts ran, the faster one is reported.

    The process-wide working directory is never changed: the previous
    ``os.chdir`` approach left the notebook inside ``python/`` whenever a
    script was missing and raised when a language directory did not exist.

    Returns:
        dict mapping language name ("python" / "r") to its duration in seconds
        for every script that was actually executed.
    """
    import sys  # local import: only needed to locate the running interpreter

    runtimes = {}
    base_dir = os.getcwd()
    for lang in ["python", "r"]:
        script_ext = ".py" if lang == "python" else ".r"
        script_file = f"translated_code{script_ext}"
        work_dir = os.path.join(base_dir, lang)
        if not os.path.exists(os.path.join(work_dir, script_file)):
            continue

        # Use the kernel's own interpreter so the script sees the same packages
        command = (
            [sys.executable or "python3", script_file]
            if lang == "python"
            else ["Rscript", script_file]
        )

        print(f"\n⏱ Ejecutando código en {lang.upper()}...")
        start = time.perf_counter()
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                cwd=work_dir,
            )
        except FileNotFoundError as e:
            # Interpreter (e.g. Rscript) is not installed / not on PATH
            print("⚠️ Errores:")
            print(f"No se pudo ejecutar {command[0]}: {e}")
            continue
        duration = time.perf_counter() - start
        runtimes[lang] = duration

        print("📤 Salida:")
        print(result.stdout.strip())
        # Anything written to stderr is reported as an error
        if result.stderr.strip():
            print("⚠️ Errores:")
            print(result.stderr.strip())
        else:
            print("✅ Sin errores")
        print(f"⏱ Tiempo de ejecución: {duration:.3f} segundos")

    if len(runtimes) == 2:
        faster = min(runtimes, key=runtimes.get)
        print(f"\n🚀 {faster.upper()} fue más rápido ({runtimes[faster]:.3f} s). Puedes considerar usar esa versión.")
    return runtimes

def compare_csv_outputs():
    """Compare the ``output.csv`` files produced by SAS, Python and R.

    The first existing file among ``output.csv``, ``python/output.csv`` and
    ``r/output.csv`` (in that order) is used as the reference; every other
    existing file is compared against it with ``pd.testing.assert_frame_equal``
    after sorting the columns by name and resetting the index. Results are
    printed; nothing is returned.
    """
    candidates = [
        ("SAS", "output.csv"),
        ("Python", os.path.join("python", "output.csv")),
        ("R", os.path.join("r", "output.csv")),
    ]

    def _load(csv_path):
        # Column order is normalised so only content differences are reported
        return pd.read_csv(csv_path).sort_index(axis=1).reset_index(drop=True)

    reference = next(
        ((label, location) for label, location in candidates if os.path.exists(location)),
        None,
    )
    if reference is None:
        print("⚠️ No se encontró ningún archivo CSV de referencia para comparar.")
        return

    reference_name, reference_file = reference
    ref_df = _load(reference_file)

    for name, path in candidates:
        if name != reference_name and os.path.exists(path):
            print(f"\n🔍 Comparando {reference_name} vs {name}...")
            test_df = _load(path)
            try:
                pd.testing.assert_frame_equal(ref_df, test_df)
            except AssertionError as mismatch:
                print(f"❌ Diferencias encontradas entre {reference_name} y {name}:")
                try:
                    # compare() itself raises when the frames are not identically labelled
                    diff = ref_df.compare(test_df, keep_shape=True, keep_equal=False)
                    print("Resumen de diferencias (primeras filas):")
                    print(diff.head())
                except Exception as sube:
                    print("No se pudo generar comparación detallada:", sube)
                print("\nError original:", mismatch)
            else:
                print(f"✅ {name} coincide con {reference_name}.")

def compare_code_prompt(original_code: str, translated_code: str, lang: str) -> str:
    """Ask a generative model to review a SAS program against its translation.

    Args:
        original_code: the SAS source.
        translated_code: the translated program.
        lang: name of the target language, interpolated into the prompt.

    Returns:
        The model's reply with surrounding whitespace removed (the prompt asks
        for a short evaluation written in Spanish).
    """
    review_request = f"""
        You are a senior code reviewer. Compare the following SAS code and its translation into {lang}.
        Focus on whether the logic, data transformations, and output behavior are preserved.
        
        SAS code:
        {original_code}
        
        Translated {lang} code:
        {translated_code}
        
        Please provide a short evaluation of the fidelity of the translation in spanish.
        IMPORTANT answer must be in spanish.
        """
    completion = OpenAI().chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": review_request}],
        temperature=0.3
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()


def run_translation(sas_code: str, target_lang: str):
    """Translate SAS code with the OpenAI API and write the result to disk.

    The SAS source is parsed into an intermediate representation, embedded in
    a prompt that asks for a JSON object with "description" and "code" keys,
    and sent to the chat-completions endpoint.

    Files written (inside the directory ``<target_lang lowercased>/``, which is
    created when missing):
        * ``translation_result.json`` — the raw model response.
        * ``translated_code.py`` (Python) or ``translated_code.r`` (any other
          target) — the extracted code.

    If the response cannot be parsed, an error is printed and any
    ``translated_code`` file left over from a previous run is removed, so that
    callers checking for the file do not mistake stale output for the current
    translation.

    Args:
        sas_code: SAS source to translate.
        target_lang: target language name, e.g. "Python" or "R".
    """
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)

    prompt = f"""
        You are a professional developer. Translate the following SAS code to {target_lang}.
        Respond with a VALID JSON object that contains the following two keys:
        
        "description": (a short explanation of the code),
        "code": (the translated code as a valid JSON string — all newlines MUST be escaped as \\n and quotes properly escaped).
        
        DO NOT use Markdown formatting (e.g., no triple backticks).
        Your response MUST be valid JSON and directly parsable with json.loads().
        
        SAS code:
        {ir.code}
        
        This code consists of the following structured steps:
        {ir.structure}
        """
    if ir.macros:
        prompt += f"""
                The SAS code also defines the following macros. Translate them as reusable functions if appropriate:{ir.macros}
            """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    # message.content can be None; treat that as an empty reply
    raw_json = (response.choices[0].message.content or "").strip()

    # The output directory may not exist yet on a fresh checkout
    out_dir = target_lang.lower()
    os.makedirs(out_dir, exist_ok=True)

    with open(f"{out_dir}/translation_result.json", "w") as f:
        f.write(raw_json)
    print("\n✅ Respuesta JSON guardada en translation_result.json")

    ext = ".py" if target_lang.lower() == "python" else ".r"
    filename = f"{out_dir}/translated_code{ext}"

    try:
        # Greedy match from the first '{' to the last '}' tolerates text or
        # Markdown fences around the JSON object
        match = re.search(r'{.*}', raw_json, re.DOTALL)
        if not match:
            raise ValueError("No se encontró un bloque JSON válido en la respuesta.")

        cleaned_json = match.group(0)
        json_data = json.loads(cleaned_json)
        code_only = json_data.get("code", "").strip()

        with open(filename, "w") as f:
            f.write(code_only)

        print(f"✅ Código traducido guardado en {filename}")

    except Exception as e:
        print("❌ No se pudo extraer el código del JSON:", e)
        # Remove stale output from an earlier run so it is not shown or
        # executed as if it were the translation of the current input
        if os.path.exists(filename):
            os.remove(filename)

# Demo run: check the API connection, translate a small sample program to
# Python and time the translated script(s).
if __name__ == "__main__":
    test_openai_connection()
    # Sample input: a macro wrapping a DATA step that filters and keeps columns
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")
    # Executes whichever translated scripts exist under python/ and r/
    compare_runtime()
    


In [158]:
import gradio as gr
import os

def traducir_y_mostrar(sas_code: str, language: str) -> str:
    """Translate ``sas_code`` and return the translated source as text.

    Runs the translation, then reads ``<language>/translated_code.py`` (or
    ``.r`` for any language other than Python). Returns a warning string when
    the file is missing and an error string when anything raises.
    """
    try:
        run_translation(sas_code, language)
        lang_dir = language.lower()
        suffix = ".py" if lang_dir == "python" else ".r"
        translated_file = f"{lang_dir}/translated_code{suffix}"
        if not os.path.exists(translated_file):
            return "⚠️ Traducción completada, pero no se encontró el archivo resultante."
        with open(translated_file, "r") as handle:
            return handle.read()
    except Exception as e:
        return f"❌ Error al traducir: {str(e)}"

def comparar_resultados() -> str:
    """Run the runtime and CSV comparisons and return everything they print.

    ``sys.stdout`` is redirected with ``contextlib.redirect_stdout`` so that it
    is always restored, even when a comparison raises. (Assigning
    ``sys.stdout`` by hand left it pointing at the capture buffer after an
    exception, silencing all later output in the kernel.)

    Returns:
        The captured output, or an error message if a comparison raised.
    """
    from contextlib import redirect_stdout
    from io import StringIO

    output = StringIO()
    try:
        # Temporarily capture stdout
        with redirect_stdout(output):
            compare_runtime()
            compare_csv_outputs()
        return output.getvalue()
    except Exception as e:
        return f"❌ Error al ejecutar comparaciones: {str(e)}"

# "Translate" tab: SAS source and target language in, translated code out
demo = gr.Interface(
    title="Traductor de SAS a Python/R",
    description="Introduce tu código SAS y selecciona el lenguaje. Puedes comparar la fidelidad funcional tras traducir.",
    fn=traducir_y_mostrar,
    inputs=[
        gr.Textbox(lines=12, label="Código SAS", placeholder="%macro..."),
        gr.Radio(label="Lenguaje de destino", choices=["Python", "R"], value="Python"),
    ],
    outputs=gr.Code(label="Código traducido"),
    live=False,
)

# "Compare" tab: no inputs, shows the captured comparison report
comparar_btn = gr.Interface(
    title="Comparar ejecución y fidelidad",
    fn=comparar_resultados,
    inputs=[],
    outputs=gr.Textbox(label="Resultados de comparación"),
)

# Combine both interfaces into one tabbed app and start it
gr.TabbedInterface(
    [demo, comparar_btn],
    ["Traducir", "Comparar"],
).launch()





In [102]:
# Sample SAS programs of increasing complexity used to exercise the translator.
# All three import default_of_credit_card_clients.csv and export output.csv.

# low: plain DATA step that keeps rows with AGE >= 40
low = """
        proc import datafile="default_of_credit_card_clients.csv"
            out=class_data
            dbms=csv
            replace;
            getnames=yes;
        run;
        
        data adultos;
          set class_data;
          where AGE >= 40;
        run;
        
        PROC EXPORT DATA= adultos
            OUTFILE="output.csv"
            DBMS=CSV
            REPLACE;
        RUN;
        """
# medium: parameterised macro that filters on BILL_AMT1 and exports the result
medium = """
        proc import datafile="default_of_credit_card_clients.csv"
            out=default_data
            dbms=csv
            replace;
            getnames=yes;
        run;
        
        %macro filtrar_deuda(umbral);
          data deuda_alta;
            set default_data;
            if BILL_AMT1 > &umbral;
          run;
        
          proc export data=deuda_alta
              outfile="output.csv"
              dbms=csv
              replace;
          run;
        %mend;
        
        %filtrar_deuda(20000);
        """
# high: macro with two column parameters that derives RATIO and a RIESGO label
high = """
        proc import datafile="default_of_credit_card_clients.csv"
            out=default_data
            dbms=csv
            replace;
            getnames=yes;
        run;
        
        %macro evaluar_riesgo(pago, bill);
          data riesgo;
            set default_data;
            if &bill > 0 then RATIO = &pago / &bill;
            else RATIO = .;
        
            if RATIO < 0.1 then RIESGO = "ALTO";
            else if RATIO < 0.3 then RIESGO = "MEDIO";
            else RIESGO = "BAJO";
          run;
        
          proc export data=riesgo
              outfile="output.csv"
              dbms=csv
              replace;
          run;
        %mend;
        
        %evaluar_riesgo(PAY_AMT1, BILL_AMT1);

"""

In [151]:
import gradio as gr
import os

# Module-level state shared between the Gradio callbacks of this cell
sas_code_storage = {"code": ""}         # last SAS source submitted
translated_code_storage = {"code": ""}  # last translated code read from disk
lang_storage = {"lang": ""}             # last target language selected
resumen = ""                            # last status message

def traducir_y_mostrar(sas_code: str, language: str):
    """Translate ``sas_code`` and return ``(translated_code, evaluation)``.

    The translated file written by ``run_translation`` is read back and then
    reviewed by ``compare_code_prompt``. The inputs, the translated code and a
    status message are also stored in the module-level ``*_storage`` dicts and
    ``resumen``.

    Failures are surfaced instead of being hidden behind two empty strings:
        * translation failure or missing file -> ``("", <status message>)``
        * review failure after a successful translation ->
          ``(<translated code>, <error message>)`` so the translation is kept.

    Args:
        sas_code: SAS source entered by the user.
        language: target language ("Python" or "R").

    Returns:
        Tuple of two strings for the code and evaluation outputs.
    """
    global resumen
    sas_code_storage["code"] = sas_code
    lang_storage["lang"] = language
    try:
        run_translation(sas_code, language)
        ext = ".py" if language.lower() == "python" else ".r"
        translated_file = os.path.join(language.lower(), f"translated_code{ext}")
        if not os.path.exists(translated_file):
            resumen = "⚠️ Archivo traducido no encontrado."
            return "", resumen

        # Close the file before the (slow) API call below
        with open(translated_file, "r") as f:
            translated_code_storage["code"] = f.read()
        resumen = "✅ Traducción completada. También puedes ejecutar las comparaciones en la siguiente pestaña."
    except Exception as e:
        resumen = f"❌ Error: {str(e)}"
        return "", resumen

    try:
        evaluacion = compare_code_prompt(sas_code, translated_code_storage["code"], language)
    except Exception as e:
        # Keep the successful translation; report only the review failure
        evaluacion = f"❌ Error: {str(e)}"
    return translated_code_storage["code"], evaluacion


def comparar_resultados() -> str:
    """Run the runtime and CSV comparisons and return everything they print.

    ``sys.stdout`` is redirected with ``contextlib.redirect_stdout`` so that it
    is always restored, even when a comparison raises. (Assigning
    ``sys.stdout`` by hand left it pointing at the capture buffer after an
    exception, silencing all later output in the kernel.)

    Returns:
        The captured output, or an error message if a comparison raised.
    """
    from contextlib import redirect_stdout
    from io import StringIO

    output = StringIO()
    try:
        with redirect_stdout(output):
            compare_runtime()
            compare_csv_outputs()
        return output.getvalue()
    except Exception as e:
        return f"❌ Error al ejecutar comparaciones: {str(e)}"

# "Translate" tab: returns the translated code plus an immediate model review
demo = gr.Interface(
    title="Traductor de SAS a Python/R",
    description="Introduce tu código SAS y selecciona el lenguaje.",
    fn=traducir_y_mostrar,
    inputs=[
        gr.Textbox(lines=12, label="Código SAS", placeholder="%macro..."),
        gr.Radio(label="Lenguaje de destino", choices=["Python", "R"], value="Python"),
    ],
    outputs=[
        gr.Code(label="Código traducido"),
        gr.Textbox(label="Evaluación generativa inmediata"),
    ],
)

# "Compare" tab: no inputs, shows the captured comparison report
comparar_btn = gr.Interface(
    title="Comparar ejecución y fidelidad",
    fn=comparar_resultados,
    inputs=[],
    outputs=gr.Textbox(label="Resultados de comparación"),
)

# Combine both interfaces into one tabbed app and start it
gr.TabbedInterface([demo, comparar_btn], ["Traducir", "Comparar"]).launch()




In [13]:
!pip install gradio 

Collecting gradio
  Downloading gradio-5.29.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-macosx_10_12_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downl