In [31]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd
import json

# Fail fast at import time when the OpenAI API key is absent from the environment.
if os.environ.get("OPENAI_API_KEY") is None:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

def test_openai_connection():
    """Send one small chat request to OpenAI and print the outcome.

    Any exception raised while building the client, sending the request or
    reading the reply is caught and printed; nothing is returned or re-raised.
    """
    request = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Say hello and write a line of Python code."}],
        "temperature": 0.0,
        "max_tokens": 50,
    }
    try:
        reply = OpenAI().chat.completions.create(**request)
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", reply.choices[0].message.content.strip())
    except Exception as err:
        print("❌ Error al conectar con OpenAI:")
        print(str(err))

class SASParser:
    """Line-oriented scanner that summarises SAS code as macros and step blocks.

    This is a heuristic reader, not a SAS grammar: every physical line is
    classified by its leading keyword only, so it expects one statement per
    line.
    """

    # Leading statement keyword; macro statements carry a "%" prefix.
    _KEYWORD_RE = re.compile(r"%?[A-Za-z_]\w*")
    # Statements whose remaining text is stored verbatim under the mapped key.
    _CLAUSE_KEYS = {"where": "filter", "keep": "keep", "drop": "drop"}
    # Statements that close the step currently being collected.
    _TERMINATORS = ("run", "quit")

    @classmethod
    def _split_statement(cls, line: str):
        """Return ``(keyword, rest)``: the lower-cased first word and the remainder."""
        found = cls._KEYWORD_RE.match(line)
        if found is None:
            return "", line.strip(" ;")
        return found.group(0).lower(), line[found.end():].strip(" ;")

    @staticmethod
    def _first_token(text: str):
        """Return the first whitespace-delimited token of *text*, or None if empty."""
        tokens = text.split()
        return tokens[0].strip(";") if tokens else None

    def parse(self, sas_code: str) -> Dict[str, Any]:
        """Parse SAS source into a structured intermediate dictionary.

        Args:
            sas_code: SAS source text.

        Returns:
            A dict with the keys ``language``, ``code`` (the unmodified input),
            ``purpose``, ``structure`` (list of step dicts found outside macro
            definitions) and ``macros`` (list of ``%macro`` definitions, each
            holding its raw body lines).
        """
        blocks: List[Dict[str, Any]] = []
        macros: List[Dict[str, Any]] = []
        current_block: Dict[str, Any] = {}
        macro_block = None  # macro definition being collected, if any

        for raw_line in sas_code.strip().splitlines():
            line = raw_line.strip()
            # Whole-word keyword match, so "dataset_x = 1;" is not read as DATA.
            keyword, rest = self._split_statement(line)

            if keyword == "%macro":
                # Tolerate a missing name and drop any parameter list.
                macro_name = (self._first_token(rest) or "").split("(")[0]
                macro_block = {"type": "macro", "name": macro_name, "definition": []}
            elif macro_block is not None:
                if keyword == "%mend":
                    macros.append(macro_block)
                    macro_block = None
                else:
                    # Macro bodies are kept verbatim; they are not parsed into steps.
                    macro_block["definition"].append(line)
            elif keyword == "data":
                # Keep a step that was never closed instead of overwriting it.
                if current_block:
                    blocks.append(current_block)
                current_block = {
                    "type": "data_step",
                    "output_table": self._first_token(rest),
                    "statements": [],
                }
            elif keyword == "set":
                current_block["input_table"] = self._first_token(rest)
            elif keyword in self._CLAUSE_KEYS:
                current_block[self._CLAUSE_KEYS[keyword]] = rest
            elif keyword == "if":
                current_block.setdefault("conditions", []).append(line.strip(" ;"))
            elif keyword == "proc":
                # A PROC after an unclosed step starts a new block; loose
                # statements collected before it stay attached to the PROC.
                if "type" in current_block or "proc" in current_block:
                    blocks.append(current_block)
                    current_block = {}
                current_block["proc"] = line.strip(" ;")
            elif keyword in self._TERMINATORS:
                # A stray terminator with nothing collected adds no empty block.
                if current_block:
                    blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(" ;"))

        # Flush whatever is still open so input lacking "%mend" / "run;" is not lost.
        if macro_block is not None:
            macros.append(macro_block)
        if current_block:
            blocks.append(current_block)

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros,
        }

class IntermediateRepresentation:
    """Attribute-style view over a parsed-SAS dictionary.

    Each of the keys ``language``, ``code``, ``purpose``, ``structure`` and
    ``macros`` is copied to an attribute of the same name; a key missing from
    *parsed_data* yields ``None``.
    """

    def __init__(self, parsed_data: Dict[str, Any]):
        fetch = parsed_data.get
        (
            self.language,
            self.code,
            self.purpose,
            self.structure,
            self.macros,
        ) = (fetch(key) for key in ("language", "code", "purpose", "structure", "macros"))

def run_translation(sas_code: str, target_lang: str):
    """Translate SAS code with the OpenAI chat API and save the result to disk.

    The raw model reply is written to ``translation_result.json`` and echoed.
    When a JSON object can be extracted from the reply, its ``"code"`` value is
    written to ``translated_code.py`` (target "python", case-insensitive) or
    ``translated_code.r`` (any other target).

    Args:
        sas_code: SAS source to translate.
        target_lang: Target language name, interpolated into the prompt.

    Returns:
        None. Progress and extraction failures are reported with ``print``.
    """
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)

    prompt = f"""
        You are a professional developer. Translate the following SAS code to {target_lang}.
        Respond with a VALID JSON object that contains the following two keys:
        
        "description": (a short explanation of the code),
        "code": (the translated code as a valid JSON string — all newlines MUST be escaped as \\n and quotes properly escaped).
        
        DO NOT use Markdown formatting (e.g., no triple backticks).
        Your response MUST be valid JSON and directly parsable with json.loads().
        
        SAS code:
        {ir.code}
        
        This code consists of the following structured steps:
        {ir.structure}
    """

    if ir.macros:
        prompt += f"The SAS code also defines the following macros. Translate them as reusable functions if appropriate:{ir.macros}"
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    # Explicit UTF-8: the reply may contain non-ASCII text that the platform's
    # default encoding cannot represent.
    with open("translation_result.json", "w", encoding="utf-8") as f:
        f.write(raw_json)
    print("\n✅ Respuesta JSON guardada en translation_result.json")
    print(raw_json)

    # Try to load the code from the JSON reply and save it as a source file.
    try:
        # Grab the outermost {...} span in case the model wrapped the JSON in extra text.
        match = re.search(r'{.*}', raw_json, re.DOTALL)
        if not match:
            raise ValueError("No se encontró un bloque JSON válido en la respuesta.")

        cleaned_json = match.group(0)
        # strict=False tolerates literal newlines/tabs inside the "code" string,
        # which models emit despite the escaping instruction in the prompt.
        json_data = json.loads(cleaned_json, strict=False)
        code_only = json_data.get("code", "").strip()

        ext = ".py" if target_lang.lower() == "python" else ".r"
        filename = f"translated_code{ext}"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(code_only)
        print(f"✅ Código traducido guardado en {filename}")
    except Exception as e:
        # Best-effort step: report the problem instead of aborting the caller.
        print("❌ No se pudo extraer el código del JSON:", e)

# Demo entry point: check API connectivity, then translate a small sample
# SAS program to Python.
if __name__ == "__main__":
    test_openai_connection()
    # Sample input: a macro wrapping one DATA step that filters and subsets
    # sashelp.class, followed by the macro invocation.
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

✅ Respuesta JSON guardada en translation_result.json
{"description": "The SAS code defines a macro named 'example'. This macro creates a new dataset named 'filtered' from the 'class' dataset in the 'sashelp' library. It filters the data to include only the observations where the 'age' variable is greater than 13. It then keeps only the 'name', 'age', and 'height' variables in the new dataset. The macro is then called to execute these operations.", "code": "import pandas as pd\n\ndef example():\n    sashelp_class = pd.read_csv('sashelp.class.csv')\n    filtered = sashelp_class[sashelp_class['age'] > 13][['name', 'age', 'height']]\n    return filtered\n\nfiltered_data = example()"}
✅ Código traducido guardado en translated_code.py


In [35]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd
import json

# Fail fast at import time when the OpenAI API key is absent from the environment.
if os.environ.get("OPENAI_API_KEY") is None:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

def test_openai_connection():
    """Send one small chat request to OpenAI and print the outcome.

    Any exception raised while building the client, sending the request or
    reading the reply is caught and printed; nothing is returned or re-raised.
    """
    request = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Say hello and write a line of Python code."}],
        "temperature": 0.0,
        "max_tokens": 50,
    }
    try:
        reply = OpenAI().chat.completions.create(**request)
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", reply.choices[0].message.content.strip())
    except Exception as err:
        print("❌ Error al conectar con OpenAI:")
        print(str(err))

class SASParser:
    """Line-oriented scanner that summarises SAS code as macros and step blocks.

    This is a heuristic reader, not a SAS grammar: every physical line is
    classified by its leading keyword only, so it expects one statement per
    line.
    """

    # Leading statement keyword; macro statements carry a "%" prefix.
    _KEYWORD_RE = re.compile(r"%?[A-Za-z_]\w*")
    # Statements whose remaining text is stored verbatim under the mapped key.
    _CLAUSE_KEYS = {"where": "filter", "keep": "keep", "drop": "drop"}
    # Statements that close the step currently being collected.
    _TERMINATORS = ("run", "quit")

    @classmethod
    def _split_statement(cls, line: str):
        """Return ``(keyword, rest)``: the lower-cased first word and the remainder."""
        found = cls._KEYWORD_RE.match(line)
        if found is None:
            return "", line.strip(" ;")
        return found.group(0).lower(), line[found.end():].strip(" ;")

    @staticmethod
    def _first_token(text: str):
        """Return the first whitespace-delimited token of *text*, or None if empty."""
        tokens = text.split()
        return tokens[0].strip(";") if tokens else None

    def parse(self, sas_code: str) -> Dict[str, Any]:
        """Parse SAS source into a structured intermediate dictionary.

        Args:
            sas_code: SAS source text.

        Returns:
            A dict with the keys ``language``, ``code`` (the unmodified input),
            ``purpose``, ``structure`` (list of step dicts found outside macro
            definitions) and ``macros`` (list of ``%macro`` definitions, each
            holding its raw body lines).
        """
        blocks: List[Dict[str, Any]] = []
        macros: List[Dict[str, Any]] = []
        current_block: Dict[str, Any] = {}
        macro_block = None  # macro definition being collected, if any

        for raw_line in sas_code.strip().splitlines():
            line = raw_line.strip()
            # Whole-word keyword match, so "dataset_x = 1;" is not read as DATA.
            keyword, rest = self._split_statement(line)

            if keyword == "%macro":
                # Tolerate a missing name and drop any parameter list.
                macro_name = (self._first_token(rest) or "").split("(")[0]
                macro_block = {"type": "macro", "name": macro_name, "definition": []}
            elif macro_block is not None:
                if keyword == "%mend":
                    macros.append(macro_block)
                    macro_block = None
                else:
                    # Macro bodies are kept verbatim; they are not parsed into steps.
                    macro_block["definition"].append(line)
            elif keyword == "data":
                # Keep a step that was never closed instead of overwriting it.
                if current_block:
                    blocks.append(current_block)
                current_block = {
                    "type": "data_step",
                    "output_table": self._first_token(rest),
                    "statements": [],
                }
            elif keyword == "set":
                current_block["input_table"] = self._first_token(rest)
            elif keyword in self._CLAUSE_KEYS:
                current_block[self._CLAUSE_KEYS[keyword]] = rest
            elif keyword == "if":
                current_block.setdefault("conditions", []).append(line.strip(" ;"))
            elif keyword == "proc":
                # A PROC after an unclosed step starts a new block; loose
                # statements collected before it stay attached to the PROC.
                if "type" in current_block or "proc" in current_block:
                    blocks.append(current_block)
                    current_block = {}
                current_block["proc"] = line.strip(" ;")
            elif keyword in self._TERMINATORS:
                # A stray terminator with nothing collected adds no empty block.
                if current_block:
                    blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(" ;"))

        # Flush whatever is still open so input lacking "%mend" / "run;" is not lost.
        if macro_block is not None:
            macros.append(macro_block)
        if current_block:
            blocks.append(current_block)

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros,
        }

class IntermediateRepresentation:
    """Attribute-style view over a parsed-SAS dictionary.

    Each of the keys ``language``, ``code``, ``purpose``, ``structure`` and
    ``macros`` is copied to an attribute of the same name; a key missing from
    *parsed_data* yields ``None``.
    """

    def __init__(self, parsed_data: Dict[str, Any]):
        fetch = parsed_data.get
        (
            self.language,
            self.code,
            self.purpose,
            self.structure,
            self.macros,
        ) = (fetch(key) for key in ("language", "code", "purpose", "structure", "macros"))

def run_translation(sas_code: str, target_lang: str):
    """Translate SAS code with the OpenAI chat API, save it, and run it.

    The raw model reply is written to ``translation_result.json``. When a JSON
    object can be extracted from the reply, its ``"code"`` value is written to
    ``translated_code.py`` (target "python", case-insensitive) or
    ``translated_code.r`` (any other target) and then executed in a
    subprocess; its stdout and stderr are printed.

    Args:
        sas_code: SAS source to translate.
        target_lang: Target language name; only "python" and "r" can be executed.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import sys  # local import: only needed to locate the running interpreter

    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)

    prompt = f"""
        You are a professional developer. Translate the following SAS code to {target_lang}.
        Respond with a VALID JSON object that contains the following two keys:
        
        "description": (a short explanation of the code),
        "code": (the translated code as a valid JSON string — all newlines MUST be escaped as \\n and quotes properly escaped).
        
        DO NOT use Markdown formatting (e.g., no triple backticks).
        Your response MUST be valid JSON and directly parsable with json.loads().
        
        SAS code:
        {ir.code}
        
        This code consists of the following structured steps:
        {ir.structure}
    """
    if ir.macros:
        prompt += f"""
        The SAS code also defines the following macros. Translate them as reusable functions if appropriate:{ir.macros}
        """
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    # Explicit UTF-8: the reply may contain non-ASCII text that the platform's
    # default encoding cannot represent.
    with open("translation_result.json", "w", encoding="utf-8") as f:
        f.write(raw_json)
    print("\n✅ Respuesta JSON guardada en translation_result.json")

    # Try to load the code from the JSON reply and save it as a source file.
    try:
        # Grab the outermost {...} span in case the model wrapped the JSON in extra text.
        match = re.search(r'{.*}', raw_json, re.DOTALL)
        if not match:
            raise ValueError("No se encontró un bloque JSON válido en la respuesta.")

        cleaned_json = match.group(0)
        # strict=False tolerates literal newlines/tabs inside the "code" string,
        # which models emit despite the escaping instruction in the prompt.
        json_data = json.loads(cleaned_json, strict=False)
        code_only = json_data.get("code", "").strip()

        ext = ".py" if target_lang.lower() == "python" else ".r"
        filename = f"translated_code{ext}"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(code_only)
        print(f"✅ Código traducido guardado en {filename}")

        # Run the translated code.
        # SECURITY: this executes model-generated code with the current user's
        # privileges. Only run it in an environment where that is acceptable.
        try:
            if target_lang.lower() == "python":
                # Use the interpreter running this code so the script sees the
                # same installed packages; fall back to PATH lookup if unknown.
                result = subprocess.run([sys.executable or "python3", filename], capture_output=True, text=True)
            elif target_lang.lower() == "r":
                result = subprocess.run(["Rscript", filename], capture_output=True, text=True)
            else:
                raise ValueError("Lenguaje no soportado para ejecución")
            print("📤 Salida de la ejecución:")
            print(result.stdout)
            print("⚠️ Errores (si hay):")
            print(result.stderr)
        except Exception as ex:
            print("❌ Error al ejecutar el código traducido:", ex)
    except Exception as e:
        # Best-effort step: report the problem instead of aborting the caller.
        print("❌ No se pudo extraer el código del JSON:", e)

# Demo entry point: check API connectivity, then translate a small sample
# SAS program to Python.
if __name__ == "__main__":
    test_openai_connection()
    # Sample input: a macro wrapping one DATA step that filters and subsets
    # sashelp.class, followed by the macro invocation.
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

✅ Respuesta JSON guardada en translation_result.json
✅ Código traducido guardado en translated_code.py
📤 Salida de la ejecución:

⚠️ Errores (si hay):
Traceback (most recent call last):
  File "/Users/cesarrojasflores/Documents/Final Project/translated_code.py", line 8, in <module>
    filtered_df = example()
                  ^^^^^^^^^
  File "/Users/cesarrojasflores/Documents/Final Project/translated_code.py", line 4, in example
    class_df = pd.read_sas('sashelp.class')
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/pandas/io/sas/sasreader.py", line 146, in read_sas
    raise ValueError(
ValueError: unable to infer format of SAS file from filename: 'sashelp.class'



In [None]:
def validate_functional_equivalence(reference_file: str, test_file: str, target_lang: str):
    """Run two scripts and report whether their standard output matches.

    Both files are executed with the interpreter for *target_lang* and their
    stripped stdout is compared. A script that exits with a non-zero status is
    treated as a validation error, so two scripts that both crash without
    printing anything are not reported as equivalent.

    Args:
        reference_file: Path of the script whose output is the expected one.
        test_file: Path of the script under test.
        target_lang: "python" or "r" (case-insensitive).

    Returns:
        None. The verdict, or the error, is reported with ``print``.
    """
    import sys  # local import: only needed to locate the running interpreter

    def run_file(path: str) -> str:
        """Execute *path* and return its stripped stdout; raise if it fails."""
        lang = target_lang.lower()
        if lang == "python":
            # Prefer the interpreter running this code so the script sees the
            # same installed packages; fall back to PATH lookup if unknown.
            command = [sys.executable or "python3", path]
        elif lang == "r":
            command = ["Rscript", path]
        else:
            raise ValueError("Lenguaje no soportado")
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(
                f"{path} terminó con código {result.returncode}: {result.stderr.strip()}"
            )
        return result.stdout.strip()

    try:
        ref_output = run_file(reference_file)
        test_output = run_file(test_file)
        if ref_output == test_output:
            print("✅ Fidelidad funcional: salidas equivalentes.")
        else:
            print("❌ Las salidas difieren.")
            print("🔎 Esperado:\n", ref_output)
            print("🔎 Obtenido:\n", test_output)
    except Exception as e:
        # Covers unsupported languages, missing interpreters and failed scripts.
        print("❌ Error durante la validación de fidelidad:", e)


In [61]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd
import json

# Fail fast at import time when the OpenAI API key is absent from the environment.
if os.environ.get("OPENAI_API_KEY") is None:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

def test_openai_connection():
    """Send one small chat request to OpenAI and print the outcome.

    Any exception raised while building the client, sending the request or
    reading the reply is caught and printed; nothing is returned or re-raised.
    """
    request = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Say hello and write a line of Python code."}],
        "temperature": 0.0,
        "max_tokens": 50,
    }
    try:
        reply = OpenAI().chat.completions.create(**request)
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", reply.choices[0].message.content.strip())
    except Exception as err:
        print("❌ Error al conectar con OpenAI:")
        print(str(err))

class SASParser:
    """Line-oriented scanner that summarises SAS code as macros and step blocks.

    This is a heuristic reader, not a SAS grammar: every physical line is
    classified by its leading keyword only, so it expects one statement per
    line.
    """

    # Leading statement keyword; macro statements carry a "%" prefix.
    _KEYWORD_RE = re.compile(r"%?[A-Za-z_]\w*")
    # Statements whose remaining text is stored verbatim under the mapped key.
    _CLAUSE_KEYS = {"where": "filter", "keep": "keep", "drop": "drop"}
    # Statements that close the step currently being collected.
    _TERMINATORS = ("run", "quit")

    @classmethod
    def _split_statement(cls, line: str):
        """Return ``(keyword, rest)``: the lower-cased first word and the remainder."""
        found = cls._KEYWORD_RE.match(line)
        if found is None:
            return "", line.strip(" ;")
        return found.group(0).lower(), line[found.end():].strip(" ;")

    @staticmethod
    def _first_token(text: str):
        """Return the first whitespace-delimited token of *text*, or None if empty."""
        tokens = text.split()
        return tokens[0].strip(";") if tokens else None

    def parse(self, sas_code: str) -> Dict[str, Any]:
        """Parse SAS source into a structured intermediate dictionary.

        Args:
            sas_code: SAS source text.

        Returns:
            A dict with the keys ``language``, ``code`` (the unmodified input),
            ``purpose``, ``structure`` (list of step dicts found outside macro
            definitions) and ``macros`` (list of ``%macro`` definitions, each
            holding its raw body lines).
        """
        blocks: List[Dict[str, Any]] = []
        macros: List[Dict[str, Any]] = []
        current_block: Dict[str, Any] = {}
        macro_block = None  # macro definition being collected, if any

        for raw_line in sas_code.strip().splitlines():
            line = raw_line.strip()
            # Whole-word keyword match, so "dataset_x = 1;" is not read as DATA.
            keyword, rest = self._split_statement(line)

            if keyword == "%macro":
                # Tolerate a missing name and drop any parameter list.
                macro_name = (self._first_token(rest) or "").split("(")[0]
                macro_block = {"type": "macro", "name": macro_name, "definition": []}
            elif macro_block is not None:
                if keyword == "%mend":
                    macros.append(macro_block)
                    macro_block = None
                else:
                    # Macro bodies are kept verbatim; they are not parsed into steps.
                    macro_block["definition"].append(line)
            elif keyword == "data":
                # Keep a step that was never closed instead of overwriting it.
                if current_block:
                    blocks.append(current_block)
                current_block = {
                    "type": "data_step",
                    "output_table": self._first_token(rest),
                    "statements": [],
                }
            elif keyword == "set":
                current_block["input_table"] = self._first_token(rest)
            elif keyword in self._CLAUSE_KEYS:
                current_block[self._CLAUSE_KEYS[keyword]] = rest
            elif keyword == "if":
                current_block.setdefault("conditions", []).append(line.strip(" ;"))
            elif keyword == "proc":
                # A PROC after an unclosed step starts a new block; loose
                # statements collected before it stay attached to the PROC.
                if "type" in current_block or "proc" in current_block:
                    blocks.append(current_block)
                    current_block = {}
                current_block["proc"] = line.strip(" ;")
            elif keyword in self._TERMINATORS:
                # A stray terminator with nothing collected adds no empty block.
                if current_block:
                    blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(" ;"))

        # Flush whatever is still open so input lacking "%mend" / "run;" is not lost.
        if macro_block is not None:
            macros.append(macro_block)
        if current_block:
            blocks.append(current_block)

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros,
        }

class IntermediateRepresentation:
    """Attribute-style view over a parsed-SAS dictionary.

    Each of the keys ``language``, ``code``, ``purpose``, ``structure`` and
    ``macros`` is copied to an attribute of the same name; a key missing from
    *parsed_data* yields ``None``.
    """

    def __init__(self, parsed_data: Dict[str, Any]):
        fetch = parsed_data.get
        (
            self.language,
            self.code,
            self.purpose,
            self.structure,
            self.macros,
        ) = (fetch(key) for key in ("language", "code", "purpose", "structure", "macros"))

import time

def run_translation(sas_code: str, target_lang: str):
    """Translate SAS code with the OpenAI chat API, save it, and time the scripts.

    The raw model reply is written to ``translation_result.json``. When a JSON
    object can be extracted from the reply, its ``"code"`` value is written to
    ``translated_code.py`` (target "python", case-insensitive) or
    ``translated_code.r`` (any other target). Every ``translated_code.py`` /
    ``translated_code.r`` present in the working directory is then executed
    and timed; when both ran successfully the faster one is announced.

    Args:
        sas_code: SAS source to translate.
        target_lang: Target language name, interpolated into the prompt.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import sys  # local import: only needed to locate the running interpreter

    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)

    prompt = f"""
        You are a professional developer. Translate the following SAS code to {target_lang}.
        Respond with a VALID JSON object that contains the following two keys:
        
        "description": (a short explanation of the code),
        "code": (the translated code as a valid JSON string — all newlines MUST be escaped as \\n and quotes properly escaped).
        
        DO NOT use Markdown formatting (e.g., no triple backticks).
        Your response MUST be valid JSON and directly parsable with json.loads().
        
        SAS code:
        {ir.code}
        
        This code consists of the following structured steps:
        {ir.structure}
"""
    if ir.macros:
        prompt += f"""
The SAS code also defines the following macros. Translate them as reusable functions if appropriate:
{ir.macros}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    # Explicit UTF-8: the reply may contain non-ASCII text that the platform's
    # default encoding cannot represent.
    with open("translation_result.json", "w", encoding="utf-8") as f:
        f.write(raw_json)
    print("\n✅ Respuesta JSON guardada en translation_result.json")

    # Try to load the code from the JSON reply and save it as a source file.
    # Kept separate from the execution step so each failure gets its own message.
    try:
        # Grab the outermost {...} span in case the model wrapped the JSON in extra text.
        match = re.search(r'{.*}', raw_json, re.DOTALL)
        if not match:
            raise ValueError("No se encontró un bloque JSON válido en la respuesta.")

        cleaned_json = match.group(0)
        # strict=False tolerates literal newlines/tabs inside the "code" string,
        # which models emit despite the escaping instruction in the prompt.
        json_data = json.loads(cleaned_json, strict=False)
        code_only = json_data.get("code", "").strip()

        ext = ".py" if target_lang.lower() == "python" else ".r"
        filename = f"translated_code{ext}"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(code_only)
        print(f"✅ Código traducido guardado en {filename}")
    except Exception as e:
        print("❌ No se pudo extraer el código del JSON:", e)
        return

    # Run the translated scripts and measure their execution time.
    # SECURITY: this executes model-generated code with the current user's
    # privileges. Only run it in an environment where that is acceptable.
    # NOTE(review): any translated_code.* file already on disk is executed,
    # including one left over from an earlier call with different SAS input.
    try:
        runtimes = {}
        for lang in ["python", "r"]:
            script_ext = ".py" if lang == "python" else ".r"
            script_file = f"translated_code{script_ext}"
            if not os.path.exists(script_file):
                continue

            print(f"⏱ Ejecutando código en {lang.upper()}...")
            if lang == "python":
                # Use the interpreter running this code so the script sees the
                # same installed packages; fall back to PATH lookup if unknown.
                command = [sys.executable or "python3", script_file]
            else:
                command = ["Rscript", script_file]
            # perf_counter is monotonic, unlike time.time(), so the measured
            # interval cannot be distorted by system clock adjustments.
            start = time.perf_counter()
            result = subprocess.run(command, capture_output=True, text=True)
            duration = time.perf_counter() - start
            # Only successful runs take part in the speed comparison; a script
            # that crashes immediately must not be declared the faster one.
            if result.returncode == 0:
                runtimes[lang] = duration

            print("📤 Salida:")
            print(result.stdout.strip())
            if result.stderr.strip():
                print("⚠️ Errores:")
                print(result.stderr.strip())
            else:
                print("✅ Sin errores")
            print(f"⏱ Tiempo de ejecución: {duration:.3f} segundos")

        # Speed comparison.
        if len(runtimes) == 2:
            faster = min(runtimes, key=runtimes.get)
            print(f"🚀 {faster.upper()} fue más rápido ({runtimes[faster]:.3f} s). Puedes considerar usar esa versión.")
    except Exception as ex:
        print("❌ Error al ejecutar los códigos traducidos:", ex)

# Demo entry point: check API connectivity, then translate a small sample
# SAS program to Python.
if __name__ == "__main__":
    test_openai_connection()
    # Sample input: a macro wrapping one DATA step that filters and subsets
    # sashelp.class, followed by the macro invocation.
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

✅ Respuesta JSON guardada en translation_result.json
✅ Código traducido guardado en translated_code.py
⏱ Ejecutando código en PYTHON...
📤 Salida:

⚠️ Errores:
Traceback (most recent call last):
  File "/Users/cesarrojasflores/Documents/Final Project/translated_code.py", line 9, in <module>
    filtered_data = example()
                    ^^^^^^^^^
  File "/Users/cesarrojasflores/Documents/Final Project/translated_code.py", line 5, in example
    df = pd.read_csv('sashelp.class.csv')
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/pandas/io/parsers/readers.py", line 1026, in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/pandas/io/parsers/readers.py", line 620, in _read
    parser = TextFileReader(