In [1]:
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import os
import re
import pandas as pd

In [13]:
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello"}],
            temperature=0.0,
            max_tokens=10
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))
test_openai_connection()

✅ Conexión exitosa con OpenAI:
Respuesta: Hello! How can I assist you today?


In [31]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        """
        Analiza código SAS y extrae una representación intermedia detallada.
        Detecta estructuras comunes: DATA, SET, WHERE, KEEP, DROP, IF, PROC, MACRO.
        """
        blocks = []
        current_block = {}
        macros = []
        in_macro = False
        macro_block = {}
        lines = sas_code.strip().splitlines()

        for line in lines:
            line = line.strip()
            if line.lower().startswith('%macro'):
                in_macro = True
                macro_name = line.split()[1].split('(')[0]
                macro_block = {
                    "type": "macro",
                    "name": macro_name,
                    "definition": []
                }
            elif in_macro and line.lower().startswith('%mend'):
                in_macro = False
                macros.append(macro_block)
                macro_block = {}
            elif in_macro:
                macro_block["definition"].append(line)
            elif line.lower().startswith("data"):
                current_block = {
                    "type": "data_step",
                    "output_table": line.split()[1].strip(';'),
                    "statements": []
                }
            elif line.lower().startswith("set"):
                current_block["input_table"] = line.split()[1].strip(';')
            elif line.lower().startswith("where"):
                current_block["filter"] = line[len("where"):].strip(' ;')
            elif line.lower().startswith("keep"):
                current_block["keep"] = line[len("keep"):].strip(' ;')
            elif line.lower().startswith("drop"):
                current_block["drop"] = line[len("drop"):].strip(' ;')
            elif line.lower().startswith("if"):
                current_block.setdefault("conditions", []).append(line.strip(' ;'))
            elif line.lower().startswith("proc"):
                current_block["proc"] = line.strip(' ;')
            elif line.lower().startswith("run"):
                blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(' ;'))

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros
        }

class IntermediateRepresentation:
    def __init__(self, parsed_data: Dict[str, Any]):
        self.language = parsed_data.get("language")
        self.code = parsed_data.get("code")
        self.purpose = parsed_data.get("purpose")
        self.structure = parsed_data.get("structure")
        self.macros = parsed_data.get("macros")

    def describe(self) -> str:
        return f"IR with {len(self.structure)} block(s) and {len(self.macros)} macro(s):\nStructure: {self.structure}\nMacros: {self.macros}"

class CodeGenerator:
    def __init__(self, target_language: str):
        self.target_language = target_language
        self.client = OpenAI()

    def generate(self, ir: IntermediateRepresentation) -> str:
        prompt = f"""
You are a professional developer with deep knowledge of SAS and {self.target_language}.
Translate the following SAS code to full, idiomatic, and functional {self.target_language} code.

- Do NOT simplify or use placeholders like 'pass'.
- Translate all operations and logic as completely as possible.
- Use equivalent libraries (e.g. pandas for Python, dplyr for R) when needed.
- Assume access to datasets like 'sashelp.class'.
- If macros exist, implement equivalent functions or preprocessing logic.

SAS code:
{ir.code}

Structure:
{ir.structure}

Macros:
{ir.macros}

Now translate the full logic to {self.target_language} Do not summarize. Return only code:
"""
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        raw_code = response.choices[0].message.content.strip()

        cleaned = re.sub(r"(?i)^.*?```(?:python|r)?\\n", "", raw_code)
        cleaned = re.sub(r"```.*$", "", cleaned, flags=re.DOTALL)
        cleaned = re.sub(r"^\s*(Python|R) code:\s*", "", cleaned, flags=re.IGNORECASE)

        return cleaned.strip()

class CodeValidator:
    def validate(self, original_code: str, translated_code: str) -> str:
        diff = difflib.unified_diff(
            original_code.splitlines(),
            translated_code.splitlines(),
            fromfile='original',
            tofile='translated',
            lineterm=''
        )
        return '\n'.join(diff)

    def run_translated_code(self, code: str, language: str) -> str:
        suffix = '.py' if language.lower() == 'python' else '.r'
        with tempfile.NamedTemporaryFile(mode='w+', suffix=suffix, delete=False) as temp_file:
            temp_file.write(code)
            temp_file.flush()
            try:
                if language.lower() == 'python':
                    result = subprocess.run(['python3', temp_file.name], capture_output=True, text=True)
                elif language.lower() == 'r':
                    result = subprocess.run(['Rscript', temp_file.name], capture_output=True, text=True)
                else:
                    return f"Unsupported language: {language}"
                return result.stdout + result.stderr
            finally:
                os.remove(temp_file.name)

    def validate_functional_equivalence(self, df_expected: pd.DataFrame, df_actual: pd.DataFrame) -> str:
        try:
            pd.testing.assert_frame_equal(df_expected.reset_index(drop=True), df_actual.reset_index(drop=True))
            return "✅ DataFrames are functionally equivalent."
        except AssertionError as e:
            return f"❌ Functional difference detected:\n{str(e)}"

# --- Ejemplo de uso ---

def run_translation(sas_code: str, target_lang: str):
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)
    print("\n[IR Summary]\n", ir.describe())

    validator = CodeValidator()
    print(f"\nTranslating to {target_lang} with OpenAI...")
    generator = CodeGenerator(target_lang)
    translated_code = generator.generate(ir)
    print("\nTranslated code:\n", translated_code)

    diff_report = validator.validate(sas_code, translated_code)
    print("\nDiff report:\n", diff_report)

    print(f"\nRunning {target_lang} code...")
    output = validator.run_translated_code(translated_code, target_lang)
    print("\nExecution output:\n", output)

if __name__ == "__main__":
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")



[IR Summary]
 IR with 0 block(s) and 1 macro(s):
Structure: []
Macros: [{'type': 'macro', 'name': 'example', 'definition': ['data filtered;', 'set sashelp.class;', 'where age > 13;', 'keep name age height;', 'run;']}]

Translating to Python with OpenAI...

Translated code:
 In Python, the equivalent code using the pandas library would look like this:

Diff report:
 --- original
+++ translated
@@ -1,10 +1 @@
-
-    %macro example();
-      data filtered;
-        set sashelp.class;
-        where age > 13;
-        keep name age height;
-      run;
-    %mend example;
-    %example;
-    
+In Python, the equivalent code using the pandas library would look like this:

Running Python code...

Execution output:
   File "/var/folders/j4/p802rxx54qz8sky8x8dn1cbc0000gn/T/tmpkzexmsiu.py", line 1
    In Python, the equivalent code using the pandas library would look like this:
       ^^^^^^
SyntaxError: invalid syntax



In [5]:

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        """
        Analiza código SAS y extrae una representación intermedia detallada.
        Detecta estructuras comunes: DATA, SET, WHERE, KEEP, DROP, IF, PROC, MACRO.
        """
        blocks = []
        current_block = {}
        macros = []
        in_macro = False
        macro_block = {}
        lines = sas_code.strip().splitlines()

        for line in lines:
            line = line.strip()
            if line.lower().startswith('%macro'):
                in_macro = True
                macro_name = line.split()[1].split('(')[0]
                macro_block = {
                    "type": "macro",
                    "name": macro_name,
                    "definition": []
                }
            elif in_macro and line.lower().startswith('%mend'):
                in_macro = False
                macros.append(macro_block)
                macro_block = {}
            elif in_macro:
                macro_block["definition"].append(line)
            elif line.lower().startswith("data"):
                current_block = {
                    "type": "data_step",
                    "output_table": line.split()[1].strip(';'),
                    "statements": []
                }
            elif line.lower().startswith("set"):
                current_block["input_table"] = line.split()[1].strip(';')
            elif line.lower().startswith("where"):
                current_block["filter"] = line[len("where"):].strip(' ;')
            elif line.lower().startswith("keep"):
                current_block["keep"] = line[len("keep"):].strip(' ;')
            elif line.lower().startswith("drop"):
                current_block["drop"] = line[len("drop"):].strip(' ;')
            elif line.lower().startswith("if"):
                current_block.setdefault("conditions", []).append(line.strip(' ;'))
            elif line.lower().startswith("proc"):
                current_block["proc"] = line.strip(' ;')
            elif line.lower().startswith("run"):
                blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(' ;'))

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros
        }

class IntermediateRepresentation:
    def __init__(self, parsed_data: Dict[str, Any]):
        self.language = parsed_data.get("language")
        self.code = parsed_data.get("code")
        self.purpose = parsed_data.get("purpose")
        self.structure = parsed_data.get("structure")
        self.macros = parsed_data.get("macros")

    def describe(self) -> str:
        return f"IR with {len(self.structure)} block(s) and {len(self.macros)} macro(s):\nStructure: {self.structure}\nMacros: {self.macros}"

class CodeGenerator:
    def __init__(self, target_language: str):
        self.target_language = target_language
        self.client = OpenAI()

    def generate(self, ir: IntermediateRepresentation) -> str:
        prompt = f"""
You are a professional developer with deep knowledge of SAS and {self.target_language}.
Translate the following SAS code to full, idiomatic, and functional {self.target_language} code.

- Do NOT simplify or use placeholders like 'pass'.
- Translate all operations and logic as completely as possible.
- Use equivalent libraries (e.g. pandas for Python, dplyr for R) when needed.
- Assume access to datasets like 'sashelp.class'.
- If macros exist, implement equivalent functions or preprocessing logic.


SAS code:
{ir.code}

Structure:
{ir.structure}

Macros:
{ir.macros}

Now translate the full logic to {self.target_language}:



"""
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        raw_code = response.choices[0].message.content.strip()

        cleaned = re.sub(r"(?i)^.*?```(?:python|r)?\\n", "", raw_code)
        cleaned = re.sub(r"```.*$", "", cleaned, flags=re.DOTALL)
        cleaned = re.sub(r"^\s*(Python|R) code:\s*", "", cleaned, flags=re.IGNORECASE)

        return cleaned.strip()

class CodeValidator:
    def validate(self, original_code: str, translated_code: str) -> str:
        diff = difflib.unified_diff(
            original_code.splitlines(),
            translated_code.splitlines(),
            fromfile='original',
            tofile='translated',
            lineterm=''
        )
        return '\n'.join(diff)

    def run_translated_code(self, code: str, language: str) -> str:
        suffix = '.py' if language.lower() == 'python' else '.r'
        with tempfile.NamedTemporaryFile(mode='w+', suffix=suffix, delete=False) as temp_file:
            temp_file.write(code)
            temp_file.flush()
            try:
                if language.lower() == 'python':
                    result = subprocess.run(['python3', temp_file.name], capture_output=True, text=True)
                elif language.lower() == 'r':
                    result = subprocess.run(['Rscript', temp_file.name], capture_output=True, text=True)
                else:
                    return f"Unsupported language: {language}"
                return result.stdout + result.stderr
            finally:
                os.remove(temp_file.name)

    def validate_functional_equivalence(self, df_expected: pd.DataFrame, df_actual: pd.DataFrame) -> str:
        try:
            pd.testing.assert_frame_equal(df_expected.reset_index(drop=True), df_actual.reset_index(drop=True))
            return "✅ DataFrames are functionally equivalent."
        except AssertionError as e:
            return f"❌ Functional difference detected:\n{str(e)}"

# --- Ejemplo de uso ---

def run_translation(sas_code: str, target_lang: str):
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)
    print("\n[IR Summary]\n", ir.describe())

    validator = CodeValidator()
    print(f"\nTranslating to {target_lang} with OpenAI...")
    generator = CodeGenerator(target_lang)
    translated_code = generator.generate(ir)
    print("\nTranslated code:\n", translated_code)

    diff_report = validator.validate(sas_code, translated_code)
    print("\nDiff report:\n", diff_report)

    print(f"\nRunning {target_lang} code...")
    output = validator.run_translated_code(translated_code, target_lang)
    print("\nExecution output:\n", output)

if __name__ == "__main__":
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")



[IR Summary]
 IR with 0 block(s) and 1 macro(s):
Structure: []
Macros: [{'type': 'macro', 'name': 'example', 'definition': ['data filtered;', 'set sashelp.class;', 'where age > 13;', 'keep name age height;', 'run;']}]

Translating to Python with OpenAI...

Translated code:
 

Diff report:
 --- original
+++ translated
@@ -1,10 +0,0 @@
-
-    %macro example();
-      data filtered;
-        set sashelp.class;
-        where age > 13;
-        keep name age height;
-      run;
-    %mend example;
-    %example;
-    

Running Python code...

Execution output:
 


In [None]:
parser = SASParser()
parsed = parser.parse(sas_code)
ir = IntermediateRepresentation(parsed)
print("\n[IR Summary]\n", ir.describe())

validator = CodeValidator()
print(f"\nTranslating to {target_lang} with OpenAI...")
generator = CodeGenerator(target_lang)
translated_code = generator.generate(ir)
print("\nTranslated code:\n", translated_code)

In [19]:
parser = SASParser()
parsed = parser.parse(sas_code)
ir = IntermediateRepresentation(parsed)
print("\n[IR Summary]\n", ir.describe())



[IR Summary]
 IR with 0 block(s) and 1 macro(s):
Structure: []
Macros: [{'type': 'macro', 'name': 'example', 'definition': ['data filtered;', 'set sashelp.class;', 'where age > 13;', 'keep name age height;', 'run;']}]


In [25]:
target_lang = "Python"
validator = CodeValidator()
generator = CodeGenerator(target_lang)

In [29]:
generator.generate(ir)

'In Python, you can use the pandas library to perform the same operations as in the SAS code. Here is the equivalent Python code:'

In [33]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Do not summarize. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    print("\nTraducción generada:\n")
    print(response.choices[0].message.content.strip())

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")

    # Guardar resultado traducido en un archivo
    client = OpenAI()
    prompt = f"""
Translate the following SAS code to Python. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    code_only = response.choices[0].message.content.strip()
    code_only = re.sub(r"(?i)^.*?```(?:python)?\n", "", code_only)
    code_only = re.sub(r"```.*$", "", code_only, flags=re.DOTALL)

    with open("translated_code.py", "w") as f:
        f.write(code_only)
    print("
✅ Código traducido guardado en translated_code.py")


SyntaxError: unterminated string literal (detected at line 85) (1408493743.py, line 85)

In [37]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Do not summarize. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    print("\nTraducción generada:\n")
    print(response.choices[0].message.content.strip())

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")

    # Guardar resultado traducido en un archivo
    client = OpenAI()
    prompt = f"""
Translate the following SAS code to Python. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    code_only = response.choices[0].message.content.strip()
    code_only = re.sub(r"(?i)^.*?```(?:python)?\n", "", code_only)
    code_only = re.sub(r"```.*$", "", code_only, flags=re.DOTALL)

    with open("translated_code.py", "w") as f:
        f.write(code_only)
    print(" Código traducido guardado en translated_code.py")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

Traducción generada:

In Python, you can use the pandas library to perform similar operations as in the SAS code. Here's how you can translate the SAS code to Python:

```python
import pandas as pd

def example():
    # Assuming that 'sashelp.class' is a CSV file
    df = pd.read_csv('sashelp.class.csv')
    filtered = df[df['age'] > 13][['name', 'age', 'height']]
    return filtered

filtered_df = example()
```

Please note that in this Python code, it's assumed that 'sashelp.class' is a CSV file. If your data is in a different format or if you're fetching it from a different source (like a database), you'll need to adjust the data loading line accordingly.
 Código traducido guardado en translated_code.py


In [41]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Do not summarize. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    print("\nTraducción generada:\n")
    print(response.choices[0].message.content.strip())

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")

    # Guardar resultado traducido en un archivo
    client = OpenAI()
    prompt = f"""
Translate the following SAS code to Python. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    code_only = response.choices[0].message.content.strip()
    if "```" in code_only:
        code_only = re.sub(r"(?i)^.*?```(?:python)?", "", code_only)
        code_only = re.sub(r"```.*$", "", code_only, flags=re.DOTALL)

    f.write(code_only)
    print(" Código traducido guardado en translated_code.py")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

Traducción generada:

In Python, using pandas library, the equivalent code would be:

```python
import pandas as pd

def example():
    # Assuming sashelp.class is a csv file
    data = pd.read_csv('sashelp.class.csv')
    filtered = data[(data['age'] > 13)][['name', 'age', 'height']]
    return filtered

example()
```

Please replace `'sashelp.class.csv'` with the actual path to your data file. If your data is in another format (like Excel, SQL database, etc.), you will need to adjust the data loading line accordingly.


ValueError: I/O operation on closed file.

In [43]:
code_only

"In Python, you can use the pandas library to perform similar data manipulation tasks. Here's how you can translate the SAS code to Python:\n\n"

In [55]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Do not summarize. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    print("\nTraducción generada:\n")
    print(response.choices[0].message.content.strip())

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")

    # Guardar resultado traducido en un archivo
    client = OpenAI()
    prompt = f"""
Translate the following SAS code to Python. Return only code:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    code_only = response.choices[0].message.content.strip()
    if "```" in code_only:
        pattern_start = rf"(?i)^.*?```(?:{target_lang.lower()})?"
        code_only = re.sub(pattern_start, "", code_only)
        code_only = re.sub(r"```.*$", "", code_only, flags=re.DOTALL)

    filename = "translated_code.py" if target_lang.lower() == "python" else "translated_code.r"
    with open(filename, "w") as f:
        f.write(code_only)
    print(f" Código traducido guardado en {filename}")
    print("Código traducido guardado en translated_code.py")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

Traducción generada:

In Python, the equivalent code using pandas library would be:

```python
import pandas as pd

def example():
    # Assuming sashelp.class is a csv file
    df = pd.read_csv('sashelp.class.csv')
    filtered = df[df['age'] > 13][['name', 'age', 'height']]
    return filtered

filtered_df = example()
```

Please note that the original SAS code is using a dataset from SAS's built-in library, `sashelp.class`. In the Python code, it's assumed that the same dataset is available as a CSV file. If the dataset is stored in a different format or location, the code to load the dataset would need to be adjusted accordingly.
 Código traducido guardado en translated_code1.py
Código traducido guardado en translated_code.py


In [57]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Return only code. No comments, no explanation:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_output = response.choices[0].message.content.strip()
    print("\nTraducción generada:\n")
    print(raw_output)

    # Extraer solo el bloque de código dentro de ```
    code_block = re.findall(rf"```(?:{target_lang.lower()})?\\n(.*?)```", raw_output, re.DOTALL)
    code_only = code_block[0].strip() if code_block else raw_output

    filename = "translated_code.py" if target_lang.lower() == "python" else "translated_code.r"
    with open(filename, "w") as f:
        f.write(code_only)
    print(f"\n✅ Código traducido guardado en {filename}")

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

Traducción generada:

import pandas as pd
from sas7bdat import SAS7BDAT

def example():
    with SAS7BDAT('sashelp.class') as file:
        df = file.to_data_frame()
    filtered = df[df['age'] > 13][['name', 'age', 'height']]
    return filtered

example()

✅ Código traducido guardado en translated_code.py


In [61]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Return only code. No comments, no explanation:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_output = response.choices[0].message.content.strip()
    print("\nTraducción generada:\n")
    print(raw_output)

    # Extraer solo el bloque de código dentro de ```
    code_block = re.findall(rf"```(?:{target_lang.lower()})?\\n(.*?)```", raw_output, re.DOTALL)
    code_only = code_block[0].strip() if code_block else raw_output

    filename = "translated_code.py" if target_lang.lower() == "python" else "translated_code.r"
    with open(filename, "w") as f:
        f.write(code_only)
    print(f"\n✅ Código traducido guardado en {filename}")

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")

    # Generar resultado en formato JSON con separación de texto y código
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}.
Respond in JSON format with two fields: "description" and "code".

"description" should be a brief explanation of the code purpose.
"code" should contain only the translated code.

SAS code:
{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    with open("translation_result.json", "w") as f:
        f.write(raw_json)

    print("Respuesta JSON guardada en translation_result.json")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

Traducción generada:

import pandas as pd
from sas7bdat import SAS7BDAT

def example():
    with SAS7BDAT('sashelp.class') as file:
        df = file.to_data_frame()
    filtered = df[df['age'] > 13][['name', 'age', 'height']]
    return filtered

example()

✅ Código traducido guardado en translated_code.py
Respuesta JSON guardada en translation_result.json


In [85]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        # ... (código existente sin cambios) ...
        return {"code": sas_code}

# Resto del código sin cambios...

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}. Return only code. No comments, no explanation:

{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_output = response.choices[0].message.content.strip()
    print("\nTraducción generada:\n")
    print(raw_output)

    # Extraer solo el bloque de código dentro de ```
    code_block = re.findall(rf"```(?:{target_lang.lower()})?\\n(.*?)```", raw_output, re.DOTALL)
    code_only = code_block[0].strip() if code_block else raw_output

    filename = "translated_code.py" if target_lang.lower() == "python" else "translated_code.r"
    with open(filename, "w") as f:
        f.write(code_only)
    print(f"\n✅ Código traducido guardado en {filename}")

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")

    # Generar resultado en formato JSON con separación de texto y código
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}.
Respond in JSON format with two fields: "description" and "code".

"description" should be a brief explanation of the code purpose.
"code" should contain only the translated code.

SAS code:
{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    with open("translation_result_f.json", "w") as f:
        f.write(raw_json)

    print("Respuesta JSON guardada en translation_result.json")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

Traducción generada:

import pandas as pd
from sas7bdat import SAS7BDAT

def example():
    with SAS7BDAT('sashelp.class') as file:
        df = file.to_data_frame()
    filtered = df[df['age'] > 13][['name', 'age', 'height']]
    return filtered

example()

✅ Código traducido guardado en translated_code.py
Respuesta JSON guardada en translation_result.json


In [87]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd
import json

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        return {"code": sas_code}

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer with deep knowledge of SAS and {target_lang}. Translate the following SAS code to full, idiomatic, and functional {target_lang} code.


Respond in JSON format with two fields: \"description\" and \"code\".

\"description\" should be a brief explanation of the code purpose.
\"code\" should contain only the translated code.



- Do NOT simplify or use placeholders like 'pass'.
- Translate all operations and logic as completely as possible.
- Use equivalent libraries (e.g. pandas for Python, dplyr for R) when needed.
- Assume access to datasets like 'sashelp.class'.
- If macros exist, implement equivalent functions or preprocessing logic.


SAS code:
{ir.code}

Structure:
{ir.structure}

Macros:
{ir.macros}

Now translate the full logic to {target_lang}:

"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    with open("translation_result.json", "w") as f:
        f.write(raw_json)
    print("\n✅ Respuesta JSON guardada en translation_result.json")

    # Intentar cargar el código desde el JSON y guardarlo como archivo de código
    try:
        json_data = json.loads(raw_json)
        code_only = json_data.get("code", "").strip()
        ext = ".py" if target_lang.lower() == "python" else ".r"
        filename = f"translated_code{ext}"
        with open(filename, "w") as f:
            f.write(code_only)
        print(f"✅ Código traducido guardado en {filename}")
    except Exception as e:
        print("❌ No se pudo extraer el código del JSON:", e)

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

✅ Respuesta JSON guardada en translation_result.json
❌ No se pudo extraer el código del JSON: Invalid control character at: line 4 column 10 (char 325)


In [79]:
raw_json

'{\n"description": "The SAS code is defining a macro that filters a dataset \'sashelp.class\' to only include rows where \'age\' is greater than 13. It then keeps only the \'name\', \'age\', and \'height\' columns. The Python equivalent would use the pandas library to achieve the same result.",\n"code": \n"\nimport pandas as pd\n\ndef example():\n    # Assuming \'sashelp.class\' is a csv file\n    data = pd.read_csv(\'sashelp.class.csv\')\n    filtered = data[data[\'age\'] > 13][[\'name\', \'age\', \'height\']]\n    return filtered\n\nexample()\n"\n}'

In [83]:
json_data = json.loads(raw_json)

JSONDecodeError: Invalid control character at: line 4 column 2 (char 294)