In [1]:
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import os
import re
import pandas as pd

In [3]:

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        """
        Analiza código SAS y extrae una representación intermedia estructurada.
        Extrae bloques como DATA, SET, WHERE, RUN, etc.
        """
        blocks = []
        current_block = {}
        lines = sas_code.strip().splitlines()

        for line in lines:
            line = line.strip()
            if line.lower().startswith("data"):
                current_block = {"type": "data_step", "output_table": line.split()[1].strip(';'), "statements": []}
            elif line.lower().startswith("set"):
                current_block["input_table"] = line.split()[1].strip(';')
            elif line.lower().startswith("where"):
                current_block["filter"] = line[len("where"):].strip(' ;')
            elif line.lower().startswith("run"):
                blocks.append(current_block)
                current_block = {}

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks
        }

class IntermediateRepresentation:
    def __init__(self, parsed_data: Dict[str, Any]):
        self.language = parsed_data.get("language")
        self.code = parsed_data.get("code")
        self.purpose = parsed_data.get("purpose")
        self.structure = parsed_data.get("structure")

class CodeGenerator:
    def __init__(self, target_language: str):
        self.target_language = target_language
        self.client = OpenAI()

    def generate(self, ir: IntermediateRepresentation) -> str:
        prompt = f"""
            You are a professional developer with deep knowledge of SAS and {self.target_language}.
            Your task is to fully translate the following SAS code into idiomatic and functional {self.target_language} code.
            
            - Do NOT simplify or summarize.
            - Translate all operations and logic as completely as possible.
            - Use equivalent libraries (e.g. pandas for Python, dplyr for R) when needed.
            - Assume that datasets like `sashelp.class` are already loaded or show how to load them.
            
            SAS code:
            {ir.code}
            
            This code consists of the following structured steps:
            {ir.structure}
            
            Now translate the full logic to {self.target_language} code:
            """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        return response.choices[0].message.content.strip()

class CodeValidator:
    def validate(self, original_code: str, translated_code: str) -> str:
        diff = difflib.unified_diff(
            original_code.splitlines(),
            translated_code.splitlines(),
            fromfile='original',
            tofile='translated',
            lineterm=''
        )
        return '\n'.join(diff)

 

# --- Ejemplo de uso ---

def run_translation(sas_code: str, target_lang: str):
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)

    validator = CodeValidator()
    print(f"\nTranslating to {target_lang}...")
    generator = CodeGenerator(target_lang)
    translated_code = generator.generate(ir)
    print("\nTranslated code:\n", translated_code)

    diff_report = validator.validate(sas_code, translated_code)
    print("\nDiff report:\n", diff_report)



if __name__ == "__main__":
    sas_code = """
     %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")  # Cambia a "R" si deseas traducir a R



Translating to Python...

Translated code:
 In Python, we can use the pandas library to perform similar operations as in the SAS code. Here is the Python equivalent:

```python
import pandas as pd

# Assuming sashelp.class is a CSV file
sashelp_class = pd.read_csv('sashelp.class.csv')

def example():
    filtered = sashelp_class[sashelp_class['age'] > 13][['name', 'age', 'height']]
    return filtered

filtered_df = example()
```

In this Python code:

- We first import the pandas library.
- We read the 'sashelp.class.csv' file into a DataFrame `sashelp_class`.
- We define a function `example` that filters the DataFrame to only include rows where 'age' is greater than 13, and keeps only the 'name', 'age', and 'height' columns.
- We call the function and store the result in `filtered_df`.

Diff report:
 --- original
+++ translated
@@ -1,10 +1,21 @@
+In Python, we can use the pandas library to perform similar operations as in the SAS code. Here is the Python equivalent:
 
-     %macro e

In [5]:
translated_code

NameError: name 'translated_code' is not defined

In [31]:
sas_code = """
/* Calculate monthly payment */
data loan;
  set loan;
  /* Use the monthly interest rate for calculation */
  monthly_interest_rate = interest_rate / 12;
  /* Calculate the monthly payment using the loan formula */
  monthly_payment = loan_amount * monthly_interest_rate /
                    (1 - (1 + monthly_interest_rate)**(-loan_term * 12));
run;

/* Create a table of loan repayments */
data loan_schedule;
  set loan;
  /* Variables for the schedule */
  payment_number = 0;
  remaining_balance = loan_amount;
  interest_paid = 0;
  principal_paid = 0;
  /* Loop to create monthly repayment data */
  do while (remaining_balance > 0);
    payment_number = payment_number + 1;
    /* Calculate the interest portion of the payment */
    interest_paid = remaining_balance * monthly_interest_rate;
    /* Calculate the principal portion of the payment */
    principal_paid = monthly_payment - interest_paid;
    /* Update the remaining balance */
    remaining_balance = remaining_balance - principal_paid;
    /* Add a new record for each month */
    output;
    end;
run;

/* Display the loan schedule */
proc print data=loan_schedule;
  title "Loan Repayment Schedule";
  var payment_number interest_paid principal_paid remaining_balance;
run;
"""

In [33]:
run_translation(sas_code, target_lang="Python")  # Cambia a "R" si deseas traducir a R


Translating to Python...

Translated code:
 Python code:

```python
import pandas as pd

# Assume that the loan dataset is loaded as a pandas DataFrame
# loan = pd.read_csv('loan.csv')

# Calculate monthly payment
loan['monthly_interest_rate'] = loan['interest_rate'] / 12
loan['monthly_payment'] = loan['loan_amount'] * loan['monthly_interest_rate'] / \
                          (1 - (1 + loan['monthly_interest_rate'])**(-loan['loan_term'] * 12))

# Create a table of loan repayments
loan_schedule = pd.DataFrame(columns=['payment_number', 'interest_paid', 'principal_paid', 'remaining_balance'])
for index, row in loan.iterrows():
    payment_number = 0
    remaining_balance = row['loan_amount']
    interest_paid = 0
    principal_paid = 0
    while remaining_balance > 0:
        payment_number += 1
        interest_paid = remaining_balance * row['monthly_interest_rate']
        principal_paid = row['monthly_payment'] - interest_paid
        remaining_balance -= principal_paid
        loa

In [35]:
sas_code = """
%MACRO MACRO_BASES_IEP_BIV(ruta,bases,nummod,flag_cambio);
%DO J=1 %TO &nummod.;
		%LET BASE&J.= %SCAN(&BASES.,&J.," ");

		DATA TABLA_UNICA_TRATADA_VF (KEEP=new_score_fusion);
		SET &ruta..&&BASE&J. ;
		RUN; 

		PROC CONTENTS DATA=TABLA_UNICA_TRATADA_VF VARNUM
		OUT=VARS_TRAT; 
		RUN;

		DATA VARS_TRAT;
		SET VARS_TRAT (KEEP=NAME );
			 TIPO='C';
		RUN;

		PROC SORT DATA=VARS_TRAT; BY TIPO; RUN; 

		PROC SQL NOPRINT;*SE GUARDA EN MACRO-VARIABLES LAS LISTAS CON LOS NOMBRES DE LAS VARIABLES;
		SELECT NAME INTO:NOMBRE&J. SEPARATED BY ' ' FROM VARS_TRAT;
		SELECT TIPO INTO:TIPO&J. SEPARATED BY ' ' FROM VARS_TRAT;
		SELECT COUNT(1) INTO:N&J. FROM VARS_TRAT;
		QUIT;

		DATA TABLA_&J. ;
		SET &ruta..&&BASE&J.;
		MTH = 666;

		RUN; 


	%PUT _USER_;

	%DO i=1 %TO &&N&J.;
	%LET parametro= %SCAN(&&Nombre&j.,&i.," ");
	%LET tipo_var= %SCAN(&&tipo&j.,&i.," ");

	%PUT &i.;
	%PUT &J.;
	%PUT &parametro.;
	%PUT &tipo_var.;

	
		%IF &tipo_var.=C %THEN %DO ;
			/*Obtener los deciles con toda la historia*/

			proc sort data=TABLA_&J. out=BASE_IEP ; by &parametro. target; run; 

			Data BASE_IEP_N;
			Set BASE_IEP;
				Reg = _N_;
			Run;

			proc rank data=BASE_IEP_N Ties=Dense
			out=decil_ref (keep= &parametro. Reg  decil mth TARGET ) groups=20;
			Var &parametro.;
			ranks decil;
			run;

			proc means data=decil_ref  max ; 
			 	class  decil ; 
				var  &parametro. ;
				output out=intervalo_&bases.  max= Maximo min=Minimo; 
			run;

			Proc Freq Data= decil_ref Noprint;
			    Tables  decil*mth /Missing Out= _Freq_(drop= PCT_ROW PERCENT ) OutPct;
				Tables  decil /Missing Out= _Freq_TOTAL (rename= PERCENT = PCT_COL)OutPct;
				Tables  mth*DECIL*TARGET /Missing Out= _Freq_TARGET (drop= PCT_TABL PERCENT PCT_ROW) OutPct;
			Run;

			DATA _Freq_TARGET;
			SET  _Freq_TARGET;
			format Variable $35.;
			Variable="&parametro";
			RUN;
 
			PROC SORT DATA=_Freq_TARGET;
			BY MTH decil variable ; 
			RUN; 


			DATA BASE_FREQ;
			SET  _Freq_TOTAL  _Freq_;
			format Variable $35.;
			Variable="&parametro";

			RENAME COUNT=_FREQ_
			PCT_COL =PERC_OBS ;
			RUN;
			 			  
		%END; 

		%IF &tipo_var.=D %THEN %DO ;
		   proc sort data=TABLA_&J. out=BASE_IEP ; by &parametro. target; run; 
           Proc Freq Data= BASE_IEP  Noprint;
                        Tables  &parametro.*mth /Missing Out= _Freq_(drop= PCT_ROW PERCENT ) OutPct;
						Tables  &parametro. /Missing Out= _Freq_TOTAL (rename= PERCENT = PCT_COL)OutPct;
						Tables  mth*&parametro.*TARGET /Missing Out= _Freq_TARGET (drop= PCT_TABL PERCENT PCT_ROW) OutPct;

            Run;


			DATA _Freq_TARGET;
			SET  _Freq_TARGET;
			format Variable $35.;
			Variable="&parametro";

			RENAME &parametro. = decil;

			length decil $20.;
			RUN;
 
			PROC SORT DATA=_Freq_TARGET;
			BY MTH decil variable ; 
			RUN; 

			DATA BASE_FREQ;
			SET  _Freq_TOTAL _Freq_;
			format Variable $35.;
			Variable="&parametro.";

			RENAME COUNT=_FREQ_
			PCT_COL =PERC_OBS 
			&parametro. = decil;
			length decil $20.;
			RUN;
 			  
		%END;/*TERMINO DE TIPO DE VARIABLE*/ 


/*            *Transpone el conteo de la variable analizada seg˙n su VO por Grupos;*/
            Proc Transpose Data=_Freq_TARGET (where=(target=0))
                        Out= _Freq_TARGET_BUENOS (Rename=(Vo1= Vo0 )) 
                        Prefix = VO ;
                        Var COUNT;
                        By  MTH decil variable ;
            Run;
			Proc Transpose Data=_Freq_TARGET (where=(target=1))
                        Out= _Freq_TARGET_MALOS (Rename=(Vo1= Vo1)) 
                        Prefix = VO ;
                        Var COUNT;
                        By  MTH decil variable ;
            Run;

			Proc Transpose Data=_Freq_TARGET (where=(target=0))
                        Out= _PCT_TARGET_BUENOS (Rename=(Vo1= FREQ0 )) 
                        Prefix = VO ;
                        Var  PCT_COL;
                        By  MTH decil variable ;
            Run;
			Proc Transpose Data=_Freq_TARGET (where=(target=1))
                        Out= _PCT_TARGET_MALOS (Rename=(Vo1= FREQ1)) 
                        Prefix = VO ;
                        Var  PCT_COL;
                        By  MTH decil variable ;
            Run;


	PROC SORT DATA=BASE_FREQ(WHERE=(MISSING(MTH)))  OUT=PORC_ESP_BASE(KEEP=DECIL PERC_OBS 
	RENAME=(PERC_OBS=PERC_ESP));
	BY DECIL ; 
	RUN;  

	PROC SQL;
	CREATE TABLE Base_Freq_FIN AS SELECT 
	A.*, B.PERC_ESP,D.Vo0,C.Vo1,DIVIDE(F.FREQ0,100) AS FREQ0,DIVIDE(E.FREQ1,100) AS FREQ1
FROM Base_Freq AS A 
	LEFT JOIN PORC_ESP_BASE AS B ON A.DECIL=B.DECIL
		LEFT JOIN _Freq_TARGET_MALOS AS C ON A.DECIL=C.DECIL AND A.MTH=C.MTH
			LEFT JOIN _Freq_TARGET_BUENOS AS D ON A.DECIL=D.DECIL AND A.MTH=D.MTH
				LEFT JOIN _PCT_TARGET_MALOS AS E ON A.DECIL=E.DECIL AND A.MTH=E.MTH
					LEFT JOIN _PCT_TARGET_BUENOS AS F ON A.DECIL=F.DECIL AND A.MTH=F.MTH;
	QUIT;
			
	PROC SORT DATA=Base_Freq_FIN ; BY VARIABLE MTH ; RUN;  
			
/*SE CALCULA IEP POR REGISTRO*/
	DATA DETALLE_BASE&J._MES&I.;
	SET Base_Freq_FIN ;
	BY  MTH; 

			RETAIN TotalIEP;
               IEP=SUM((PERC_OBS/100),-(PERC_ESP/100))* LOG((PERC_OBS/100)/(PERC_ESP/100));

		    If First.mth  Then TotalIEP =0;


			   TotalIEP = Sum(TotalIEP,IEP);

			If Last.mth Then IEP_MES = TotalIEP;

			Drop TotalIEP;
	RUN;

	PROC SORT DATA=DETALLE_BASE&J._MES&I.;  BY VARIABLE MTH ; RUN;

DATA DETALLE_BASE&J._MES&I.;
SET DETALLE_BASE&J._MES&I.;
BY 	VARIABLE MTH ;
Retain Cum0 Cum1 IVp Cum0_1 Cum1_1 Ginip KSmax;
 *C·lculo del WOE;
  WOE= Log(Freq0/Freq1);

/**Inicializa variables;*/
If First.MTH Then Do;
            Cum0 =0;
            Cum1=0;
            IVp=0;
            Cum0_1=0;
            Cum1_1=0;
            Ginip=0;
            KSmax=0;

            Ejx =0;
            Ejy =0;
End;

*Frecuencia relativa acumulada;
Cum0 = Sum(Cum0,Freq0);
Cum1 = Sum(Cum1,Freq1);

*Calcula preliminalmente el IV por registro sumando el IV anterior + el actual;
IVp = Sum(IVp, Sum(Freq0,-Freq1)*WOE);

*Calcula preliminarmente el KS por registro qued·ndose con el KS mayor entre el registro anterior y el actual;
KSmax = Max(KSmax, Abs(Sum(Cum1,-Cum0)));

            *Galcula el Gini preliminarmente por registro sumando el Gini anterior + el actual;
            Ginip=Sum(Ginip,Sum(Cum0,-Cum0_1) *Sum(Cum1,Cum1_1));
                        Cum0_1= Cum0;
                        Cum1_1= Cum1;

*En el ˙ltimo registro por bloque de variables deja el valor del...;
                                   *IV: suma de IV de todos los grupos por variable;
                                   *KS: valor m·ximo del grupo de variables de los KS ;
                                   *Gini: suma de los Gini del grupo -1;
If Last.MTH Then Do;
            IV= IVp;
            KS= KSmax;
            Gini = Abs(Ginip-1);
            ROC = (1+Gini)/2;

            Ejx =1;
            Ejy =1;

End;

Drop IVp KSmax Cum0_1 Cum1_1 Ginip ;


RUN;  

		 
	PROC SORT DATA= DETALLE_BASE&J._MES&I. (KEEP= VARIABLE MTH IEP_MES IV KS GINI ROC) 
	OUT=_IEP_FIN_ (WHERE=(NOT MISSING(IEP_MES))); 
	BY VARIABLE;
	RUN; 

    PROC TRANSPOSE DATA= _IEP_FIN_
    Out= RESUMEN_BASE&J._MES&I.
    Prefix = MTH ;
   	VAR IEP_MES IV KS GINI ROC;
    BY variable ;
    RUN;


	%END;/*TERMINO DEL DO DE LAS VARIABLES*/

	DATA FIN_RESUMEN_MODELO_&J.;
	SET RESUMEN_BASE:;
			format MODELO $35.;
			MODELO ="&&BASE&J.";
	RUN;

	DATA FIN_DETALLE_BASE_&J.;
	SET DETALLE_BASE:;
			format MODELO $35.;
			MODELO ="&&BASE&J.";
	RUN;

	PROC DATASETS LIB=WORK;
	DELETE   _Freq_:  DECIL_: PORC_ESP_BASE  RESUMEN_BASE: DETALLE_BASE: _IEP_FIN_ 
	_Freq_TARGET_MALOS _Freq_TARGET_buenos _PCT_TARGET_BUENOS _PCT_TARGET_MALOS;
	RUN; 
	QUIT; 
%END;/*TERMINO DE TODAS LAS BASE */


	DATA STS_MTH_&bases.;
	SET FIN_RESUMEN_MODELO_:;
	RUN;

	DATA sf.detalle&bases.(drop=mth);
	SET FIN_DETALLE_BASE_:;
	RUN;

	PROC DATASETS LIB=WORK;
	DELETE FIN_RESUMEN_MODELO_: FIN_DETALLE_BASE_:;
	RUN;QUIT;
%MEND;	

"""

In [37]:
run_translation(sas_code, target_lang="Python")  # Cambia a "R" si deseas traducir a R


Translating to Python...

Translated code:
 This is a complex SAS macro that performs a series of data manipulations and calculations. Here's a Python translation using pandas and numpy libraries. Please note that the SAS macro variables are replaced with Python function arguments and the SAS datasets are replaced with pandas dataframes.

```python
import pandas as pd
import numpy as np

def macro_bases_iep_biv(ruta, bases, nummod, flag_cambio):
    for j in range(1, nummod+1):
        base = bases.split(" ")[j-1]

        # Loading the dataset
        tabla_unica_tratada_vf = pd.read_csv(f"{ruta}/{base}")

        # Keeping only the 'new_score_fusion' column
        tabla_unica_tratada_vf = tabla_unica_tratada_vf[['new_score_fusion']]

        # Creating a dataframe with variable names
        vars_trat = pd.DataFrame(tabla_unica_tratada_vf.columns, columns=['NAME'])
        vars_trat['TIPO'] = 'C'

        # Sorting the dataframe
        vars_trat = vars_trat.sort_values(by='TIPO')


In [13]:
class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        """
        Analiza código SAS y extrae una representación intermedia detallada.
        Detecta estructuras comunes: DATA, SET, WHERE, KEEP, DROP, IF, PROC, MACRO.
        """
        blocks = []
        current_block = {}
        macros = []
        in_macro = False
        macro_block = {}
        lines = sas_code.strip().splitlines()

        for line in lines:
            line = line.strip()
            if line.lower().startswith('%macro'):
                in_macro = True
                macro_name = line.split()[1].split('(')[0]
                macro_block = {
                    "type": "macro",
                    "name": macro_name,
                    "definition": []
                }
            elif in_macro and line.lower().startswith('%mend'):
                in_macro = False
                macros.append(macro_block)
                macro_block = {}
            elif in_macro:
                macro_block["definition"].append(line)
            elif line.lower().startswith("data"):
                current_block = {
                    "type": "data_step",
                    "output_table": line.split()[1].strip(';'),
                    "statements": []
                }
            elif line.lower().startswith("set"):
                current_block["input_table"] = line.split()[1].strip(';')
            elif line.lower().startswith("where"):
                current_block["filter"] = line[len("where"):].strip(' ;')
            elif line.lower().startswith("keep"):
                current_block["keep"] = line[len("keep"):].strip(' ;')
            elif line.lower().startswith("drop"):
                current_block["drop"] = line[len("drop"):].strip(' ;')
            elif line.lower().startswith("if"):
                current_block.setdefault("conditions", []).append(line.strip(' ;'))
            elif line.lower().startswith("proc"):
                current_block["proc"] = line.strip(' ;')
            elif line.lower().startswith("run"):
                blocks.append(current_block)
                current_block = {}
            elif line:
                current_block.setdefault("statements", []).append(line.strip(' ;'))

        return {
            "language": "SAS",
            "code": sas_code,
            "purpose": "data analysis / statistics / reporting",
            "structure": blocks,
            "macros": macros
        }

class IntermediateRepresentation:
    def __init__(self, parsed_data: Dict[str, Any]):
        self.language = parsed_data.get("language")
        self.code = parsed_data.get("code")
        self.purpose = parsed_data.get("purpose")
        self.structure = parsed_data.get("structure")
        self.macros = parsed_data.get("macros")

    def describe(self) -> str:
        return f"IR with {len(self.structure)} block(s) and {len(self.macros)} macro(s):\nStructure: {self.structure}\nMacros: {self.macros}"

class CodeGenerator:
    def __init__(self, target_language: str):
        self.target_language = target_language
        self.client = OpenAI()

    def generate(self, ir: IntermediateRepresentation) -> str:
        prompt = f"""
You are a professional developer with deep knowledge of SAS and {self.target_language}.
Translate the following SAS code to full, idiomatic, and functional {self.target_language} code.

- Do NOT simplify or use placeholders like 'pass'.
- Translate all operations and logic as completely as possible.
- Use equivalent libraries (e.g. pandas for Python, dplyr for R) when needed.
- Assume access to datasets like 'sashelp.class'.
- If macros exist, implement equivalent functions or preprocessing logic.

SAS code:
{ir.code}

Structure:
{ir.structure}

Macros:
{ir.macros}

Now translate the full logic to {self.target_language} ONLY give me the code:
"""
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        raw_code = response.choices[0].message.content.strip()

        cleaned = re.sub(r"(?i)^.*?```(?:python|r)?\\n", "", raw_code)
        cleaned = re.sub(r"```.*$", "", cleaned, flags=re.DOTALL)
        cleaned = re.sub(r"^\s*(Python|R) code:\s*", "", cleaned, flags=re.IGNORECASE)

        return cleaned.strip()

class CodeValidator:
    def validate(self, original_code: str, translated_code: str) -> str:
        diff = difflib.unified_diff(
            original_code.splitlines(),
            translated_code.splitlines(),
            fromfile='original',
            tofile='translated',
            lineterm=''
        )
        return '\n'.join(diff)

    def run_translated_code(self, code: str, language: str) -> str:
        suffix = '.py' if language.lower() == 'python' else '.r'
        with tempfile.NamedTemporaryFile(mode='w+', suffix=suffix, delete=False) as temp_file:
            temp_file.write(code)
            temp_file.flush()
            try:
                if language.lower() == 'python':
                    result = subprocess.run(['python3', temp_file.name], capture_output=True, text=True)
                elif language.lower() == 'r':
                    result = subprocess.run(['Rscript', temp_file.name], capture_output=True, text=True)
                else:
                    return f"Unsupported language: {language}"
                return result.stdout + result.stderr
            finally:
                os.remove(temp_file.name)

    def validate_functional_equivalence(self, df_expected: pd.DataFrame, df_actual: pd.DataFrame) -> str:
        try:
            pd.testing.assert_frame_equal(df_expected.reset_index(drop=True), df_actual.reset_index(drop=True))
            return "✅ DataFrames are functionally equivalent."
        except AssertionError as e:
            return f"❌ Functional difference detected:\n{str(e)}"




In [15]:
# --- Ejemplo de uso ---

def run_translation(sas_code: str, target_lang: str):
    parser = SASParser()
    parsed = parser.parse(sas_code)
    ir = IntermediateRepresentation(parsed)
    print("\n[IR Summary]\n", ir.describe())

    validator = CodeValidator()
    print(f"\nTranslating to {target_lang}...")
    generator = CodeGenerator(target_lang)
    translated_code = generator.generate(ir)
    print("\nTranslated code:\n", translated_code)

    diff_report = validator.validate(sas_code, translated_code)
    print("\nDiff report:\n", diff_report)

    print(f"\nRunning {target_lang} code...")
    output = validator.run_translated_code(translated_code, target_lang)
    print("\nExecution output:\n", output)

if __name__ == "__main__":
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


[IR Summary]
 IR with 0 block(s) and 1 macro(s):
Structure: []
Macros: [{'type': 'macro', 'name': 'example', 'definition': ['data filtered;', 'set sashelp.class;', 'where age > 13;', 'keep name age height;', 'run;']}]

Translating to Python...

Translated code:
 In Python, you can use the pandas library to perform similar data manipulation operations as in SAS. Here's how you can translate the SAS code to Python:

Diff report:
 --- original
+++ translated
@@ -1,10 +1 @@
-
-    %macro example();
-      data filtered;
-        set sashelp.class;
-        where age > 13;
-        keep name age height;
-      run;
-    %mend example;
-    %example;
-    
+In Python, you can use the pandas library to perform similar data manipulation operations as in SAS. Here's how you can translate the SAS code to Python:

Running Python code...

Execution output:
   File "/var/folders/j4/p802rxx54qz8sky8x8dn1cbc0000gn/T/tmp3lq8efmm.py", line 1
    In Python, you can use the pandas library to perform sim

In [7]:
import os
from openai import OpenAI
from typing import Any, Dict, List
import difflib
import subprocess
import tempfile
import re
import pandas as pd
import json

# Asegurar que la clave de API esté disponible desde la variable de entorno
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("OPENAI_API_KEY not set in environment variables.")

# Función sencilla para probar la conexión con la API de OpenAI
def test_openai_connection():
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Say hello and write a line of Python code."}],
            temperature=0.0,
            max_tokens=50
        )
        print("✅ Conexión exitosa con OpenAI:")
        print("Respuesta:", response.choices[0].message.content.strip())
    except Exception as e:
        print("❌ Error al conectar con OpenAI:")
        print(str(e))

class SASParser:
    def parse(self, sas_code: str) -> Dict[str, Any]:
        return {"code": sas_code}

def run_translation(sas_code: str, target_lang: str):
    print(f"Traduciendo código SAS a {target_lang}...")
    client = OpenAI()
    prompt = f"""
You are a professional developer. Translate the following SAS code to {target_lang}.
Respond in JSON format with two fields: \"description\" and \"code\".

\"description\" should be a brief explanation of the code purpose.
\"code\" should contain only the translated code.

SAS code:
{sas_code}
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    raw_json = response.choices[0].message.content.strip()

    with open("translation_result.json", "w") as f:
        f.write(raw_json)
    print("\n✅ Respuesta JSON guardada en translation_result.json")

    # Intentar cargar el código desde el JSON y guardarlo como archivo de código
    try:
        # Intentar extraer el JSON real desde el mensaje
        match = re.search(r'{.*}', raw_json, re.DOTALL)
        if not match:
            raise ValueError("No se encontró un bloque JSON válido en la respuesta.")

        cleaned_json = match.group(0)
        json_data = json.loads(cleaned_json)
        code_only = json_data.get("code", "").strip()

        ext = ".py" if target_lang.lower() == "python" else ".r"
        filename = f"translated_code{ext}"
        with open(filename, "w") as f:
            f.write(code_only)
        print(f"✅ Código traducido guardado en {filename}")
    except Exception as e:
        print("❌ No se pudo extraer el código del JSON:", e)

if __name__ == "__main__":
    test_openai_connection()
    sas_code = """
    %macro example();
      data filtered;
        set sashelp.class;
        where age > 13;
        keep name age height;
      run;
    %mend example;
    %example;
    """
    run_translation(sas_code, target_lang="Python")


✅ Conexión exitosa con OpenAI:
Respuesta: Hello! Here's a line of Python code for you:

print("Hello, World!")
Traduciendo código SAS a Python...

✅ Respuesta JSON guardada en translation_result.json
❌ No se pudo extraer el código del JSON: Invalid control character at: line 4 column 25 (char 313)
