In [None]:
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio anthropic httpx==0.27.2

In [3]:
# Sign in to HuggingFace Hub

# Read the access token stored under the 'HF_TOKEN' key of `userdata`
# (presumably google.colab.userdata; its import is not visible in this notebook, TODO confirm).
hf_token = userdata.get('HF_TOKEN')
# Authenticate this session with the token; add_to_git_credential=True is passed through to `login`.
login(hf_token, add_to_git_credential=True)

In [4]:
# System message and prompt

# System prompt shared by every model: asks for a C++-only answer with identical output.
# Written as one parenthesised literal; adjacent string literals are joined by the parser.
system_message = (
    "Eres un asistente que reimplementa código Python en C++ de alto rendimiento para una Mac M2. "
    "Responde solo con código C++; usa los comentarios con moderación y no proporciones ninguna explicación más allá de comentarios ocasionales. "
    "La respuesta C++ debe producir una salida idéntica en el menor tiempo posible."
)

def user_prompt_for(python):
    """Build the user prompt asking for a C++ port of the given Python source.

    Args:
        python: Python source code (str) appended after the instructions.

    Returns:
        The instruction sentences followed by a blank line and then `python`.
    """
    # Each instruction ends with a trailing space so that consecutive sentences
    # do not run together (previously "...posible.Responde...", "...exactos.Responde...").
    user_prompt = "Reescribe este código Python en C++ con la implementación más rápida posible que produzca una salida idéntica en el menor tiempo posible. "
    user_prompt += "Responde solo con código C++; no expliques tu trabajo más allá de algunos comentarios. "
    user_prompt += "Manten la implementación de la generación de números aleatorios idénticos para que los resultados de la coincidencia sean exactos. "
    user_prompt += "Responde solo con código C++; no añadas nada más que código; usa los comentarios con moderación y no proporciones ninguna explicación más allá de comentarios ocasionales. "
    user_prompt += "Presta atención a los tipos de números para asegurar que no haya desbordamientos de int (overflow). Recuerda incluir todos los paquetes de C++ necesarios, como iomanip.\n\n"
    user_prompt += python
    return user_prompt

def messages_for(python):
    """Return the two-message chat (system turn, then user turn) for a Python snippet.

    The system turn carries `system_message`; the user turn carries the prompt
    produced by `user_prompt_for(python)`.
    """
    system_turn = dict(role="system", content=system_message)
    user_turn = dict(role="user", content=user_prompt_for(python))
    return [system_turn, user_turn]

In [5]:
# Python code
# Sample programs are kept as source text: update_code returns them for the Python textbox.
# python_hard: an LCG feeds a brute-force maximum-subarray search (nested loops over n items),
# repeated for 20 seeds; the snippet prints the summed result and its own timing.
python_hard = """
def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value

def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""
# pi: sample program kept as source text; it runs an alternating-series loop for
# 100_000_000 iterations, multiplies the result by 4 and prints it to 12 decimals
# together with its own timing. Used as the default content of the Python textbox.
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [103]:
# Models
# API keys are read from `userdata` at run time rather than written into the notebook.
openai_api_key = userdata.get('OPENAI_API_KEY')
openai = OpenAI(api_key=openai_api_key)  # client used by stream_gpt
anthropic_api_key = userdata.get('ANTHROPIC_API_KEY')
claude = anthropic.Anthropic(api_key=anthropic_api_key)  # client used by stream_claude

# Model identifiers: the first two are passed to the hosted APIs, the third to the
# Hugging Face loaders in load_model.
OPENAI_MODEL = "gpt-4o"
CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
CODE_GEMMA = "google/codegemma-7b-it"

In [104]:
# Download CodeGemma

def load_model():
    """Load the CodeGemma tokenizer and its 4-bit quantized model.

    Returns:
        A (tokenizer, model) tuple for the checkpoint named by CODE_GEMMA.
    """
    # 4-bit NF4 weights with double quantization; compute runs in bfloat16.
    four_bit_settings = {
        "load_in_4bit": True,
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_compute_dtype": torch.bfloat16,
        "bnb_4bit_quant_type": "nf4",
    }
    bnb_config = BitsAndBytesConfig(**four_bit_settings)

    gemma_tokenizer = AutoTokenizer.from_pretrained(CODE_GEMMA)
    # device_map="auto" leaves module placement to the library.
    gemma = AutoModelForCausalLM.from_pretrained(
        CODE_GEMMA,
        device_map="auto",
        quantization_config=bnb_config,
    )
    return gemma_tokenizer, gemma

# Load the tokenizer and quantized model once, when this cell runs.
# NOTE(review): the output recorded for this cell is a ValueError about modules being
# dispatched to CPU/disk (not enough GPU RAM), so these two globals may be undefined.
gemma_tok, gemma_model = load_model()

ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [105]:
# Stream methods
def stream_gpt(python):
    """Stream a C++ translation of `python` from the OpenAI chat API.

    Yields:
        The text accumulated so far after every chunk, with Markdown code
        fences removed.
    """
    response_chunks = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    accumulated = ""
    for piece in response_chunks:
        delta_text = piece.choices[0].delta.content
        # A chunk may carry no text; the running reply is still re-emitted.
        if delta_text:
            accumulated += delta_text
        yield accumulated.replace('```cpp\n', '').replace('```', '')

def stream_claude(python):
    """Stream a C++ translation of `python` from the Anthropic messages API.

    Yields:
        The text accumulated so far after every streamed fragment, with
        Markdown code fences removed.
    """
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    request = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[user_turn],
    )
    so_far = ""
    # The request object is a context manager; the stream is consumed inside it.
    with request as events:
        for piece in events.text_stream:
            so_far = so_far + piece
            yield so_far.replace('```cpp\n', '').replace('```', '')

# Tokenizer/model pair, filled on first use so repeated conversions do not reload the weights.
_code_gemma_cache = {}


def _get_code_gemma():
    """Return the (tokenizer, model) pair for CodeGemma, loading it on first use."""
    if "pair" not in _code_gemma_cache:
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4"
        )
        tokenizer = AutoTokenizer.from_pretrained(CODE_GEMMA)
        model = AutoModelForCausalLM.from_pretrained(
            CODE_GEMMA,
            quantization_config=quant_config,
            device_map="cuda"           # same placement as before: everything on the GPU
        )
        _code_gemma_cache["pair"] = (tokenizer, model)
    return _code_gemma_cache["pair"]


def stream_code_gemma(python):
    """Stream a C++ translation of `python` generated locally by CodeGemma.

    Args:
        python: Python source code (str) to translate.

    Yields:
        The generated text accumulated so far after every decoded fragment,
        with Markdown code fences removed. The prompt itself is not echoed.
    """
    tokenizer, model = _get_code_gemma()

    # Build the prompt with the tokenizer's own chat template instead of hand-written
    # role tags. The system instructions are folded into the single user turn.
    # NOTE(review): this assumes the Gemma template rejects a separate "system" role;
    # confirm against the model card.
    messages = [
        {"role": "user", "content": system_message + "\n\n" + user_prompt_for(python)}
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Special tokens are already part of the templated text, so none are added here.
    inputs = tokenizer(
        formatted_prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    # skip_prompt=True keeps the prompt out of the streamed text.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 750,
        "temperature": 0.7,
        "do_sample": True,
        "streamer": streamer
    }

    # generate() blocks until done, so it runs in a worker thread while this
    # generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    reply = ""
    for new_text in streamer:
        reply += new_text
        yield reply.replace('```cpp\n','').replace('```','')

    # The streamer is exhausted only after generation ends; reap the worker thread.
    thread.join()

In [106]:
# Events

def update_code(code):
    """Return the sample program matching a dropdown label.

    "Pi" maps to `pi`, "Python hard" maps to `python_hard`; any other value
    yields an empty string.
    """
    if code == "Python hard":
        return python_hard
    return pi if code == "Pi" else ""

def optimize(python, model):
    """Stream the C++ translation of `python` from the selected model.

    Args:
        python: Python source code as a string (the value of the Python textbox).
        model: One of "GPT", "Claude" or "CodeGemma".

    Yields:
        The translation accumulated so far, as produced by the chosen
        stream_* generator.

    Raises:
        ValueError: If `model` is not one of the known names (raised when the
            generator is first advanced).
    """
    if model=="GPT":
        result = stream_gpt(python)
    elif model=="Claude":
        result = stream_claude(python)
    elif model=="CodeGemma":
        # `python` is already a str here; it has no `.value` attribute.
        result = stream_code_gemma(python)
    else:
        raise ValueError("Unknown model")
    yield from result

In [107]:
# Declare execution code

def write_output(cpp):
    """Write C++ source to optimized.cpp after removing Markdown code fences."""
    stripped = cpp
    # Remove the language-tagged fence first, then any remaining bare fence.
    for fence in ("```cpp", "```"):
        stripped = stripped.replace(fence, "")
    with open("optimized.cpp", "w") as target:
        target.write(stripped)

def execute_python(code):
    """Run a Python snippet and return everything it printed to stdout.

    Args:
        code: Python source code (str) to execute.

    Returns:
        The text the snippet wrote to stdout.

    Raises:
        Any exception raised by the snippet propagates to the caller; stdout is
        restored first.
    """
    # Local import: the notebook's import cell is not visible from this block.
    import contextlib

    buffer = io.StringIO()
    # One dict serves as both globals and locals, so functions defined by the snippet
    # can call each other. __name__ is set so `if __name__ == "__main__":` guards run.
    namespace = {"__name__": "__main__"}
    # redirect_stdout puts back whatever stream was active before, rather than
    # forcing sys.__stdout__ (which is not the stream a notebook kernel prints to).
    with contextlib.redirect_stdout(buffer):
        # SECURITY: exec runs arbitrary code with this process's privileges; only
        # feed it trusted input (the UI passes the textbox contents straight in).
        exec(code, namespace)
    return buffer.getvalue()

def execute_cpp(code):
    """Compile the given C++ source with clang++ and run the resulting binary.

    The source is first written out via write_output, then compiled to
    ./optimized and executed.

    Returns:
        The program's stdout, or an "An error occurred" message carrying stderr
        when either the compile step or the run exits with a non-zero status.
    """
    write_output(code)
    build = ["clang++", "-Ofast", "-std=c++17", "-o", "optimized", "optimized.cpp"]
    launch = ["./optimized"]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(build, check=True, text=True, capture_output=True)
        finished = subprocess.run(launch, check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as failure:
        return f"An error occurred:\n{failure.stderr}"
    return finished.stdout

In [108]:
# Declare UI

# Background colours for the two result areas; matched through elem_classes below.
css = """
.python {background-color: #306998;}
.cpp {background-color: #050;}
"""

with gr.Blocks(css=css) as ui:
    gr.Markdown("## Convierte código de Python a C++")
    with gr.Row():
        # Sample selector (label typo "el el" fixed).
        code = gr.Dropdown(["Pi", "Python hard"], label="Selecciona el código", value="Pi")
    with gr.Row():
        # Source on the left, generated translation on the right.
        python = gr.Textbox(label="Código en Python:", lines=10, value=pi)
        cpp = gr.Textbox(label="Código en C++:", lines=10)
    with gr.Row():
        model = gr.Dropdown(["CodeGemma", "GPT", "Claude"], label="Selecciona el modelo", value="CodeGemma")
    with gr.Row():
        convert = gr.Button("Convertir el código")
    with gr.Row():
        python_run = gr.Button("Ejecutar Python")
        cpp_run = gr.Button("Ejecutar C++")
    with gr.Row():
        python_out = gr.TextArea(label="Resultado en Python:", elem_classes=["python"])
        cpp_out = gr.TextArea(label="Resultado en C++:", elem_classes=["cpp"])

    # Event wiring: each handler receives the current values of its input components.
    code.change(update_code, inputs=[code], outputs=[python])
    convert.click(optimize, inputs=[python, model], outputs=[cpp])
    python_run.click(execute_python, inputs=[python], outputs=[python_out])
    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])

In [109]:
# Launch UI

# NOTE(review): the output recorded for this cell shows Colab forcing share=True, i.e. a
# public URL. The "Ejecutar Python" button runs exec() on the textbox contents, so anyone
# holding the link can run code in this runtime; confirm that is acceptable.
ui.launch(inbrowser=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b4a46f3afff1b5dbd3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [91]:
# NOTE(review): looks like a leftover scratch cell. Its recorded execution count (91) is
# lower than the cells above, and it launches a second Gradio app that wires the `python`
# Textbox straight into stream_code_gemma. Confirm whether it can be removed.
gr.Interface(fn=stream_code_gemma, inputs=[python], outputs=[gr.Textbox()]).launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://79cc3236a67f6dc89c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


