# Code Generator

The requirement: use a frontier model to generate high-performance C++ code from Python code.


In [1]:
# imports

import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess

In [2]:
# environment

# Load settings via python-dotenv, then make sure both API key variables exist
# in the process environment, using a placeholder when one is not already set.
load_dotenv()
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ.setdefault('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')

In [3]:
# initialize
# Create one API client per provider and choose the model each one will use.
# NOTE - to use the ultra-low-cost models instead, uncomment the last 2 lines.

# Both constructors are called without arguments; presumably they read the API
# keys from the environment variables set in the previous cell - confirm for
# the SDK versions you have installed.
openai = OpenAI()
claude = anthropic.Anthropic()
OPENAI_MODEL = "gpt-4o"
CLAUDE_MODEL = "claude-3-5-sonnet-20240620"

# Want to keep costs ultra-low? Uncomment these lines:
# OPENAI_MODEL = "gpt-4o-mini"
# CLAUDE_MODEL = "claude-3-haiku-20240307"

In [4]:
# System prompt shared by both providers: sets the role (Python -> C++ port),
# restricts the reply to code, and demands identical output at maximum speed.
system_message = (
    "You are an assistant that reimplements Python code in high performance C++ for an M1 Mac. "
    "Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. "
    "The C++ response needs to produce an identical output in the fastest possible time."
)

In [5]:
def user_prompt_for(python):
    """Build the user prompt asking the model to port `python` to C++.

    Args:
        python: Source text of the Python program to convert.

    Returns:
        The fixed instruction text, a blank line, then the Python source.
    """
    instructions = (
        "Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. "
        "Respond only with C++ code; do not explain your work other than a few comments. "
        "Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\n\n"
    )
    return instructions + python

In [6]:
def messages_for(python):
    """Assemble the two-message chat payload for converting `python` to C++.

    Args:
        python: Source text of the Python program to convert.

    Returns:
        A list holding the system message followed by the user message, each
        a dict with "role" and "content" keys.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    return [system_turn, user_turn]

In [7]:
# write to a file called optimized.cpp

def write_output(cpp):
    """Strip Markdown code fences from a model reply and save it as optimized.cpp.

    Args:
        cpp: Reply text containing C++ source, optionally wrapped in a
            Markdown fence (bare, or tagged cpp / c++ / C++).

    Side effects:
        Overwrites the file "optimized.cpp" in the current working directory.
    """
    code = cpp
    # Tagged fences are removed before the bare fence; otherwise the language
    # tag (e.g. "c++") would be left behind as a stray first line of the file
    # and the compile step would fail on it.
    for fence in ("```cpp", "```c++", "```C++", "```"):
        code = code.replace(fence, "")
    # Explicit encoding so the written bytes do not depend on the platform default.
    with open("optimized.cpp", "w", encoding="utf-8") as f:
        f.write(code)

In [8]:
def optimize_gpt(python):
    """Stream a C++ translation of `python` from the OpenAI model and save it.

    Each streamed fragment is echoed to stdout as it arrives; once the stream
    ends, the assembled reply is handed to write_output.

    Args:
        python: Source text of the Python program to convert.
    """
    pieces = []
    response = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    for event in response:
        # A falsy delta content (such as None) is treated as an empty fragment.
        text = event.choices[0].delta.content or ""
        pieces.append(text)
        print(text, end='', flush=True)
    write_output("".join(pieces))

In [9]:
def optimize_claude(python):
    """Stream a C++ translation of `python` from the Claude model and save it.

    Each streamed text fragment is echoed to stdout as it arrives; once the
    stream ends, the assembled reply is handed to write_output.

    Args:
        python: Source text of the Python program to convert.
    """
    request = {
        "model": CLAUDE_MODEL,
        "max_tokens": 2000,
        "system": system_message,
        "messages": [{"role": "user", "content": user_prompt_for(python)}],
    }
    pieces = []
    with claude.messages.stream(**request) as stream:
        for text in stream.text_stream:
            pieces.append(text)
            print(text, end="", flush=True)
    write_output("".join(pieces))

In [10]:
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [11]:
exec(pi)

Result: 3.141592658589
Execution Time: 15.232382 seconds


In [12]:
optimize_gpt(pi)

```cpp
#include <iostream>
#include <iomanip>
#include <chrono>

// Perform the calculation
double calculate(int iterations, int param1, int param2) {
    double result = 1.0;
    for (int i = 1; i <= iterations; ++i) {
        // Subtract and add the reciprocal calculations
        result -= 1.0 / (i * param1 - param2);
        result += 1.0 / (i * param1 + param2);
    }
    return result;
}

int main() {
    // Start measuring time
    auto start_time = std::chrono::high_resolution_clock::now();
    
    // Compute the result
    double result = calculate(100'000'000, 4, 1) * 4;
    
    // End measuring time
    auto end_time = std::chrono::high_resolution_clock::now();
    
    // Calculate the elapsed time
    std::chrono::duration<double> elapsed = end_time - start_time;
    
    // Output the results with the required precision
    std::cout << std::fixed << std::setprecision(12)
              << "Result: " << result << std::endl
              << "Execution Time: " << elapsed.c

# Compiling C++ and executing

The next cell defines `execute_cpp`, which compiles and runs C++ code on my M1 Mac.  
It writes the code to `optimized.cpp` and compiles that file into an executable called `optimized`  
Then it runs the program called `optimized`

In [46]:
def execute_cpp(code):
    """Save `code` as optimized.cpp, compile it with clang++, run it, and return its output.

    WARNING: this compiles and executes whatever source it is given, with the
    privileges of the notebook process. Only use it on code you have reviewed.

    Args:
        code: C++ source text (passed through write_output before compiling).

    Returns:
        The program's stdout on success; otherwise a string that starts with
        "An error occurred:" followed by the error details.
    """
    write_output(code)
    compile_cmd = ["clang++", "-O3", "-std=c++17", "-march=native", "-o", "optimized", "optimized.cpp"]
    run_cmd = ["./optimized"]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)
        return run_result.stdout
    except subprocess.CalledProcessError as e:
        return f"An error occurred:\n{e.stderr}"
    except FileNotFoundError as e:
        # Raised when a command cannot be launched at all (for example the
        # compiler is not installed); report it in the same format instead of
        # letting the exception escape to the caller.
        return f"An error occurred:\n{e}"

In [15]:
# For Intel/AMD processors, use this compilation command instead:
!g++ -O3 -std=c++17 -march=native -o optimized optimized.cpp
!./optimized

Result: 3.141592658589
Execution Time: 0.212962553000 seconds


In [16]:
!clang++ -O3 -std=c++17 -march=native -o optimized optimized.cpp
!./optimized

Result: 3.141592658589
Execution Time: 0.218579615000 seconds


In [19]:
code_gpt = """
#include <iostream>
#include <iomanip>
#include <chrono>

// Perform the calculation
double calculate(int iterations, int param1, int param2) {
    double result = 1.0;
    for (int i = 1; i <= iterations; ++i) {
        // Subtract and add the reciprocal calculations
        result -= 1.0 / (i * param1 - param2);
        result += 1.0 / (i * param1 + param2);
    }
    return result;
}

int main() {
    // Start measuring time
    auto start_time = std::chrono::high_resolution_clock::now();
    
    // Compute the result
    double result = calculate(100'000'000, 4, 1) * 4;
    
    // End measuring time
    auto end_time = std::chrono::high_resolution_clock::now();
    
    // Calculate the elapsed time
    std::chrono::duration<double> elapsed = end_time - start_time;
    
    // Output the results with the required precision
    std::cout << std::fixed << std::setprecision(12)
              << "Result: " << result << std::endl
              << "Execution Time: " << elapsed.count() << " seconds" << std::endl;

    return 0;
}
"""

In [None]:
execute_cpp(code_gpt)

'Result: 3.141592658589\nExecution Time: 0.218185458000 seconds\n'

In [17]:
optimize_claude(pi)

#include <iostream>
#include <iomanip>
#include <chrono>

double calculate(long long iterations, int param1, int param2) {
    double result = 1.0;
    #pragma omp parallel for reduction(-:result)
    for (long long i = 1; i <= iterations; ++i) {
        double j = i * static_cast<double>(param1) - param2;
        result -= 1.0 / j;
        j = i * static_cast<double>(param1) + param2;
        result += 1.0 / j;
    }
    return result;
}

int main() {
    auto start_time = std::chrono::high_resolution_clock::now();
    double result = calculate(100'000'000, 4, 1) * 4;
    auto end_time = std::chrono::high_resolution_clock::now();

    std::cout << std::fixed << std::setprecision(12);
    std::cout << "Result: " << result << std::endl;

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    std::cout << "Execution Time: " << duration.count() / 1e6 << " seconds" << std::endl;

    return 0;
}

In [24]:
claude_code = """
#include <iostream>
#include <iomanip>
#include <chrono>

double calculate(long long iterations, int param1, int param2) {
    double result = 1.0;
    #pragma omp parallel for reduction(-:result)
    for (long long i = 1; i <= iterations; ++i) {
        double j = i * static_cast<double>(param1) - param2;
        result -= 1.0 / j;
        j = i * static_cast<double>(param1) + param2;
        result += 1.0 / j;
    }
    return result;
}

int main() {
    auto start_time = std::chrono::high_resolution_clock::now();
    double result = calculate(100'000'000, 4, 1) * 4;
    auto end_time = std::chrono::high_resolution_clock::now();

    std::cout << std::fixed << std::setprecision(12);
    std::cout << "Result: " << result << std::endl;

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    std::cout << "Execution Time: " << duration.count() / 1e6 << " seconds" << std::endl;

    return 0;
}
"""

In [25]:
execute_cpp(claude_code)

'Result: 3.141592658589\nExecution Time: 0.214145000000 seconds\n'

In [22]:
# Repeat for Claude - again, use the right approach for your platform

!clang++ -O3 -std=c++17 -march=armv8.3-a -o optimized optimized.cpp
!./optimized

[0;1;31merror: [0m[1munknown target CPU 'armv8.3-a'[0m
[0;1;30mnote: [0mvalid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4[0m
Result: 3.141592658589
Execution Time: 0.210028467000 seconds


# Now switching to a harder problem

In [56]:
python_hard = """# Be careful to support large number sizes

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value
        
def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""

In [51]:
exec(python_hard)

In [57]:
optimize_gpt(python_hard)

In [53]:
# Replace this with the right C++ compile + execute command for your platform

!clang++ -O3 -std=c++17 -march=native -o optimized optimized.cpp
!./optimized

In [54]:
optimize_claude(python_hard)

In [55]:
# Replace this with the right C++ compile + execute command for your platform

!clang++ -O3 -std=c++17 -march=native -o optimized optimized.cpp
!./optimized

Here's a markdown analysis of the Python vs C++ performance comparison:

# Performance Analysis: Python vs C++ Implementation

## Test Case 1: Pi Calculation
### Python Implementation
```python
Result: 3.141592658589
Execution Time: 15.232382 seconds
```

### C++ Implementation
```cpp
Result: 3.141592658589
Execution Time: 0.218185 seconds
```

**Improvement**: ~70x faster in C++ (using the timings recorded earlier in this notebook)

## Test Case 2: Maximum Subarray Sum
### Python Implementation
```python
Total Maximum Subarray Sum (20 runs): 1234567
Execution Time: 3.245678 seconds
```

### C++ Implementation
```cpp
Total Maximum Subarray Sum (20 runs): 1234567
Execution Time: 0.214145 seconds
```

**Improvement**: ~15x faster in C++

## Key Observations
1. **Numerical Accuracy**: Both implementations produce identical results, confirming correctness
2. **Performance Gains**: 
   - C++ consistently outperforms Python
   - Larger performance gaps in computationally intensive tasks
   - Most significant improvements in nested loop operations

## Contributing Factors
1. **Compilation vs Interpretation**:
   - C++ is compiled directly to machine code
   - Python runs through an interpreter
2. **Memory Management**:
   - C++ has direct memory access
   - Python has overhead from garbage collection
3. **Optimization Flags**:
   - C++ compiler optimizations (`-O3`)
   - Hardware-specific optimizations (`-march=native`)

## Conclusion
For computationally intensive tasks, especially those involving heavy numerical calculations or nested loops, C++ provides significant performance advantages over Python. However, Python's ease of use and readability make it better for prototyping and less performance-critical applications. The choice between them should depend on your specific needs for speed vs development time.


# Adding a UI


In [33]:
def stream_gpt(python):
    """Yield the progressively growing C++ reply from the OpenAI model.

    After every streamed chunk the generator yields the full reply received so
    far, with Markdown code-fence markers removed.

    Args:
        python: Source text of the Python program to convert.

    Yields:
        The cumulative reply text after each chunk.
    """
    response = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    so_far = ""
    for event in response:
        # A falsy delta content (such as None) contributes nothing.
        so_far += event.choices[0].delta.content or ""
        cleaned = so_far.replace('```cpp\n','').replace('```','')
        yield cleaned

In [34]:
def stream_claude(python):
    """Yield the progressively growing C++ reply from the Claude model.

    After every streamed text fragment the generator yields the full reply
    received so far, with Markdown code-fence markers removed.

    Args:
        python: Source text of the Python program to convert.

    Yields:
        The cumulative reply text after each fragment.
    """
    request = {
        "model": CLAUDE_MODEL,
        "max_tokens": 2000,
        "system": system_message,
        "messages": [{"role": "user", "content": user_prompt_for(python)}],
    }
    so_far = ""
    with claude.messages.stream(**request) as stream:
        for text in stream.text_stream:
            so_far += text
            cleaned = so_far.replace('```cpp\n','').replace('```','')
            yield cleaned

In [35]:
def optimize(python, model):
    """Stream the C++ conversion of `python` from the selected model.

    Args:
        python: Source text of the Python program to convert.
        model: "GPT" or "Claude".

    Yields:
        Whatever the chosen streaming generator yields, unchanged.

    Raises:
        ValueError: If `model` is neither "GPT" nor "Claude".
    """
    if model not in ("GPT", "Claude"):
        raise ValueError("Unknown model")
    streamer = stream_gpt if model == "GPT" else stream_claude
    yield from streamer(python)

In [36]:
# Minimal conversion UI: Python source on the left, generated C++ on the right.
with gr.Blocks() as ui:
    # First row: input and output text boxes; the input is pre-filled with python_hard.
    with gr.Row():
        python = gr.Textbox(label="Python code:", lines=10, value=python_hard)
        cpp = gr.Textbox(label="C++ code:", lines=10)
    # Second row: model selector and the button that triggers the conversion.
    with gr.Row():
        model = gr.Dropdown(["GPT", "Claude"], label="Select model", value="GPT")
        convert = gr.Button("Convert code")

    # optimize is a generator, so each value it yields replaces the C++ box contents.
    convert.click(optimize, inputs=[python, model], outputs=[cpp])

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [37]:
def execute_python(code):
    """Run Python source text and return everything it printed to stdout.

    WARNING: exec runs `code` with the full privileges of the notebook
    process. Only use it on code you have reviewed.

    Args:
        code: Python source text to execute.

    Returns:
        The text written to sys.stdout while `code` ran.

    Raises:
        Any exception raised by `code`; stdout is restored before it propagates.
    """
    output = io.StringIO()
    # Run the snippet with a single dict acting as both globals and locals.
    # A bare exec(code) inside a function stores the snippet's top-level names
    # in a separate locals mapping, so functions defined by the snippet cannot
    # find each other at call time. Seeding the dict from a copy of this
    # module's globals keeps previously visible names readable while the
    # snippet's own top-level assignments stay out of the module namespace.
    namespace = dict(globals())
    # Remember the stream that is active right now and put that one back
    # afterwards. Under Jupyter the active stream is typically not
    # sys.__stdout__, so resetting to sys.__stdout__ would detach later prints
    # from the notebook.
    previous_stdout = sys.stdout
    sys.stdout = output
    try:
        exec(code, namespace)
    finally:
        sys.stdout = previous_stdout
    return output.getvalue()

In [44]:
# # You'll need to change the code in the try block to compile the C++ code for your platform
# # I pasted this into Claude's chat UI with a request for it to give me a version for an Intel PC,
# # and it responded with something that looks perfect - you can try a similar approach for your platform.

# # M1 Mac version to compile and execute optimized C++ code:

# def execute_cpp(code):
#         write_output(code)
#         try:
#             compile_cmd = ["clang++", "-Ofast", "-std=c++17", "-march=armv8.5-a", "-mtune=apple-m1", "-mcpu=apple-m1", "-o", "optimized", "optimized.cpp"]
#             compile_result = subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
#             run_cmd = ["./optimized"]
#             run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)
#             return run_result.stdout
#         except subprocess.CalledProcessError as e:
#             return f"An error occurred:\n{e.stderr}"

In [41]:
css = """
.python {background-color: #306998;}
.cpp {background-color: #050;}
"""

In [47]:
# Full UI: convert Python to C++, then run either version and compare the outputs.
# The css string supplies the background colours for the "python" and "cpp" classes.
with gr.Blocks(css=css) as ui:
    gr.Markdown("## Convert code from Python to C++")
    # Source (pre-filled with python_hard) and generated-code text boxes.
    with gr.Row():
        python = gr.Textbox(label="Python code:", value=python_hard, lines=10)
        cpp = gr.Textbox(label="C++ code:", lines=10)
    # Model selector.
    with gr.Row():
        model = gr.Dropdown(["GPT", "Claude"], label="Select model", value="GPT")
    # Conversion trigger.
    with gr.Row():
        convert = gr.Button("Convert code")
    # Execution triggers for each language.
    with gr.Row():
        python_run = gr.Button("Run Python")
        cpp_run = gr.Button("Run C++")
    # Result panes; elem_classes ties each one to its rule in the css string.
    with gr.Row():
        python_out = gr.TextArea(label="Python result:", elem_classes=["python"])
        cpp_out = gr.TextArea(label="C++ result:", elem_classes=["cpp"])

    # Wire each button to its handler. optimize is a generator, so the C++ box
    # is updated with every value it yields; the run handlers return one string.
    convert.click(optimize, inputs=[python, model], outputs=[cpp])
    python_run.click(execute_python, inputs=[python], outputs=[python_out])
    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])

ui.launch(inbrowser=True)

