In [62]:
# pip install openai
!pip3 install openai



In [63]:
import json
import os

from openai import OpenAI

# Load the GitHub personal access token (PAT) that is used as the API key.
# token.txt is read first; if that file does not exist, fall back to a
# GITHUB_TOKEN value that is already present in the environment.
try:
    with open("token.txt", "r") as f:
        GITHUB_TOKEN = f.read().strip()
except FileNotFoundError:
    GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "").strip()

# Fail here with a clear message instead of on the first API call.
if not GITHUB_TOKEN:
    raise RuntimeError(
        "No GitHub token found: put a personal access token in token.txt "
        "or set the GITHUB_TOKEN environment variable."
    )

# Set it as an environment variable
os.environ["GITHUB_TOKEN"] = GITHUB_TOKEN


# To authenticate with the model you will need to generate a personal access token (PAT) in your GitHub settings.
client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key=GITHUB_TOKEN,
)



In [64]:
# Model identifiers, selected by position through the `model_num` argument
# of the run helpers.
model_choices = [
    "gpt-4o-mini",       # index 0
    "Codestral-2501",    # index 1
    "gemini-2.0-flash",  # index 2
    "gpt-4.1-mini",      # index 3
]

In [7]:
# zero shot, few shot, CoT
def run_prompt(task_num, prompt_strat, prompt, max_tokens, temp, model_num):
  """Send `prompt` as a single user message to one model and return a run record.

  Args:
    task_num: Task identifier, copied into the record unchanged.
    prompt_strat: Name of the prompting strategy, copied into the record unchanged.
    prompt: Text sent as the only message of the conversation.
    max_tokens: Forwarded to the API call as `max_tokens`.
    temp: Forwarded to the API call as `temperature`.
    model_num: Index into the module-level `model_choices` list.

  Returns:
    A dict with the keys "task_num", "model", "prompt strategy", "prompt" and
    "output"; "output" is the message content of the first returned choice.
  """
  chosen_model = model_choices[model_num]

  completion = client.chat.completions.create(
      model=chosen_model,
      messages=[{"role": "user", "content": prompt}],
      max_tokens=max_tokens,
      temperature=temp,
  )

  record = {
      "task_num": task_num,
      "model": chosen_model,
      "prompt strategy": prompt_strat,
      "prompt": prompt,
  }
  # Only the first choice of the completion is kept.
  record["output"] = completion.choices[0].message.content
  return record

In [8]:
def run_prompt_self_consistency(task_num, prompt, max_tokens, temp, model_num, num_attempts):
  """Send the same single-message prompt `num_attempts` times and collect every reply.

  Args:
    task_num: Task identifier, copied into the record unchanged.
    prompt: Text sent as the only message of each request.
    max_tokens: Forwarded to each API call as `max_tokens`.
    temp: Forwarded to each API call as `temperature`.
    model_num: Index into the module-level `model_choices` list.
    num_attempts: Number of independent requests to issue.

  Returns:
    A dict with the keys "task_num", "model", "prompt strategy" (always
    "self consistency"), "prompt" and "output"; "output" maps
    "output attempt 1", "output attempt 2", ... to the message content of
    the first choice of the corresponding request.
  """
  chosen_model = model_choices[model_num]
  conversation = [{"role": "user", "content": prompt}]

  attempts = {}
  attempt_no = 1
  while attempt_no <= num_attempts:
    completion = client.chat.completions.create(
        model=chosen_model,
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temp,
    )
    # Each request is independent: the reply is stored, not fed back to the model.
    attempts[f"output attempt {attempt_no}"] = completion.choices[0].message.content
    attempt_no += 1

  return {
      "task_num": task_num,
      "model": chosen_model,
      "prompt strategy": "self consistency",
      "prompt": prompt,
      "output": attempts,
  }

In [28]:
# Accumulator for run records: the task cells below append the dict returned
# by each run helper (or an equivalent hand-built dict) to this list.
runs = []

# **Task 1: Code Summarization (Java)**

In [None]:
# Task 1 input: a Java method wrapped in a Markdown ```java fence.
# The string is interpolated into the prompts of this task via {code}.
code = """
```java
public Map < String , Integer > countWordFrequency ( List < String > words ) {
    Map < String , Integer > freqMap = new HashMap < >() ;
    for ( String word : words ) {
        freqMap . put ( word , freqMap . getOrDefault ( word , 0) + 1) ;
    }
    return freqMap ;
}
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Task 1 prompt: one-sentence summary request followed by the fenced snippet in `code`.
prompt = f"Summarize what this code does in one sentence:\n{code}"
# Show the exact text that will be sent to the models.
print(prompt)

Summarize what this code does in one sentence:

```java
public Map < String , Integer > countWordFrequency ( List < String > words ) {
    Map < String , Integer > freqMap = new HashMap < >() ;
    for ( String word : words ) {
        freqMap . put ( word , freqMap . getOrDefault ( word , 0) + 1) ;
    }
    return freqMap ;
}
```



In [None]:
# Task 1, zero shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=1,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
The code defines a method that counts the frequency of each word in a list and returns a map where the keys are the words and the values are their respective counts.


Codestral-2501
This code defines a method that takes a list of words and returns a map with each word as the key and its frequency in the list as the value.




## Prompting Strategy 2: Self Consistency

In [None]:
# Task 1, self consistency: two independent attempts per model for the first
# two entries of model_choices.
for i in range(2):
  run_info = run_prompt_self_consistency(
      task_num=1,
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
      num_attempts=2,
  )
  runs.append(run_info)
  attempts = run_info["output"]
  # Model name, both attempts, then the blank separator lines.
  print(
      run_info["model"],
      attempts["output attempt 1"],
      attempts["output attempt 2"],
      "\n",
      sep="\n",
  )

gpt-4o-mini
This code defines a method that takes a list of words and returns a map that counts the frequency of each word in the list.
This code defines a method that takes a list of words and returns a map containing the frequency count of each word.


Codestral-2501
This Java method `countWordFrequency` takes a list of strings and returns a map where the keys are the strings and the values are their respective frequencies in the list.
The code defines a method that takes a list of words and returns a map where the keys are words and the values are their respective frequencies in the list.




# **Task 2: Bug Fixing (Python – Off-by-One)**

In [10]:
# Task 2 input: a Python function wrapped in a Markdown ```python fence.
# The string is interpolated into the prompts of this task via {code}.
code = """
```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Task 2 prompt: asks the model to both identify and fix the bug in `code`.
prompt = f"Identify and fix the bug in the following Python function:\n{code}"
# Show the exact text that will be sent to the models.
print(prompt)

Identify and fix the bug in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```



In [None]:
# Task 2, zero shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=2,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
The provided function `sum_range` is intended to calculate the sum of all integers from `start` to `end - 1`. However, if the intention is to include `end` in the summation (i.e., to sum from `start` to `end` inclusive), the range should be modified. 

Here's the corrected version of the function:

```python
def sum_range(start, end):
    total = 0
    for i in range(start, end + 1):  # Change end to end + 1 to include 'end'
        total += i
    return total
```

If the original intention was to sum from `start` to `end - 1`, then the function is correct as it is. Please clarify the expected behavior, and choose the appropriate version based on whether you want to include `end` or not.


Codestral-2501
The bug in the given Python function `sum_range` is related to the range of numbers being summed. The `range` function in Python includes the start value but excludes the end value. Therefore, the function currently calculates the sum of all integers from `start` to `end-1`

## Prompting Strategy 2: Prompt Chaining

In [25]:
i = 1  # index into model_choices: Codestral-2501

# The two prompts of the chain; both embed the fenced snippet in `code`.
prompt1 = f"Identify the bug in the following Python function:\n{code}\nGive only the identified bug back as an answer."
followup = f"Fix the bug in the following Python function:\n{code}"

# Step 1: a fresh conversation that only asks the model to name the bug.
messages = [{"role": "user", "content": prompt1}]
response = client.chat.completions.create(
    model=model_choices[i],
    messages=messages,
    max_tokens=768,
    temperature=0.7,
)
response1 = response.choices[0].message.content

# Step 2: extend the same history with the model's answer and the fix request,
# so the second call sees the whole step-1 exchange.
messages.extend([
    {"role": "assistant", "content": response1},
    {"role": "user", "content": followup},
])
response = client.chat.completions.create(
    model=model_choices[i],
    messages=messages,
    max_tokens=768,
    temperature=0.7,
)
reply = response.choices[0].message.content

# Record both prompts and both responses of this run.
run_info = {
    "task_num": 2,
    "model": model_choices[i],
    "prompt strategy": "prompt chaining",
    "prompt": {"step 1 prompt": prompt1, "step 2 prompt": followup},
    "output": {"response 1": response1, "response 2": reply},
}
runs.append(run_info)

# Transcript of the chain: a label line, the text, then a blank line.
print(run_info["model"])
print("PROMPT 1:", prompt1 + "\n", sep="\n")
print("RESPONSE 1:", response1 + "\n", sep="\n")
print("PROMPT 2:", followup + "\n", sep="\n")
print("RESPONSE 2:", reply, "\n", sep="\n")


Codestral-2501
PROMPT 1:
Identify the bug in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```

Give only the identified bug back as an answer.

RESPONSE 1:
The bug in the function is that the range function does not include the `end` value.

PROMPT 2:
Fix the bug in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```


RESPONSE 2:
You can fix the bug by changing the range function to include the `end` value. You can do this by adding 1 to the `end` value in the range function. Here is the corrected function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end + 1 ) :
        total += i
    return total
```




In [23]:
i = 3  # index into model_choices: gpt-4.1-mini

# The two prompts of the chain; both embed the fenced snippet in `code`.
prompt1 = f"Identify the bug in the following Python function:\n{code}\nGive only the identified bug back as an answer."
followup = f"Fix the bug in the following Python function:\n{code}"

# Step 1: a fresh conversation that only asks the model to name the bug.
messages = [{"role": "user", "content": prompt1}]
response = client.chat.completions.create(
    model=model_choices[i],
    messages=messages,
    max_tokens=768,
    temperature=0.7,
)
response1 = response.choices[0].message.content

# Step 2: extend the same history with the model's answer and the fix request,
# so the second call sees the whole step-1 exchange.
messages.extend([
    {"role": "assistant", "content": response1},
    {"role": "user", "content": followup},
])
response = client.chat.completions.create(
    model=model_choices[i],
    messages=messages,
    max_tokens=768,
    temperature=0.7,
)
reply = response.choices[0].message.content

# Record both prompts and both responses of this run.
run_info = {
    "task_num": 2,
    "model": model_choices[i],
    "prompt strategy": "prompt chaining",
    "prompt": {"step 1 prompt": prompt1, "step 2 prompt": followup},
    "output": {"response 1": response1, "response 2": reply},
}
runs.append(run_info)

# Transcript of the chain: a label line, the text, then a blank line.
print(run_info["model"])
print("PROMPT 1:", prompt1 + "\n", sep="\n")
print("RESPONSE 1:", response1 + "\n", sep="\n")
print("PROMPT 2:", followup + "\n", sep="\n")
print("RESPONSE 2:", reply, "\n", sep="\n")


gpt-4.1-mini
PROMPT 1:
Identify the bug in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```

Give only the identified bug back as an answer.

RESPONSE 1:
The function does not include the `end` value in the sum because `range(start, end)` is exclusive of `end`.

PROMPT 2:
Fix the bug in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```


RESPONSE 2:
```python
def sum_range(start, end):
    total = 0
    for i in range(start, end + 1):
        total += i
    return total
```




# **Task 3: Bug Classification (C++)**

In [29]:
# Task 3 input: a C++ function wrapped in a Markdown ```cpp fence.
# The string is interpolated into the prompts of this task via {code}.
code = """
```cpp
int * getArray (int size ) {
    int arr [ size ];
    return arr ;
}
```
"""

## Prompting Strategy 1: Zero Shot

In [30]:
# Task 3 zero-shot prompt: asks only for a classification of the bug in `code`,
# explicitly without a solution.
prompt = f"Classify the bug in the following C++ function:\n{code}\nDo not include a solution."
# Show the exact text that will be sent to the models.
print(prompt)

Classify the bug in the following C++ function:

```cpp
int * getArray (int size ) {
    int arr [ size ];
    return arr ;
}
```

Do not include a solution.


In [31]:
# Indices into model_choices that are queried for task 3.
models_task3 = [1, 3]

# Task 3, zero shot: one request per selected model.
for i in models_task3:
  run_info = run_prompt(
      task_num=3,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
To classify the bug in the given C++ function, consider the following:

1. **Array Lifetime**: The array `arr` is created within the function `getArray`. This array has automatic storage duration, meaning it is allocated on the stack and will be destroyed when the function ends.

2. **Returning a Pointer**: The function returns a pointer to the local array `arr`. After the function returns, the array `arr` no longer exists, making the returned pointer invalid.

3. **Use-After-Free**: Attempting to use the returned pointer will result in undefined behavior because it points to memory that has been deallocated.

Therefore, the bug in this function is related to:
- **Lifetime of Local Variables**: Returning a pointer to a local array that goes out of scope.
- **Invalid Pointer Usage**: Using a pointer to memory that has been deallocated or is no longer valid.

This is an example of a common bug in C++ related to the lifetime of local variables and the dangers of returning p

## Prompting Strategy 2: Chain of Thought

In [32]:
# Task 3 chain-of-thought prompt: the classification request with an added
# "Think step-by-step" instruction.
prompt = f"Classify the bug in the following C++ function. Think step-by-step:\n{code}\nDo not include a solution."
# Show the exact text that will be sent to the models.
print(prompt)

Classify the bug in the following C++ function. Think step-by-step:

```cpp
int * getArray (int size ) {
    int arr [ size ];
    return arr ;
}
```

Do not include a solution.


In [33]:
# Task 3, chain of thought: one request per model index in models_task3.
for i in models_task3:
  run_info = run_prompt(
      task_num=3,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
Let's think through the function step-by-step to identify any potential issues:

1. **Function Definition**: The function `getArray` is defined to take an integer `size` and return a pointer to an integer.

2. **Local Array**: Inside the function, an array `arr` of integers is declared with a size specified by the `size` parameter: `int arr[size];`.

3. **Return Statement**: The function returns a pointer to this local array: `return arr;`.

Now, let's consider what could go wrong:

- **Lifetime of the Local Array**: The array `arr` is a local variable. Local variables in C++ have automatic storage duration, meaning they are allocated on the stack and their lifetime ends when the function returns. Returning a pointer to a local variable that goes out of scope is dangerous because the memory it points to may be reused or deallocated.

- **Dangling Pointer**: After the function `getArray` returns, the pointer returned will become a dangling pointer because it points to mem

# **Task 4: Generating Email Validators (Python + Regex)**

In [36]:
# Task 4 input: a Python function stub wrapped in a Markdown ```python fence.
# The string is interpolated into the prompts of this task via {code}.
code = """
```python
def is_valid_email ( email ):
  # TODO : Complete using regex
  pass
```
"""

## Prompting Strategy 1: Prompt Chaining

In [37]:
# Indices into model_choices that are queried for the task 4 prompt chain.
models_task4 = [1,3]

In [39]:
# Prompt chaining for task 4, once per model index in models_task4:
#   step 1 asks for the criteria of a valid email address,
#   step 2 sends those criteria back together with the function stub in `code`.
for i in models_task4:
  run_info = {"task_num": 4,
            "model": "",
            "prompt strategy": "prompt chaining",
            "prompt": {"step 1 prompt": "", "step 2 prompt": ""},
            "output": {"response 1": "", "response 2": ""}}

  run_info["model"] = model_choices[i]
  # Plain string: the step-1 prompt has nothing to interpolate.
  prompt1 = "Give a concise list of criteria for a valid email address"
  run_info["prompt"]["step 1 prompt"] = prompt1

  # Set up conversation history
  messages = [
      {"role": "user",
      "content": prompt1}
  ]

  # Call the model
  response = client.chat.completions.create(
      model=model_choices[i], # Choose the model you want to use
      max_tokens=768,
      temperature=0.7,
      messages=messages
  )

  # Add response to the conversation
  reply = response.choices[0].message.content
  messages.append({"role": "assistant", "content": reply})
  response1 = reply
  run_info["output"]["response 1"] = response1

  # User follows up; the step-1 criteria are quoted inside the prompt itself
  # in addition to being present in the conversation history.
  followup = f"Given the following criteria:\n{reply}\nComplete the Python function using regex to validate email addresses:\n{code}\nPlease respond with only the completed function."
  run_info["prompt"]["step 2 prompt"] = followup

  # Add user message
  messages.append({"role": "user", "content": followup})

  # Call again with updated history
  response = client.chat.completions.create(
      model=model_choices[i],
      max_tokens=768,
      temperature=0.7,
      messages=messages
  )

  # Store the final answer in the run record (it is not appended to `messages`)
  reply = response.choices[0].message.content
  run_info["output"]["response 2"] = reply
  runs.append(run_info)

  # Transcript of the chain for this model
  print(run_info["model"])
  print("PROMPT 1:")
  print(run_info["prompt"]["step 1 prompt"]+"\n")
  print("RESPONSE 1:")
  print(run_info["output"]["response 1"]+"\n")
  print("PROMPT 2:")
  print(run_info["prompt"]["step 2 prompt"]+"\n")
  print("RESPONSE 2:")
  print(run_info["output"]["response 2"])
  print("\n")


Codestral-2501
PROMPT 1:
Give a concise list of criteria for a valid email address

RESPONSE 1:
Here is a concise list of criteria for a valid email address:

1. **Local Part**:
   - Can contain letters (a-z, A-Z), digits (0-9), and special characters (!#$%&'*+-/=?^_`{|}~).
   - Cannot contain spaces or certain special characters (e.g., commas, colons, semicolons).
   - Must be at least one character long.

2. **At Symbol (@)**:
   - Must be present and separate the local part from the domain part.

3. **Domain Part**:
   - Must contain at least one dot (.) to separate the domain name from the top-level domain (TLD).
   - Can include letters, digits, and hyphens (-).
   - Must end with a valid top-level domain (e.g., .com, .org, .net).

4. **General Length**:
   - The entire email address must be no longer than 254 characters.

5. **No Consecutive or Leading/Trailing Dots**:
   - The local part and domain part cannot have consecutive dots or start/end with a dot.

6. **No Quoted String

## Prompting Strategy 2: Few Shot

In [None]:
# Task 4 few-shot prompt: five labelled input/output examples followed by the
# completion request and the function stub in `code`.
prompt = f"""
Example 1:
Input: x@example.com
Output: True

Example 2:
Input: Abc..123@example.com
Output: False

Example 3:
Input: user.name+tag+sorting@example.com
Output: True

Example 4:
Input: user@.com
Output: False

Example 5:
Input: angh__hgj@domain.com
Output: False

Complete the Python function using regex to validate email addresses:\n{code}\nPlease respond with only the completed function.
"""

In [None]:
# Task 4, few shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=4,
      prompt_strat="few shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
import re

def is_valid_email(email):
    pattern = r'^(?!.*\.\.)(?!.*\.$)(?!.*@.*@)(?!.*@-)(?!.*-@)(?!.*@\.)(?!.*\.$)[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return re.match(pattern, email) is not None
```


Codestral-2501
```python
import re

def is_valid_email(email):
    # Define the regex pattern for validating an email
    pattern = re.compile(
        r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
    )
    return bool(pattern.match(email))
```




# **Task 5: Generating Flask APIs (Python)**

In [None]:
# Task 5 input: a Flask stub wrapped in a Markdown ```python fence.
# The string is interpolated into the prompts of this task via {code}.
# NOTE(review): the route literal inside the snippet is delimited by typographic
# quotes (’) instead of ASCII quotes and contains spaces — confirm this is the
# intended task input and not a copy/paste artifact.
code = """
```python
from flask import Flask , jsonify

app = Flask ( __name__ )

@app . route (’/ greet / < username >’)
def greet ( username ) :
  # TODO : Return a JSON greeting
  pass
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Task 5 zero-shot prompt: endpoint request followed by the Flask stub in `code`.
prompt = f"Create a ‘/greet/<username>‘ endpoint that returns a JSON greeting given the following:\n{code}"

In [None]:
# Task 5, zero shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=5,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # Model name on one line, its output on the next; no extra separator here.
  print(run_info["model"], run_info["output"], sep="\n")

gpt-4o-mini
To create a `/greet/<username>` endpoint in your Flask application that returns a JSON greeting, you'll need to follow these steps:

1. **Import Required Libraries**: You already have the necessary imports for Flask and `jsonify`.

2. **Define the Route**: You need to ensure the route is defined correctly, handling the URL parameter for the username.

3. **Create the JSON Response**: Inside the `greet` function, you'll create a dictionary with a greeting message that includes the username, and then return it as a JSON response.

4. **Run the Application**: Ensure the Flask app runs if it is executed as the main module.

Here’s how you can implement this:

```python
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/greet/<username>')
def greet(username):
    # Create a JSON response with a greeting message
    response = {
        'greeting': f'Hello, {username}!'
    }
    return jsonify(response)

if __name__ == '__main__':
    app.run(debug=True)
```



## Prompting Strategy 2: Chain of Thought

In [None]:
# Task 5 chain-of-thought prompt: the endpoint request with an added
# "Think step-by-step." instruction after the Flask stub in `code`.
prompt = f"Create a ‘/greet/<username>‘ endpoint that returns a JSON greeting given the following:\n{code}\nThink step-by-step."

# Query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=5,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # Model name on one line, its output on the next; no extra separator here.
  print(run_info["model"], run_info["output"], sep="\n")

gpt-4o-mini
To create a `/greet/<username>` endpoint that returns a JSON greeting in a Flask application, we can follow these steps:

1. **Import Necessary Libraries**: We already have `Flask` and `jsonify` imported.
2. **Define the Route**: We need to ensure the route is correctly formatted and remove any unnecessary spaces or incorrect characters.
3. **Create the Greeting Message**: We will create a message using the `username` passed in the URL.
4. **Return the JSON Response**: We need to use `jsonify` to return the greeting in JSON format.

Here's the complete implementation of the `/greet/<username>` endpoint:

```python
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/greet/<username>')
def greet(username):
    # Create a greeting message
    greeting_message = f"Hello, {username}!"
    
    # Return the greeting as a JSON response
    return jsonify({'greeting': greeting_message})

if __name__ == '__main__':
    app.run(debug=True)
```

### Explanation of th

# **Task 6: SQL Schema Design**

## Prompting Strategy 1: Zero Shot

In [None]:
# Task 6 prompt: schema-design request with the table outline given inline.
# Used unchanged by both the zero-shot and the self-consistency runs of this task.
prompt = """
Write the schema for a review app with users, books, and reviews:
-- TODO : Design schema with appropriate keys and constraints
-- Tables : users (id , name ), books (id , title ), reviews (id , user_id , book_id ,
rating )
Respond with only the schema.
"""

In [None]:
# Task 6, zero shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=6,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```sql
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255) NOT NULL
);

CREATE TABLE books (
    id SERIAL PRIMARY KEY,
    title VARCHAR(255) NOT NULL
);

CREATE TABLE reviews (
    id SERIAL PRIMARY KEY,
    user_id INT NOT NULL,
    book_id INT NOT NULL,
    rating INT CHECK (rating >= 1 AND rating <= 5),
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
    FOREIGN KEY (book_id) REFERENCES books(id) ON DELETE CASCADE
);
```


Codestral-2501
```sql
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255) NOT NULL
);

CREATE TABLE books (
    id SERIAL PRIMARY KEY,
    title VARCHAR(255) NOT NULL
);

CREATE TABLE reviews (
    id SERIAL PRIMARY KEY,
    user_id INT NOT NULL,
    book_id INT NOT NULL,
    rating INT CHECK (rating >= 1 AND rating <= 5),
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
    FOREIGN KEY (book_id) REFERENCES books(id) ON DELETE CASCADE
);
```




## Prompting Strategy 2: Self Consistency

In [None]:
# Task 6, self consistency: two independent attempts per model for the first
# two entries of model_choices.
for i in range(2):
  run_info = run_prompt_self_consistency(
      task_num=6,
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
      num_attempts=2,
  )
  runs.append(run_info)
  attempts = run_info["output"]
  # Model name, both attempts, then the blank separator lines.
  print(
      run_info["model"],
      attempts["output attempt 1"],
      attempts["output attempt 2"],
      "\n",
      sep="\n",
  )

gpt-4o-mini
```sql
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL
);

CREATE TABLE books (
    id SERIAL PRIMARY KEY,
    title VARCHAR(255) NOT NULL
);

CREATE TABLE reviews (
    id SERIAL PRIMARY KEY,
    user_id INT NOT NULL,
    book_id INT NOT NULL,
    rating INT CHECK (rating >= 1 AND rating <= 5),
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
    FOREIGN KEY (book_id) REFERENCES books(id) ON DELETE CASCADE
);
```
```sql
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255) NOT NULL
);

CREATE TABLE books (
    id SERIAL PRIMARY KEY,
    title VARCHAR(255) NOT NULL
);

CREATE TABLE reviews (
    id SERIAL PRIMARY KEY,
    user_id INT NOT NULL,
    book_id INT NOT NULL,
    rating INT CHECK (rating BETWEEN 1 AND 5),
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
    FOREIGN KEY (book_id) REFERENCES books(id) ON DELETE CASCADE,
    UNIQUE (user_id, book_id)  -- Ensures a user can only review a 

# **Task 7: Null Dereference Detection (Java)**

In [None]:
# Task 7 input: a Java method wrapped in a Markdown ```java fence.
# The string is interpolated into the prompt of this task via {code}.
code = """
```java
public int getLength ( String s ) {
  return s . length () ;
}
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Task 7 prompt: asks for at most two issues in `code`, problems only and no fixes.
# Used unchanged by both the zero-shot and the self-consistency runs of this task.
prompt = f"Identify the most important issues (at most two) in the following Java function:\n{code}\nOnly focus on the problems, do not respond with solutions."

In [None]:
# Task 7, zero shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=7,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
1. **Null Pointer Exception**: The function does not handle the case where the input string `s` could be `null`, which would cause a `NullPointerException` when attempting to call `length()`.

2. **Lack of Input Validation**: The function does not validate the input before proceeding, which could lead to unexpected behavior if the function is used in a context where input validation is necessary.


Codestral-2501
The most important issues in the provided Java function are:

1. **Lack of Input Validation**: The function does not check if the input string `s` is `null`. Calling the `length()` method on a `null` string will result in a `NullPointerException`.

2. **Inconsistent Naming Convention**: The function name `getLength` uses camelCase, which is a common naming convention in Java. However, the method name `length` in the `String` class is in lowercase, which might be slightly inconsistent if the intention is to mirror the behavior of the `String` class closely.




## Prompting Strategy 2: Self Consistency

In [None]:
# Task 7, self consistency: two independent attempts per model for the first
# two entries of model_choices.
for i in range(2):
  run_info = run_prompt_self_consistency(
      task_num=7,
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
      num_attempts=2,
  )
  runs.append(run_info)
  attempts = run_info["output"]
  # Model name, both attempts, then the blank separator lines.
  print(
      run_info["model"],
      attempts["output attempt 1"],
      attempts["output attempt 2"],
      "\n",
      sep="\n",
  )

gpt-4o-mini
1. **Null Pointer Exception**: The function does not handle the case where the input string `s` is `null`, which would lead to a `NullPointerException` when attempting to call `s.length()`.

2. **Lack of Input Validation**: The function does not validate the input string, potentially leading to unexpected behavior or results in cases where the input is not suitable (e.g., if the input type is not a string).
1. **Null Pointer Exception**: The function does not handle the case where the input string `s` is `null`, which would result in a `NullPointerException` when attempting to call `s.length()`.

2. **Lack of Input Validation**: The function does not provide any validation or error handling for empty strings or other potential issues related to the input, such as ensuring that `s` is not modified or that it meets certain criteria if needed.


Codestral-2501
The most important issues in the given Java function are:

1. **Lack of Parameter Validation**: The function does not 

# **Task 8: CSV Parser Variants (Python)**

In [None]:
# Task 8 input: a Python function wrapped in a Markdown ```python fence.
# The string is interpolated into the prompts of this task via {code}.
# NOTE(review): the snippet uses typographic quotes (’) around the comma and in
# "doesn ’t" — confirm this is the intended task input and not a copy/paste artifact.
code = """
```python
def parse_csv_line ( line ) :
  return line . split (’,’) # Incomplete : doesn ’t handle quoted fields
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Task 8 zero-shot prompt: improvement request followed by the snippet in `code`.
prompt = f"Improve the following Python function to support quoted fields:\n{code}"

In [None]:
# Task 8, zero shot: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=8,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
To improve the `parse_csv_line` function to support quoted fields, you can use the `csv` module from Python's standard library, which is specifically designed to handle CSV parsing, including quoted fields and other complexities. Here’s an improved version of your function:

```python
import csv
from io import StringIO

def parse_csv_line(line):
    # Use StringIO to treat the string as a file
    f = StringIO(line)
    reader = csv.reader(f)
    return next(reader)  # Read the first (and only) row
```

### Explanation:
1. **`csv.reader`**: This function handles the parsing of CSV lines, taking care of quoted fields, commas within quotes, and other CSV formatting issues.
2. **`StringIO`**: This allows us to treat a string as a file, which is necessary because `csv.reader` expects a file-like object.

### Example Usage:
```python
line = 'value1,"value, with, commas",value3'
parsed = parse_csv_line(line)
print(parsed)  # Output: ['value1', 'value, with, commas', 'value3']
```

## Prompting Strategy 2: Chain of Thought

In [None]:
# Task 8 chain-of-thought prompt: the improvement request with an added
# "Think step-by-step" instruction before the snippet in `code`.
prompt = f"Improve the following Python function to support quoted fields. Think step-by-step:\n{code}"

In [None]:
# Task 8, chain of thought: query the first two entries of model_choices.
for i in range(2):
  run_info = run_prompt(
      task_num=8,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
To improve the `parse_csv_line` function to handle quoted fields, we need to consider the following steps:

1. **Understanding Quoted Fields**: In CSV files, fields can be enclosed in quotes. If a field contains a comma, it should be treated as part of the field rather than as a delimiter. For example, the string `"Doe, John",25` should be split into `["Doe, John", "25"]`.

2. **Using Regular Expressions**: To handle the complexity of quoted fields, we can use the `re` module in Python, which allows for more sophisticated string manipulation.

3. **Implementing the Function**: We need to create a regex pattern that accounts for:
   - Fields enclosed in double quotes (which may contain commas).
   - Unquoted fields (which are separated by commas).
   - Handling escape quotes if necessary (e.g., `"""` should be interpreted as a single `"`).

Here’s an updated version of the `parse_csv_line` function:

```python
import re

def parse_csv_line(line):
    # Regular expression to 

# **Task 9: Data Class to API Conversion (Kotlin)**

## Prompting Strategy 1: Zero Shot

In [42]:
# Task 9 zero-shot prompt: the Kotlin data class and the TODO are written
# directly into the prompt text (no separate `code` variable is interpolated).
prompt = """
Convert the data class to a REST API using Ktor:
data class Product ( val id : Int , val name : String , val price : Double )
// TODO : Create GET and POST endpoints using Ktor
"""

In [43]:
# NOTE(review): this rebinds `runs` to a new empty list, so every record that
# earlier task cells appended is no longer reachable through `runs` from here on.
# Confirm those results are saved elsewhere before this cell runs.
runs = []

In [44]:
# Indices into model_choices that are queried for task 9.
models_task9 = [1, 3]

# Task 9, zero shot: one request per selected model, with a 1024-token limit.
for i in models_task9:
  run_info = run_prompt(
      task_num=9,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=1024,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One print call; the trailing "\n" item reproduces the blank separator lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
To create a REST API using Ktor with a `Product` data class, you'll need to follow several steps. Below is a complete example that includes setting up Ktor, defining the data class, and implementing GET and POST endpoints.

First, ensure you have the necessary dependencies in your `build.gradle.kts` file:

```kotlin
plugins {
    kotlin("jvm") version "1.6.21"
    id("io.ktor.plugin") version "2.0.0"
}

repositories {
    mavenCentral()
}

dependencies {
    implementation("io.ktor:ktor-server-core:2.0.0")
    implementation("io.ktor:ktor-server-netty:2.0.0")
    implementation("io.ktor:ktor-server-content-negotiation:2.0.0")
    implementation("io.ktor:ktor-serialization-kotlinx-json:2.0.0")
    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.3.2")
}
```

Next, create the `Product` data class and implement the Ktor server with GET and POST endpoints.

```kotlin
import io.ktor.application.*
import io.ktor.features.ContentNegotiation
import io.ktor.feat

## Prompting Strategy 2: Chain of Thought

In [45]:
# Task 9 chain-of-thought prompt: the conversion request with an added
# "Think step-by-step" instruction; the Kotlin data class is written inline.
prompt = """
Convert the data class to a REST API using Ktor. Think step-by-step:
data class Product ( val id : Int , val name : String , val price : Double )
// TODO : Create GET and POST endpoints using Ktor
"""

In [46]:
# Send the chain-of-thought prompt to each Task 9 model and record the result
for model_idx in models_task9:
  run_info = run_prompt(
      task_num=9,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=1024,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
Sure! To create a REST API using Ktor for the `Product` data class, we'll need to follow several steps. We'll use Ktor, which is a framework for building asynchronous servers and clients in connected systems. Here’s a step-by-step guide:

### Step 1: Add Dependencies
First, ensure you have the necessary dependencies in your `build.gradle.kts` file:
```kotlin
dependencies {
    implementation("io.ktor:ktor-server-core:2.0.0")
    implementation("io.ktor:ktor-server-netty:2.0.0")
    implementation("io.ktor:ktor-server-auth:2.0.0")
    implementation("io.ktor:ktor-server-content-negotiation:2.0.0")
    implementation("io.ktor:ktor-serialization-kotlinx-json:2.0.0")
    implementation("io.ktor:ktor-server-status-pages:2.0.0")
    implementation("io.ktor:ktor-client-core:2.0.0")
    implementation("io.ktor:ktor-client-serialization:2.0.0")
}
```

### Step 2: Create the Data Class
Define your `Product` data class:
```kotlin
data class Product(
    val id: Int,
    val name: S

In [47]:
# Write the collected runs to task9_runs.json as JSON indented by four spaces
with open("task9_runs.json", mode="w") as results_file:
  json.dump(obj=runs, fp=results_file, indent=4)

# **Task 10: Function Summarization (Python)**

In [None]:
# Task 10 input: the Python function the models are asked to summarize.
# The separator string is written with straight quotes; the typographic
# quotes that were here before are not valid Python string delimiters.
code = """
```python
def reverse_words ( sentence ) :
  return ' '. join ( sentence . split () [:: -1])
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot summarization prompt: instruction line followed by the snippet
prompt = "\n".join(("Summarize what this code does in one sentence:", code))

In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=10,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
The code defines a function that takes a sentence as input and returns the sentence with its words reversed.


Codestral-2501
This code defines a function that reverses the order of words in a given sentence.




## Prompting Strategy 2: Self Consistency

In [None]:
# Sample the same prompt twice per model (self consistency) for the first two models
for model_idx in range(2):
  run_info = run_prompt_self_consistency(
      task_num=10,
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
      num_attempts=2,
  )
  runs.append(run_info)
  print(run_info["model"])
  # One line per sampled attempt, looked up by its "output attempt N" key
  for attempt_no in (1, 2):
    print(run_info["output"][f"output attempt {attempt_no}"])
  print("\n")

gpt-4o-mini
The code defines a function that takes a sentence as input and returns the sentence with its words reversed.
The code defines a function that reverses the order of words in a given sentence.


Codestral-2501
This code defines a function that reverses the order of words in a given sentence.
This code defines a function that reverses the order of words in a given sentence.




# **Task 11: Prompt from Code Comments (Python)**

In [None]:
# Task 11 input: a commented is_prime function (no markdown fence around this one)
code = (
    "\n"
    "# This function checks if a number is prime\n"
    "def is_prime ( n ) :\n"
    "  if n <= 1:\n"
    "    return False\n"
    "  for i in range (2 , int( n **0.5) +1) :\n"
    "    if n % i == 0:\n"
    "      return False\n"
    "  return True\n"
)

## Prompting Strategy 1: Zero Shot

In [None]:
# Keep only the first 40 recorded runs; anything appended after them is dropped.
# NOTE(review): 40 is a hard-coded count, presumably tied to how many runs earlier cells produced — confirm before re-running.
runs = runs[:40]

In [None]:
# Zero-shot prompt asking for a prompt that would generate the given function
prompt = "\n".join(("Write a concise prompt to generate the given Python function:", code))

In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=11,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
Generate a Python function named `is_prime` that checks if a given integer `n` is a prime number. The function should return `False` for numbers less than or equal to 1, and for any number that is divisible by any integer from 2 up to the square root of `n`. It should return `True` if the number is prime.


Codestral-2501
Generate a Python function to check if a number is prime.




## Prompting Strategy 2: Self Consistency

In [None]:
# Sample the same prompt twice per model (self consistency) for the first two models
for model_idx in range(2):
  run_info = run_prompt_self_consistency(
      task_num=11,
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
      num_attempts=2,
  )
  runs.append(run_info)
  print(run_info["model"])
  # One line per sampled attempt, looked up by its "output attempt N" key
  for attempt_no in (1, 2):
    print(run_info["output"][f"output attempt {attempt_no}"])
  print("\n")

gpt-4o-mini
Create a Python function named `is_prime` that takes a single integer `n` as input and returns `True` if `n` is a prime number and `False` otherwise. The function should handle cases where `n` is less than or equal to 1 by returning `False`, and for other values, it should check for divisibility from 2 up to the square root of `n`.
Generate a Python function named `is_prime` that checks if a given number `n` is a prime number. The function should return `False` if `n` is less than or equal to 1. For numbers greater than 1, it should iterate from 2 to the square root of `n`, returning `False` if any number divides `n` evenly. If no divisors are found, it should return `True`.


Codestral-2501
Generate a Python function that checks if a number is prime.
Generate a Python function that checks if a given number is prime.




# **Task 12: Fixing Factorial Bug (Python)**

In [None]:
# Task 12 input: the factorial function handed to the models for fixing, inside a python fence
code = (
    "\n"
    "```python\n"
    "def factorial ( n ) :\n"
    "  result = 1\n"
    "  for i in range (1 , n ) :\n"
    "    result *= i\n"
    "  return result\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot bug-fix prompt: instruction, snippet, then the output constraint
prompt = "\n".join((
    "Fix the bug when input is 0 in the following Python function:",
    code,
    "Only reply with the fixed function.",
))

In [None]:
# Display the assembled prompt text for inspection.
print(prompt)

Fix the bug when input is 0 in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```

Only reply with the fixed function.


In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=12,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
def factorial(n):
    if n == 0:
        return 1
    result = 1
    for i in range(1, n + 1):
        result *= i
    return result
```


Codestral-2501
```python
def factorial(n):
  result = 1
  for i in range(1, n + 1):
    result *= i
  return result
```




## Prompting Strategy 2: Few Shot

In [None]:
# Few-shot bug-fix prompt: three input/output examples, then the instruction,
# the snippet, and the output constraint.
prompt = (
    "\n"
    "Example 1:\n"
    "Input: 0\n"
    "Output: 1\n"
    "\n"
    "\n"
    "Example 2:\n"
    "Input: 1\n"
    "Output: 1\n"
    "\n"
    "\n"
    "Example 3:\n"
    "Input: 5\n"
    "Output: 120\n"
    "\n"
    "Fix the bug when input is 0 in the following Python function:\n"
    f"{code}\n"
    "Only reply with the fixed function.\n"
)

In [None]:
# Display the assembled few-shot prompt text for inspection.
print(prompt)


Example 1:
Input: 0
Output: 1


Example 2:
Input: 1
Output: 1


Example 3:
Input: 5
Output: 120

Fix the bug when input is 0 in the following Python function:

```python
def sum_range ( start , end ) :
    total = 0
    for i in range ( start , end ) :
        total += i
    return total
```

Only reply with the fixed function.



In [None]:
# Run the few-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=12,
      prompt_strat="few shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
def factorial(n):
    if n == 0:
        return 1
    result = 1
    for i in range(1, n + 1):
        result *= i
    return result
```


Codestral-2501
```python
def factorial(n):
    result = 1
    for i in range(1, n + 1):
        result *= i
    return result
```




# **Task 13: Linked List Node Deletion (C)**

In [82]:
# Task 13 input: the Node struct and an empty deleteNode stub, inside a c fence
code = (
    "\n"
    "```c\n"
    "struct Node {\n"
    "  int data ;\n"
    "  struct Node * next ;\n"
    "};\n"
    "\n"
    "void deleteNode ( struct Node ** head , int key ) {\n"
    "  // TODO : Implement node deletion\n"
    "}\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [90]:
# Zero-shot prompt: instruction, snippet, then the constraint on returned functions
prompt = "\n".join((
    "Given the following, implement node deletion by value in C:",
    code,
    "Do not return more functions than was originally provided.",
))

In [91]:
# Display the assembled prompt text for inspection.
print(prompt)

Given the following, implement node deletion by value in C:

```c
struct Node {
  int data ;
  struct Node * next ;
};

void deleteNode ( struct Node ** head , int key ) {
  // TODO : Implement node deletion
}
```

Do not return more functions than was originally provided.


In [92]:
# Start from an empty list of recorded runs; any previously collected entries are discarded.
runs = []

In [93]:
# Model indices iterated by the Task 13 cells; each is passed to run_prompt as model_num.
models_task13 = [1,3]

In [94]:
# Send the zero-shot prompt to each Task 13 model and record the result
for model_idx in models_task13:
  run_info = run_prompt(
      task_num=13,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
Certainly! Below is the implementation of the `deleteNode` function to delete a node with a specific value from a singly linked list in C:

```c
struct Node {
  int data;
  struct Node *next;
};

void deleteNode(struct Node** head, int key) {
  // Store the head node
  struct Node* temp = *head, *prev = NULL;

  // If head node itself holds the key to be deleted
  if (temp != NULL && temp->data == key) {
    *head = temp->next; // Change head
    free(temp);         // Free old head
    return;
  }

  // Search for the key to be deleted, keep track of the previous node as we need to change 'prev->next'
  while (temp != NULL && temp->data != key) {
    prev = temp;
    temp = temp->next;
  }

  // If key was not present in linked list
  if (temp == NULL) return;

  // Unlink the node from linked list
  prev->next = temp->next;

  free(temp); // Free memory
}
```

### Explanation:
1. **Handle the Head Node**:
   - If the node to be deleted is the head node, update the head

## Prompting Strategy 2: Chain of Thought

In [95]:
# Chain-of-thought prompt: instruction, snippet, then "Think step-by-step" plus the constraint
prompt = "\n".join((
    "Given the following, implement node deletion by value in C:",
    code,
    "Think step-by-step. Do not return more functions than was originally provided.",
))
print(prompt)

Given the following, implement node deletion by value in C:

```c
struct Node {
  int data ;
  struct Node * next ;
};

void deleteNode ( struct Node ** head , int key ) {
  // TODO : Implement node deletion
}
```

Think step-by-step. Do not return more functions than was originally provided.


In [96]:
# Send the chain-of-thought prompt to each Task 13 model and record the result
for model_idx in models_task13:
  run_info = run_prompt(
      task_num=13,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
Sure, let's implement the `deleteNode` function step-by-step. The goal is to delete a node with a specific value from a linked list.

### Steps to Implement `deleteNode`

1. **Handle Empty List**: If the list is empty (i.e., `head` is `NULL`), there is nothing to delete.
2. **Delete Head Node**: If the node to be deleted is the head node, update the head to point to the next node.
3. **Traverse the List**: Traverse the list to find the node to be deleted. Keep track of the previous node to update its `next` pointer.
4. **Update Links**: Once the node to be deleted is found, update the `next` pointer of the previous node to skip the node to be deleted.
5. **Free Memory**: Free the memory of the node to be deleted to avoid memory leaks.

Here is the implementation of the `deleteNode` function:

```c
#include <stdio.h>
#include <stdlib.h>

struct Node {
  int data;
  struct Node *next;
};

void deleteNode(struct Node **head, int key) {
  // Check if the list is empty
  if (

# **Task 14: Recursive Function Completion (Python)**

In [None]:
# Task 14 input: an empty recursive fibonacci stub, inside a python fence
code = (
    "\n"
    "```python\n"
    "def fibonacci ( n ) :\n"
    "  # TODO : Base cases and recursive call\n"
    "  pass\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot completion prompt: instruction, snippet, then the output constraint
prompt = "\n".join((
    "Complete the recursive Python function to return the n-th Fibonacci number:",
    code,
    "Return only the completed function.",
))

In [None]:
# Display the assembled prompt text for inspection.
print(prompt)

Complete the recursive Python function to return the n-th Fibonacci number:

```python
def fibonacci ( n ) :
  # TODO : Base cases and recursive call
  pass
```

Return only the completed function.


In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=14,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
def fibonacci(n):
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n - 1) + fibonacci(n - 2)
```


Codestral-2501
```python
def fibonacci(n):
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n - 1) + fibonacci(n - 2)
```




## Prompting Strategy 2: Few Shot

In [None]:
# Few-shot completion prompt: three input/output examples, then the instruction,
# the snippet, and the output constraint.
prompt = (
    "\n"
    "Example 1:\n"
    "Input: 1\n"
    "Output: 0\n"
    "\n"
    "\n"
    "Example 2:\n"
    "Input: 5\n"
    "Output: 3\n"
    "\n"
    "\n"
    "Example 3:\n"
    "Input: 9\n"
    "Output: 21\n"
    "\n"
    "\n"
    "Complete the recursive Python function to return the n-th Fibonacci number:\n"
    f"{code}\n"
    "Return only the completed function.\n"
)
# Run the few-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=14,
      prompt_strat="few shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
def fibonacci(n):
    # Base cases
    if n == 0:
        return 0
    elif n == 1:
        return 1
    # Recursive call
    return fibonacci(n - 1) + fibonacci(n - 2)
```


Codestral-2501
Here is the completed recursive Python function to return the n-th Fibonacci number:

```python
def fibonacci(n):
    # Base cases
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        # Recursive call
        return fibonacci(n - 1) + fibonacci(n - 2)
```

This function uses recursion to compute the n-th Fibonacci number. The base cases are set for `n == 0` and `n == 1`, which return 0 and 1 respectively. For other values of `n`, the function calls itself with `n - 1` and `n - 2` and adds the results.




# **Task 15: Constructor Completion (Python)**

In [None]:
# Task 15 input: a Person class whose constructor the models must complete.
# The snippet is wrapped in a python fence; the closing fence keeps any text
# appended after the snippet outside the code block.
code = """
```python
class Person :
  def __init__ ( self ) :
    # TODO : Add name , age , and optional email
    pass
```
"""

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot completion prompt: instruction, snippet, then the output constraint
prompt = "\n".join((
    "Complete the following class constructor:",
    code,
    "Reply with only the completed code.",
))
print(prompt)

Complete the following class constructor:

```python
class Person :
  def __init__ ( self ) :
    # TODO : Add name , age , and optional email
    pass

Reply with only the completed code.


In [None]:

# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=15,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Complete the following class constructor:

```python
class Person :
  def __init__ ( self ) :
    # TODO : Add name , age , and optional email
    pass

Reply with only the completed code.


RateLimitError: Error code: 429 - {'error': {'code': 'RateLimitReached', 'message': 'Rate limit of 150 per 86400s exceeded for UserByModelByDay. Please wait 72800 seconds before retrying.', 'details': 'Rate limit of 150 per 86400s exceeded for UserByModelByDay. Please wait 72800 seconds before retrying.'}}

## Prompting Strategy 2: Few Shot

In [None]:
# Few-shot completion prompt: three example constructors, then the instruction,
# the snippet, and the output constraint.
prompt = f"""
Example 1:
```python
class Book:
    def __init__(self, title, author, year):
        self.title = title
        self.author = author
        self.year = year
```


Example 2:
```python
class Car:
    def __init__(self, make, model, year):
        self.make = make
        self.model = model
        self.year = year
```


Example 3:
```python
class Rectangle:
    def __init__(self, width=1, height=1):
        self.width = width
        self.height = height
```

Complete the following class constructor:\n{code}\nReply with only the completed code.
"""
# Run the few-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=15,
      prompt_strat="few shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
class Person:
    def __init__(self, name, age, email=None):
        self.name = name
        self.age = age
        self.email = email
```


Codestral-2501
```python
class Person :
  def __init__ ( self , name , age , email=None) :
    self.name = name
    self.age = age
    self.email = email
```




# **Task 16: Binary Search Completion (Java)**

In [None]:
# Task 16 input: a Java binarySearch skeleton with the comparison step left as a TODO
code = (
    "\n"
    "```java\n"
    "public int binarySearch ( int [] arr , int target ) {\n"
    "  int left = 0 , right = arr . length - 1;\n"
    "  while ( left <= right ) {\n"
    "    int mid = ( left + right ) / 2;\n"
    "    // TODO : Compare and adjust bounds\n"
    "  }\n"
    "  return -1;\n"
    "}\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot completion prompt: instruction line followed by the snippet
prompt = "\n".join(("Complete the binary search Java implementation:", code))
print(prompt)

Complete the binary search Java implementation:

```java
public int binarySearch ( int [] arr , int target ) {
  int left = 0 , right = arr . length - 1;
  while ( left <= right ) {
    int mid = ( left + right ) / 2;
    // TODO : Compare and adjust bounds
  }
  return -1;
}
```



In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=16,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
Sure! Below is the completed implementation of the binary search algorithm in Java, including the comparison and adjustment of bounds.

```java
public int binarySearch(int[] arr, int target) {
    int left = 0, right = arr.length - 1;
    while (left <= right) {
        int mid = (left + right) / 2;
        
        // Compare the middle element with the target
        if (arr[mid] == target) {
            return mid;  // Target found, return the index
        } else if (arr[mid] < target) {
            left = mid + 1;  // Target is in the right half
        } else {
            right = mid - 1; // Target is in the left half
        }
    }
    
    return -1; // Target not found
}
```

### Explanation:
1. **Initialization**: The `left` pointer is set to the start of the array, and the `right` pointer is set to the end of the array.
2. **Loop**: As long as `left` is less than or equal to `right`, the loop continues.
3. **Mid Calculation**: The middle index `mid` is calculat

## Prompting Strategy 2: Chain of Thought

In [None]:
# Chain-of-thought completion prompt: instruction line followed by the snippet
prompt = "\n".join(("Complete the binary search Java implementation. Think step-by-step:", code))
print(prompt)

Complete the binary search Java implementation. Think step-by-step:

```java
public int binarySearch ( int [] arr , int target ) {
  int left = 0 , right = arr . length - 1;
  while ( left <= right ) {
    int mid = ( left + right ) / 2;
    // TODO : Compare and adjust bounds
  }
  return -1;
}
```



In [None]:
# Run the chain-of-thought prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=16,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
Sure! Let's complete the binary search implementation step-by-step. In a binary search, you need to compare the target value with the middle element of the current search range (defined by `left` and `right`). Depending on the comparison result, you will either find the target, or you will adjust the bounds to continue the search.

Here’s the full implementation:

```java
public int binarySearch(int[] arr, int target) {
    int left = 0, right = arr.length - 1;
    
    while (left <= right) {
        int mid = (left + right) / 2;

        // Compare the middle element with the target
        if (arr[mid] == target) {
            return mid; // Target found, return the index
        } else if (arr[mid] < target) {
            left = mid + 1; // Target is in the right half
        } else {
            right = mid - 1; // Target is in the left half
        }
    }
    
    return -1; // Target not found
}
```

### Explanation of the Code:

1. **Initialization**: 
   - `left` 

# **Task 17: Self-Consistency Bug Fixing (C++)**

In [49]:
# Task 17 input: a C++ function whose name (isOdd) disagrees with its comment and logic
code = (
    "\n"
    "```cpp\n"
    "// Supposed to return true if x is even\n"
    "bool isOdd (int x ) {\n"
    "  return x % 2 == 0;\n"
    "}\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot prompt: instruction, snippet, then the output constraint
prompt = "\n".join((
    "Resolve the inconsistency between the function name and logic such that the function works for its original intent:",
    code,
    "Only include the solution(s)",
))
print(prompt)

Resolve the inconsistency between the function name and logic such that the function works for its original intent:

```cpp
// Supposed to return true if x is even
bool isOdd (int x ) {
  return x % 2 == 0;
}
```

Only include the solution(s)


In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=17,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```cpp
// Corrected function to return true if x is odd
bool isOdd(int x) {
  return x % 2 != 0;
}
```


Codestral-2501
To resolve the inconsistency between the function name and its logic, you need to update the function logic such that it correctly returns `true` if `x` is even. The current logic returns `true` if `x` is even, which aligns with the function name "isOdd." However, the comment suggests that the function should return `true` if `x` is even, which requires the name to be changed to reflect the correct intent.

Here's the corrected function:

```cpp
// Returns true if x is even
bool isEven(int x) {
  return x % 2 == 0;
}
```

Explanation:
- The function name is changed to `isEven` to reflect that it returns `true` if `x` is even.
- The logic `x % 2 == 0` correctly checks if `x` is even.




## Prompting Strategy 2: Prompt Chaining

In [50]:
# Model indices iterated by the Task 17 prompt-chaining cell; each indexes model_choices.
models_task17 = [1,3]

In [51]:
# Prompt chaining for Task 17: ask each model to describe the inconsistency,
# then, in the same conversation, ask it to fix the function.
for model_idx in models_task17:
  model_name = model_choices[model_idx]
  prompt1 = f"Identify the logical inconsistency and intent of the function in the following code in a maximum of two sentences:\n{code}"
  followup = "Fix the function based on the intent identified above. Only return the solution."

  # Step 1: the conversation holds only the analysis request
  messages = [{"role": "user", "content": prompt1}]
  response = client.chat.completions.create(
      model=model_name,
      messages=messages,
      max_tokens=768,
      temperature=0.7,
  )
  response1 = response.choices[0].message.content

  # Step 2: extend the history with the model's answer and the follow-up request
  messages += [
      {"role": "assistant", "content": response1},
      {"role": "user", "content": followup},
  ]
  response = client.chat.completions.create(
      model=model_name,
      messages=messages,
      max_tokens=768,
      temperature=0.7,
  )
  reply = response.choices[0].message.content

  # Record both prompts and both responses as one run
  run_info = {
      "task_num": 17,
      "model": model_name,
      "prompt strategy": "prompt chaining",
      "prompt": {"step 1 prompt": prompt1, "step 2 prompt": followup},
      "output": {"response 1": response1, "response 2": reply},
  }
  runs.append(run_info)

  print(model_name)
  print("PROMPT 1:")
  print(prompt1 + "\n")
  print("RESPONSE 1:")
  print(response1 + "\n")
  print("PROMPT 2:")
  print(followup + "\n")
  print("RESPONSE 2:")
  print(reply)
  print("\n")


Codestral-2501
PROMPT 1:
Identify the logical inconsistency and intent of the function in the following code in a maximum of two sentences:

```cpp
// Supposed to return true if x is even
bool isOdd (int x ) {
  return x % 2 == 0;
}
```


RESPONSE 1:
The function is logically inconsistent because it returns `true` if `x` is even, which contradicts its intended purpose of returning `true` if `x` is odd. The intent is to determine if the number `x` is odd.

PROMPT 2:
Fix the function based on the intent identified above. Only return the solution.

RESPONSE 2:
```cpp
bool isOdd(int x){
  return x % 2 != 0;
}
```


gpt-4.1-mini
PROMPT 1:
Identify the logical inconsistency and intent of the function in the following code in a maximum of two sentences:

```cpp
// Supposed to return true if x is even
bool isOdd (int x ) {
  return x % 2 == 0;
}
```


RESPONSE 1:
The function named `isOdd` returns `true` when `x` is even, which contradicts its intended purpose. The logic should check `x % 2 !=

# **Task 18: Prompt Chaining: Bug Identification → Fix (JavaScript)**

In [98]:
# Task 18 input: a JavaScript isEven function that returns n % 2, inside a js fence
code = (
    "\n"
    "```js\n"
    "function isEven ( n ) {\n"
    "  return n % 2;\n"
    "}\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [2]:
# Zero-shot prompt: instruction, snippet, then the constraint on test examples
prompt = "\n".join((
    "Identify and fix the bug in the following JavaScript function:",
    code,
    "Do not provide examples on how to test the function.",
))
print(prompt)

Identify and fix the bug in the following JavaScript function:

```js
function isEven ( n ) {
  return n % 2;
}
```

Do not provide examples on how to test the function.


In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=18,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
The bug in the function `isEven` is that it returns the result of `n % 2`, which will be `0` for even numbers and `1` for odd numbers. However, to properly indicate whether a number is even, the function should return `true` for even numbers and `false` for odd numbers.

Here is the corrected version of the function:

```js
function isEven(n) {
  return n % 2 === 0; 
}
``` 

This way, the function correctly returns `true` for even numbers and `false` for odd numbers.


Codestral-2501
The bug in the `isEven` function is that it does not correctly determine whether a number is even or odd. The expression `n % 2` returns `0` for even numbers and a non-zero value for odd numbers. However, the function should return `true` for even numbers and `false` for odd numbers.

To fix the bug, you should compare the result of `n % 2` to `0`. Here is the corrected function:

```js
function isEven(n) {
  return n % 2 === 0;
}
```

This ensures that the function returns `true` if `n` is eve

## Prompting Strategy 2: Prompt Chaining

In [100]:
# Model indices iterated by the Task 18 prompt-chaining cell; each indexes model_choices.
models_task18 = [1,3]

In [101]:
# Prompt chaining for Task 18: ask each model to identify the bug,
# then, in the same conversation, ask it to fix the function.
for model_idx in models_task18:
  model_name = model_choices[model_idx]
  prompt1 = f"Identify the bug in the following JavaScript function in at most two sentences:\n{code}"
  followup = "Fix the above JavaScript function. Do not provide examples on how to test the function."

  # Step 1: the conversation holds only the bug-identification request
  messages = [{"role": "user", "content": prompt1}]
  response = client.chat.completions.create(
      model=model_name,
      messages=messages,
      max_tokens=768,
      temperature=0.7,
  )
  response1 = response.choices[0].message.content

  # Step 2: extend the history with the model's answer and the follow-up request
  messages += [
      {"role": "assistant", "content": response1},
      {"role": "user", "content": followup},
  ]
  response = client.chat.completions.create(
      model=model_name,
      messages=messages,
      max_tokens=768,
      temperature=0.7,
  )
  reply = response.choices[0].message.content

  # Record both prompts and both responses as one run
  run_info = {
      "task_num": 18,
      "model": model_name,
      "prompt strategy": "prompt chaining",
      "prompt": {"step 1 prompt": prompt1, "step 2 prompt": followup},
      "output": {"response 1": response1, "response 2": reply},
  }
  runs.append(run_info)

  print(model_name)
  print("PROMPT 1:")
  print(prompt1 + "\n")
  print("RESPONSE 1:")
  print(response1 + "\n")
  print("PROMPT 2:")
  print(followup + "\n")
  print("RESPONSE 2:")
  print(reply)
  print("\n")

Codestral-2501
PROMPT 1:
Identify the bug in the following JavaScript function in at most two sentences:

```js
function isEven ( n ) {
  return n % 2;
}
```


RESPONSE 1:
The function `isEven` returns `n % 2`, which is `0` for even numbers and a non-zero value for odd numbers. This is the opposite of what the function name suggests. The function should return `true` for even numbers and `false` for odd numbers.

PROMPT 2:
Fix the above JavaScript function. Do not provide examples on how to test the function.

RESPONSE 2:
```js
function isEven(n) {
  return n % 2 === 0;
}
```


gpt-4.1-mini
PROMPT 1:
Identify the bug in the following JavaScript function in at most two sentences:

```js
function isEven ( n ) {
  return n % 2;
}
```


RESPONSE 1:
The function returns the remainder of `n % 2`, which is `0` for even numbers and `1` for odd numbers, but since `0` is falsy and `1` is truthy in JavaScript, it incorrectly treats even numbers as `false`. To fix it, the function should return `n

# **Task 19: Summary Decomposition (C++)**

In [3]:
# Task 19 input: a C++ function with a one-line summary comment to be decomposed
code = (
    "\n"
    "```cpp\n"
    "// Function that validates an input , calculates square , and returns result\n"
    "int process (int x ) {\n"
    "  if ( x < 0) return -1;\n"
    "    return x * x ;\n"
    "}\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [4]:
# Zero-shot decomposition prompt: instruction line followed by the snippet
prompt = "\n".join(("Decompose the high-level comment/summary into logical steps:", code))
print(prompt)

Decompose the high-level comment/summary into logical steps:

```cpp
// Function that validates an input , calculates square , and returns result
int process (int x ) {
  if ( x < 0) return -1;
    return x * x ;
}
```



In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=19,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
To decompose the high-level comment/summary into logical steps based on the provided C++ function, we can break it down as follows:

1. **Input Validation**:
   - Check if the input value `x` is less than 0.
   - If `x` is less than 0, return -1 to indicate invalid input.

2. **Calculation**:
   - If the input is valid (i.e., `x` is 0 or greater), calculate the square of `x` by multiplying `x` by itself.

3. **Return Result**:
   - Return the calculated square of `x` as the output of the function.

To summarize, the steps are:
1. Check if `x` is negative:
   - If yes, return -1.
2. If `x` is non-negative, compute `x * x`.
3. Return the result of the calculation.


Codestral-2501
To decompose the high-level comment/summary into logical steps for the given C++ function, we can break it down as follows:

1. **Input Validation:**
   - Check if the input `x` is less than 0.
   - If `x` is less than 0, return -1 to indicate an invalid input.

2. **Calculation:**
   - If the input

## Prompting Strategy 2: Chain of Thought

In [None]:
# Chain-of-thought decomposition prompt: instruction line followed by the snippet
prompt = "\n".join((
    "Decompose the high-level comment/summary into logical steps. Think step-by-step:",
    code,
))

In [None]:
# Run the chain-of-thought prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=19,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
To decompose the high-level comment/summary into logical steps, we can break it down as follows:

1. **Function Definition**: Define a function named `process` that takes a single integer parameter `x`.

2. **Input Validation**: 
   - Check if the input `x` is less than 0.
   - If `x` is less than 0, return -1. This indicates that the input is invalid.

3. **Calculate Square**: 
   - If the input `x` is valid (i.e., non-negative), calculate the square of `x` by multiplying `x` by itself.

4. **Return Result**: 
   - Return the calculated square of `x` as the result of the function.

In summary, the steps outline the process of validating the input, computing the square if valid, and returning the appropriate result based on the input value.


Codestral-2501
To break down the high-level comment/summary of the `process` function into logical steps, we need to follow the steps explicitly mentioned in the comment:

1. **Validate the Input:**
   - Check if the input `x` is less 

# **Task 20: Purpose Inference → Completion (Python)**

In [58]:
# Task 20 input: a calculate_average stub with the averaging step left as a TODO
code = (
    "\n"
    "```python\n"
    "def calculate_average ( scores ) :\n"
    "  total = 0\n"
    "  # TODO : Complete to return average\n"
    "  pass\n"
    "```\n"
)

## Prompting Strategy 1: Zero Shot

In [None]:
# Zero-shot completion prompt: instruction, snippet, then the output constraint
prompt = "\n".join((
    "Complete the function based on intent:",
    code,
    "Respond with only the completed function.",
))

In [None]:
# Run the zero-shot prompt against the first two entries of model_choices
for model_idx in range(2):
  run_info = run_prompt(
      task_num=20,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=768,
      temp=0.7,
      model_num=model_idx,
  )
  runs.append(run_info)
  # Model name, its reply, then the blank-line spacer between models
  print(run_info["model"], run_info["output"], "\n", sep="\n")

gpt-4o-mini
```python
def calculate_average(scores):
    total = sum(scores)
    return total / len(scores) if scores else 0
```


Codestral-2501
```python
def calculate_average(scores):
    total = 0
    for score in scores:
        total += score
    return total / len(scores)
```




## Prompting Strategy 2: Prompt Chaining

In [59]:
# Indices into model_choices used for the Task 20 prompt-chaining runs.
models_task20 = [1, 3]

In [60]:
# Two-step prompt chain for Task 20: first ask the model to state the
# function's intent, then ask it (in the same conversation) to complete the
# function. One record per model is appended to `runs`.
for i in models_task20:
  model_name = model_choices[i]

  # Prompts in the order they are sent. Only the first one interpolates the
  # code snippet; the follow-up relies on the conversation history.
  step_prompts = [
      f"Identify the intended purpose of the following function in one sentence:\n{code}",
      "Complete the function based on the intent identified above. Respond with only the completed function.",
  ]

  # Same record layout as before; the "prompt"/"output" sub-dicts are filled
  # step by step below ("step N prompt" / "response N").
  run_info = {"task_num": 20,
              "model": model_name,
              "prompt strategy": "prompt chaining",
              "prompt": {},
              "output": {}}

  # Conversation history: grows by one user turn and one assistant turn per
  # step, so each follow-up is answered with the earlier exchange as context.
  messages = []
  for step, step_prompt in enumerate(step_prompts, start=1):
    run_info["prompt"][f"step {step} prompt"] = step_prompt
    messages.append({"role": "user", "content": step_prompt})

    response = client.chat.completions.create(
        model=model_name,
        max_tokens=768,
        temperature=0.7,
        messages=messages,
    )

    reply = response.choices[0].message.content
    messages.append({"role": "assistant", "content": reply})
    run_info["output"][f"response {step}"] = reply

  runs.append(run_info)

  # Echo the whole exchange; a blank line follows every prompt and response.
  print(run_info["model"])
  for step in range(1, len(step_prompts) + 1):
    print(f"PROMPT {step}:")
    print(run_info["prompt"][f"step {step} prompt"] + "\n")
    print(f"RESPONSE {step}:")
    print(run_info["output"][f"response {step}"] + "\n")
  print()

Codestral-2501
PROMPT 1:
Identify the intended purpose of the following function in one sentence:

```python
def calculate_average ( scores ) :
  total = 0
  # TODO : Complete to return average
  pass
```


RESPONSE 1:
The intended purpose of the function is to calculate and return the average of a list of scores.

PROMPT 2:
Complete the function based on the intent identified above. Respond with only the completed function.

RESPONSE 2:
```python
def calculate_average ( scores ) :
  total = 0
  for score in scores:
    total += score
  return total / len(scores)
```


gpt-4.1-mini
PROMPT 1:
Identify the intended purpose of the following function in one sentence:

```python
def calculate_average ( scores ) :
  total = 0
  # TODO : Complete to return average
  pass
```


RESPONSE 1:
The function is intended to calculate and return the average of the numerical values in the `scores` list.

PROMPT 2:
Complete the function based on the intent identified above. Respond with only the complet

# **Task 21: Full-File Bug Detection and Refactoring (Python)**

In [103]:
# Task 21 input: a small CSV-summarising script embedded as a Markdown-fenced
# string and sent to the models verbatim.
# NOTE(review): the literal contains typographic quotes (’) and spaced-out
# tokens — presumably pasted from a formatted document. They are part of what
# the models see; confirm whether that is intentional before normalising them.
code = """
```python
# utils .py - Script to parse and summarize numeric CSV files

import csv

def read_csv ( filepath ) :
  with open ( filepath , ’r’) as f :
    return [ row for row in csv . reader ( f ) ]

def summarize_column ( data , index ) :
  values = [ float ( row [ index ]) for row in data [1:]] # skip header
  total = sum( values )
  avg = total / len ( values )
  return total , avg

def main () :
  filepath = ’data .csv ’
  data = read_csv ( filepath )
  total , avg = summarize_column ( data , 1)
  print (" Total :", total )
  print (" Average :", avg )

if __name__ == ’__main__ ’:
  main ()
```
"""

## Prompting Strategy 1: Zero Shot

In [66]:
# Zero-shot prompt: ask for issue analysis and a refactor in a single request.
prompt = (
    "Analyze the following utility script:\n"
    f"{code}"
    "\nIdentify any potential logic issues or design flaws. "
    "Then, refactor it for better readability, correctness, and safety."
)

In [68]:
# Indices into model_choices used for every Task 21 run.
models_task21 = [1, 3]

In [69]:
# Send the zero-shot prompt to each model selected for Task 21,
# store each run record, and echo the model name followed by its reply.
for i in models_task21:
  run_info = run_prompt(
      task_num=21,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=1024,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  # One call, same text as three separate prints: model, reply, then two blank lines.
  print(run_info["model"], run_info["output"], "\n", sep="\n")

Codestral-2501
Let's analyze the utility script for potential issues and design flaws:

### Potential Logic Issues and Design Flaws:

1. **Character Encoding Issues**:
   - The script uses non-standard single quotes (`’`) instead of standard single quotes (`'`). This will cause a `SyntaxError`.

2. **File Handling**:
   - The script does not handle exceptions that may arise from file operations, such as file not found or permission issues.

3. **Data Validation**:
   - There is no validation to ensure that the file is a valid CSV and that the specified column index exists in the data.

4. **Type Errors**:
   - The script does not handle cases where the values in the specified column cannot be converted to floats.

5. **Magic Strings**:
   - The filename `data.csv` is hardcoded, which reduces flexibility and testability.

6. **Lack of Documentation**:
   - There are no docstrings or comments explaining the purpose of the functions.

7. **Column Indexing**:
   - The script assumes that t

## Prompting Strategy 2: Prompt Chaining

In [108]:
# Three-step prompt chain for Task 21: (1) list the issues, (2) fix them,
# (3) refactor the script. All steps share one conversation, and one record per
# model is appended to `runs`.
for i in models_task21:
  model_name = model_choices[i]

  # Prompts in the order they are sent. Only the first one interpolates the
  # script; the follow-ups rely on the conversation history.
  # NOTE(review): "Do no provide solutions." looks like a typo for "Do not";
  # it is kept verbatim because the recorded outputs were produced with this
  # exact prompt text.
  step_prompts = [
      f"Identify any potential logic issues or design flaws in the following:\n{code}\nPlease format the response as a list. Do no provide solutions.",
      "Fix the issues identified above",
      "Refactor the script for better readability, correctness, and safety.",
  ]

  # Same record layout as before; the "prompt"/"output" sub-dicts are filled
  # step by step below ("step N prompt" / "response N").
  run_info = {"task_num": 21,
              "model": model_name,
              "prompt strategy": "prompt chaining",
              "prompt": {},
              "output": {}}

  # Conversation history: grows by one user turn and one assistant turn per
  # step, so each follow-up is answered with the earlier exchange as context.
  messages = []
  for step, step_prompt in enumerate(step_prompts, start=1):
    run_info["prompt"][f"step {step} prompt"] = step_prompt
    messages.append({"role": "user", "content": step_prompt})

    response = client.chat.completions.create(
        model=model_name,
        max_tokens=1024,
        temperature=0.7,
        messages=messages,
    )

    reply = response.choices[0].message.content
    messages.append({"role": "assistant", "content": reply})
    run_info["output"][f"response {step}"] = reply

  runs.append(run_info)

  # Echo the whole exchange. Every prompt and response is now followed by a
  # blank line (previously RESPONSE 2 ran straight into the "PROMPT 3:" header).
  print(run_info["model"])
  for step in range(1, len(step_prompts) + 1):
    print(f"PROMPT {step}:")
    print(run_info["prompt"][f"step {step} prompt"] + "\n")
    print(f"RESPONSE {step}:")
    print(run_info["output"][f"response {step}"] + "\n")
  print()

Codestral-2501
PROMPT 1:
Identify any potential logic issues or design flaws in the following:

```python
# utils .py - Script to parse and summarize numeric CSV files

import csv

def read_csv ( filepath ) :
  with open ( filepath , ’r’) as f :
    return [ row for row in csv . reader ( f ) ]

def summarize_column ( data , index ) :
  values = [ float ( row [ index ]) for row in data [1:]] # skip header
  total = sum( values )
  avg = total / len ( values )
  return total , avg

def main () :
  filepath = ’data .csv ’
  data = read_csv ( filepath )
  total , avg = summarize_column ( data , 1)
  print (" Total :", total )
  print (" Average :", avg )

if __name__ == ’__main__ ’:
  main ()
```

Please format the response as a list. Do no provide solutions.

RESPONSE 1:
1. **Syntax Errors**: The code contains syntax errors due to incorrect usage of single quotes (`‘` and `’`) instead of regular single quotes (`'`).

2. **File Path Hardcoding**: The file path `data.csv` is hardcoded in th

# **Task 22: Code Completion and Robustness Enhancement (Python)**

In [73]:
# Task 22 input: an incomplete word-count script embedded as a Markdown-fenced
# string and sent to the models verbatim.
# NOTE(review): besides the typographic quotes (’), the literal has flattened
# indentation inside count_words and under the __main__ guard — presumably an
# artifact of pasting from a formatted document. It is part of what the models
# see; confirm whether that is intentional before normalising it.
code = """
```python
# file_processor .py - Incomplete script for processing text files

import string

def load_file ( filepath ) :
  with open ( filepath , ’r’) as f :
    return f . readlines ()

def clean_line ( line ) :
  # TODO : Remove punctuation and make lowercase
  pass
def count_words ( lines ) :
  word_counts = {}
  for line in lines :
  clean = clean_line ( line )
  for word in clean . split () :
    word_counts [ word ] = word_counts . get ( word , 0) + 1
  return word_counts

def main () :
  filepath = ’input .txt ’
  lines = load_file ( filepath )
  counts = count_words ( lines )
  for word , count in sorted ( counts . items () ) :
    print ( f"{ word }: { count }")

if __name__ == ’__main__ ’:
main ()
```
"""

## Prompting Strategy 1: Zero Shot

In [75]:
# Indices into model_choices used for every Task 22 run.
models_task22 = [1, 3]

In [76]:
# Zero-shot prompt: the incomplete script followed by a plain statement of the goal.
prompt = (
    "Complete the following file-processing script:\n"
    f"{code}"
    ".\nThe goal is to clean each line (by removing punctuation and "
    "making the line lowercase) and count word frequencies"
)

In [77]:
# Send the zero-shot prompt to each model selected for Task 22,
# store each run record, and echo the model name followed by its reply.
for i in models_task22:
  run_info = run_prompt(
      task_num=22,
      prompt_strat="zero shot",
      prompt=prompt,
      max_tokens=1024,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)
  print(run_info["model"], run_info["output"], sep="\n")

Codestral-2501
Sure! Let's complete the script by implementing the `clean_line` function and correcting any syntax issues. Here is the complete and functional script:

```python
# file_processor.py - Complete script for processing text files

import string

def load_file(filepath):
    with open(filepath, 'r') as f:
        return f.readlines()

def clean_line(line):
    # Remove punctuation and make lowercase
    translator = str.maketrans('', '', string.punctuation)
    cleaned_line = line.translate(translator).lower()
    return cleaned_line

def count_words(lines):
    word_counts = {}
    for line in lines:
        clean = clean_line(line)
        for word in clean.split():
            word_counts[word] = word_counts.get(word, 0) + 1
    return word_counts

def main():
    filepath = 'input.txt'
    lines = load_file(filepath)
    counts = count_words(lines)
    for word, count in sorted(counts.items()):
        print(f"{word}: {count}")

if __name__ == '__main__':
    main()
```


In [78]:
# Show the run records accumulated in `runs` so far (bare last expression, so
# the notebook renders it as the cell's output).
runs

[{'task_num': 22,
  'model': 'Codestral-2501',
  'prompt strategy': 'zero shot',
  'prompt': 'Complete the following file-processing script:\n\n```python\n# file_processor .py - Incomplete script for processing text files\n\nimport string\n\ndef load_file ( filepath ) :\n  with open ( filepath , ’r’) as f :\n    return f . readlines ()\n\ndef clean_line ( line ) :\n  # TODO : Remove punctuation and make lowercase\n  pass\ndef count_words ( lines ) :\n  word_counts = {}\n  for line in lines :\n  clean = clean_line ( line )\n  for word in clean . split () :\n    word_counts [ word ] = word_counts . get ( word , 0) + 1\n  return word_counts\n\ndef main () :\n  filepath = ’input .txt ’\n  lines = load_file ( filepath )\n  counts = count_words ( lines )\n  for word , count in sorted ( counts . items () ) :\n    print ( f"{ word }: { count }")\n\nif __name__ == ’__main__ ’:\nmain ()\n```\n.\nThe goal is to clean each line (by removing punctuation and making the line lowercase) and count word

## Prompting Strategy 2: Chain of Thought

In [79]:
# Chain-of-thought prompt: same request as the zero-shot version, with a
# trailing "Think step-by-step." cue.
prompt = (
    "Complete the following file-processing script:\n"
    f"{code}"
    ".\nThe goal is to clean each line (by removing punctuation and "
    "making the line lowercase) and count word frequencies. "
    "Think step-by-step."
)

In [80]:
# Send the chain-of-thought prompt to each model selected for Task 22 and
# store the run records (nothing is printed for these runs).
for i in models_task22:
  run_info = run_prompt(
      task_num=22,
      prompt_strat="chain of thought",
      prompt=prompt,
      max_tokens=1024,
      temp=0.7,
      model_num=i,
  )
  runs.append(run_info)

In [None]:
# Save all the runs into a JSON file.
import json

# Serialise before opening the file: mode "w" truncates on open, so if a record
# turned out not to be JSON-serialisable the previous results file would
# otherwise be wiped before the error surfaced.
runs_json = json.dumps(runs, indent=4)

# Explicit encoding so the file is written the same way on every platform.
with open("raw_prompts_and_outputs.json", "w", encoding="utf-8") as f:
    f.write(runs_json)