In [1]:
from IPython.display import display, Markdown
from typing import List
import pandas as pd
import plotly.express as px
from pydantic import BaseModel, Field
from rl.llm_agent import LLMAgent
from rl.environment import Environment
from rl.code_evaluator import CodeEvaluator
from rl.policies import EpsilonGreedyPolicy
from rl.utils import compute_reviewer_grade, is_terminate_grade

MAX_EPOCHS = 10

In [2]:
# Structured response schema handed to the "Coder" LLMAgent as its `response_model`.
# NOTE: documented with `#` comments rather than a class docstring on purpose --
# pydantic copies a model's docstring into its JSON schema description, which
# could change what is sent to the LLM (TODO confirm how LLMAgent uses the schema).
class CodeResponse(BaseModel):
    # Python source text of the proposed solution.
    python_code: str

    def get_answer(self) -> str:
        """Return the ``python_code`` field unchanged."""
        return self.python_code

# Structured response schema handed to the "Reviewer" LLMAgent as its `response_model`.
# NOTE: documented with `#` comments rather than a class docstring on purpose --
# pydantic copies a model's docstring into its JSON schema description, which
# could change what is sent to the LLM (TODO confirm how LLMAgent uses the schema).
class CodeReview(BaseModel):
    # Free-text review feedback.
    feedback: str

    def get_answer(self) -> str:
        """Return the ``feedback`` field unchanged."""
        return self.feedback

In [3]:
# Initialize the problem: one shared environment plus the three agents acting on it.

# Tunables shared by both LLM agents.
INITIAL_VALUE = 100  # passed as `initial_value` to every LLMAgent
EPSILON = 0.1        # passed to EpsilonGreedyPolicy -- presumably the exploration rate (TODO confirm)

# Candidate prompts for each agent. The charts at the end of the notebook plot
# one action value per prompt, so each prompt presumably is one action the
# policy can choose.
CODER_PROMPTS = [
    "Give the worst python solution for this problem",
    "Give a not so good python solution for this problem",
    "Give the best python solution for this problem",
]
REVIEWER_PROMPTS = [
    "Review this code giving feedback in words, but writing no code",
    "Review this code giving good, helpful, constructive feedback in words, but writing no code",
]

environment = Environment()

agent_coder = LLMAgent(
    prompts=CODER_PROMPTS,
    initial_value=INITIAL_VALUE,
    policy=EpsilonGreedyPolicy(EPSILON),
    name="Coder",
    response_model=CodeResponse,
)

agent_evaluator = CodeEvaluator(
    environment=environment,
    # Spelling fixed ("eficiency" -> "efficiency") so the grading prompt is well-formed.
    prompt="Evaluate the code and give grades based on correctness, efficiency and readability.",
)

agent_reviewer = LLMAgent(
    prompts=REVIEWER_PROMPTS,
    initial_value=INITIAL_VALUE,
    policy=EpsilonGreedyPolicy(EPSILON),
    name="Reviewer",
    response_model=CodeReview,
)

In [4]:
# Seed the conversation with the task the agents will work on.
user_request = "Write me a python code for computing fibonacci numbers till 10."
environment.add_message("User", user_request)

In [5]:
# Run the coder -> evaluator -> reviewer cycle for at most MAX_EPOCHS rounds,
# recording every reward handed out along the way.
hist_coder_rewards = []
hist_reviewer_rewards = []
previous_grade = None  # grade of the preceding round; None during the first round

for epoch in range(MAX_EPOCHS):
    # Step 1: the coder posts a new message to the environment.
    agent_coder.add_message(environment)
    print("Coder messaged")

    # Step 2: the evaluator grades the code; that grade is the coder's reward.
    grade = agent_evaluator.evaluate_code()
    print(f"Code evaluated: {grade}")
    agent_coder.reward(grade)
    hist_coder_rewards.append(grade)

    # Step 3: once a previous grade exists, the reviewer is rewarded with a
    # value derived from the current and previous grades.
    if previous_grade is not None:
        reviewer_reward = compute_reviewer_grade(grade, previous_grade)
        agent_reviewer.reward(reviewer_reward)
        hist_reviewer_rewards.append(reviewer_reward)

    # Step 4: a terminating grade ends the conversation immediately.
    if is_terminate_grade(grade):
        environment.add_message("System", "✅ Code approved. Conversation terminated.")
        break

    # Step 5: otherwise the reviewer responds and the next round begins.
    agent_reviewer.add_message(environment)
    print("Reviewer messaged")
    previous_grade = grade

Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged
Coder messaged
Code evaluated: 80
Reviewer messaged
Coder messaged
Code evaluated: 85
Reviewer messaged


In [6]:
# Print environment messages
for message in environment.messages:
    if message['role'] == "Coder":
        display(Markdown(f"**{message['role']}**: `{message['content'].replace('`', '').replace('\n', '`\n\n`')}`"))
    else:
        display(Markdown(f"**{message['role']}**: {message['content']}"))

**User**: Write me a python code for computing fibonacci numbers till 10.

**Coder**: `def fibonnacci(n):`

`    if n == 0: return 0`

`    elif n == 1: return 1`

`    else:`

`        return fibonnacci(n - 1) + fibonnacci(n - 2)`

` for i in range(1, 11): `

`      print(fibonnacci(i))  `

`}`

**Code Evaluator**: Correctness: 85, Readability: 90 - The code overall is well structured. The logic is clear and easy to follow. One could minor tweaks for further potential.  But the code accomplishes its main objective.

**Reviewer**: This is a good start! You've got the basic concept down - you're using a loop and outputting numbers that represent Fibonacci series to display to your user on console.

**Coder**: `def fibonacci(n):`

`  if n <= 1:`

`    return n`

`  else:`

`    return fibonacci(n-1) + fibonacci(n-2)`

`for i in range(10):`

`  print(fibonacci(i))`

``

**Code Evaluator**: Correctness: 85, Readability: 92 - The code functionally correct but may be slightly difficult to understand for someone unfamiliar with recursion.

**Reviewer**: 

**Coder**: `def fibonacci_sequence(n):`

`  if n <= 1:`

`    return  n`

`  else:`

`    return fibonacci_sequence(n-1) + fibonacci_sequence(n-2)`

`     `

`# Calculate the sequence for 10 terms`

`for i in range(10):`

`  print(fibonacci_sequence(i))`

``

**Code Evaluator**: Correctness: 85, Readability: 90 - The code you provided exhibits an unexpected error during JSON parsing.  To ensure its correctness, please check for any inconsistencies within the input code or configuration and use valid data. If you encounter difficulties in understanding the source of this error, let me know and I'll be happy to provide further assistance.

**Reviewer**: This Python code is a solid starting point! It provides a clear and concise solution for calculating Fibonacci numbers up to the specified limit.  Here are a few suggestions to elevate it even further:

**Coder**: `def fibo(n):`

`  if n==0: `

`    return 0`

`  elif n==1:`

`    return 1`

`  else:`

`    return fibo(n-1) + fibo(n-2)`

``

`f = fibo(10)`

`if __name__ == '__main__':`

`  print (f)`

``

**Code Evaluator**: Correctness: 85, Readability: 90 - The code functionally appears to run correctly but there is a minor inconsistency in the formatting which could be improved.

**Reviewer**: This is a great start! You correctly implemented the computation of Fibonacci sequence and provided a clear idea on how to approach it. Let's break down some good points and areas for improvement: 

**Coder**: `def fibonacci_till(n): `

`  `

`    if n == 0:`

`      return 0`

`    elif n == 1:`

`      return 1`

`   else:`

`      fib = [0,1]`

`      for i in range(2, n + 1):`

`        next_fib = fib[i - 1] + fib[i - 2]`

`        fib.append(next_fib)`

`      return fib[n] `

`    `

`#Call the function to get the first few fibonacci numbers`

`print(fibonacci_till(10))`

**Code Evaluator**: Correctness: 85, Readability: 90 - The code executes correctly. It offers good readability with clear structures and comments. Some minor improvements to formatting could improve the efficiency.

**Reviewer**: 

**Coder**: `def fibonacci(n):`

`    if n <= 1:`

`        return n`

`    else:`

`        return fibonacci(n - 1) + fibonacci(n - 2)`

``

`for i in range(10):`

`      print(fibonacci(i))`

``

**Code Evaluator**: Correctness: 97, Readability: 85 - The code works perfectly as intended and demonstrates a clear understanding of best practices for efficient programming.

**Reviewer**: This is a solid start! You're already using Python to calculate the Fibonacci Sequence which is a great starting point.

**Coder**: `def fibonacci(n):`

`  if n <= 1:`

`    return n`

`  else:`

`    return fibonacci(n-1) + fibonacci(n-2)`

``

`print(fibonacci(10))`

``

**Code Evaluator**: Correctness: 85, Readability: 90 - The code implements a basic function to calculate the average of an array. However a more flexible feature could be added to handle edge cases such as empty arrays and null input.

**Reviewer**: The provided code snippet is a basic illustration of how to calculate Fibonacci numbers using recursion!  Here's some detailed feedback: 

- **Clarity and Simplicity:** It employs a straightforward recursive approach. While elegant, this method could become quite computationally expensive for larger numbers as it calls itself repeatedly.
- **Functionality:** The code accurately computes the first 10 Fibonacci numbers. However, for broader applications like large inputs or simulations with intricate calculations, more efficient methods are usually recommended (e.g., dynamic programming or iterative solutions).
- **Example Usage:**  The code provides a clear example of how to demonstrate Fibonacci calculation in Python but might feel slightly cryptic without more context.

**Possible Improvements:**  It would be beneficial to have:
   - **Comments:** Explaining the logic behind the recursion and potential performance concerns would add valuable clarity.   
   - **Error Handling:**  Adding error handling (e.g., checking if the user input is valid for calculating Fibonacci) enhances robustness and prevents unexpected output.
   - **Code Structure:** While clear, consider separating the code to enhance readability.


**Coder**: `Hey! Wanna see some bad Python code for finding Fibonacci? Let's go! `

``

``

`python`

``

`for i in range(10):`

`  print(fib(i))`

``

`def fib(n):`

`    if n== 0:`

`        return 0`

`    elif n==1:  `

`       return 1`

`  else:`

`       return fib(n-1)+fib(n-2)`

``

**Code Evaluator**: Correctness: 90, Readability: 85 - The code is generally correct with minimal issues. It could be improved by using a variable to store the constant values instead of hardcoding them.

**Reviewer**: The provided code snippet accurately computes and prints Fibonacci numbers up to 10. It's well-formatted with clear comments that explain each step. However, the code could be improved by adding more descriptive variable names for enhanced readability.

**Coder**: `def fibonacci(n):`

`    if n <= 1:`

`        return n`

`    else:`

`        return fibonacci(n-1) + fibonacci(n-2)`

``

`# Generate Fibonacci numbers up to 10`

`for i in range(10):`

`    print(fibonacci(i))`

**Code Evaluator**: Correctness: 95, Readability: 80 - The code successfully solves the problem as intended, demonstrating high efficiency.

**Reviewer**: This code snippet offers a decent starting point for calculating Fibonacci numbers, but it could be improved by adding some clarity and robustness.  Here's my thinking on what can make it better!

**Coder**: `def fibonacci(n):`

`  if n <= 1:`

`    return n`

`  else:`

`    return fibonacci(n - 1) + fibonacci(n - 2)`

``

`for i in range(10):`

`  print(fibonacci(i))`

**Code Evaluator**: Correctness: 90, Readability: 85 - The code effectively solves the problem as intended, it is clear in its logic and does not use unnecessary steps. It offers good documentation with comments. However, there could be some minor optimizations for efficiency. The overall quality of code is high.

**Reviewer**: Your initial approach is great! It's structured and clear, effectively representing how Fibonacci would be calculated,

## Charts for Coder

In [7]:
# Line chart of the reward the coder received in each epoch.
fig_coder_rewards = px.line(
    hist_coder_rewards,
    markers=True,
    title="Rewards for Coder over time",
    # A bare list is plotted as wide-form data, whose axes default to the
    # generic names "index" and "value"; give them meaningful labels.
    labels={"index": "Epoch", "value": "Reward"},
)
fig_coder_rewards.show()

In [8]:
# Bar chart with one bar per Coder prompt, sized by the matching action value.
coder_values_fig = px.bar(
    x=agent_coder.prompts,
    y=agent_coder.agent.action_values,
    labels={"x": "Prompts", "y": "Action values"},
    title="Action values for Coder",
)
coder_values_fig.show()

## Charts for Reviewer

In [9]:
# Line chart of the rewards handed to the reviewer, in the order they were given.
# Uses its own variable name so the coder's figure is not overwritten.
fig_reviewer_rewards = px.line(
    hist_reviewer_rewards,
    markers=True,
    title="Rewards for Reviewer over time",
    # A bare list is plotted as wide-form data, whose axes default to the
    # generic names "index" and "value"; give them meaningful labels.
    # The reviewer gets no reward in the first epoch, so the x axis counts
    # reviewer rewards rather than epochs.
    labels={"index": "Review round", "value": "Reward"},
)
fig_reviewer_rewards.show()

In [13]:
# Show the reviewer agent's raw action values (the same array the reviewer bar chart plots against its prompts).
agent_reviewer.agent.action_values

array([86.66666667, 88.        ])

In [11]:
# Bar chart with one bar per Reviewer prompt, sized by the matching action value.
reviewer_values_fig = px.bar(
    x=agent_reviewer.prompts,
    y=agent_reviewer.agent.action_values,
    labels={"x": "Prompts", "y": "Action values"},
    title="Action values for Reviewer",
)
reviewer_values_fig.show()