In [6]:
import pandas as pd
import os
import re
import gradio as gr
import sys
import io
import subprocess
from openai import OpenAI
from dotenv import load_dotenv
from datetime import datetime
import logging

# Load variables from a local .env file into the environment. This runs before
# the client below is constructed (presumably so the API key is available to
# it -- TODO confirm which variables the .env file is expected to define).
_ = load_dotenv()
# Chat model name passed to every completion request made by ask_ragent.
model = "gpt-4o"
# Single shared OpenAI client used for all requests.
client = OpenAI()

# Module-level, in-memory stores shared by all handlers in this file:
# agent_logs holds one dict per logged action (exported to CSV by export_logs);
# past_code_snippets holds snippets the user accepted via confirm_code, and
# ask_ragent reads its last three entries as extra context.
agent_logs = []
past_code_snippets = []

# Detect package usage for agent routing
def detect_pkgs(text):
    """Classify text into a specialty label via case-insensitive keyword search.

    The text is lower-cased and tested against keyword groups in priority
    order; the label of the first group with any matching substring is
    returned.

    Returns:
        One of 'lmer', 'ggplot', 'survey', 'lm_glm', or 'general' when no
        keyword matches.
    """
    lowered = text.lower()
    # Order matters: earlier groups win when several groups match.
    keyword_groups = (
        ('lmer', ("lmer", "lme4", "lmertest")),
        ('ggplot', ("ggplot", "aes", "geom_")),
        ('survey', ("survey",)),
        ('lm_glm', ("lm(", "glm(", "regression")),
    )
    for label, keywords in keyword_groups:
        if any(keyword in lowered for keyword in keywords):
            return label
    return 'general'

# Define prompt based on specialty
def system_prompt(specialty):
    """Return the system prompt for the assistant, tailored to a specialty.

    Args:
        specialty: Label produced by detect_pkgs ('lmer', 'ggplot', 'survey',
            'lm_glm', or anything else for the generic prompt).

    Returns:
        The base prompt, with the phrase "specifically in R code" extended by
        a specialty-specific emphasis when the label is recognised; the
        unmodified base prompt otherwise.
    """
    base = '''You are a helpful assistant that specializes in statistical methods, specifically in R code. 
    This includes knowledge on the content in standard packages like tidyverse and dplyr as well as more advanced packages like lmerTest.
    Your job is to help beginners to experts and students to professors alike in choosing accurate statistical methods and packages for their projects. 
    If the user is unsure what piece of code to write or statistical method to use, you will ask for context on their project, including dataset information and outcome expectations, as well as desired significance, applications, and relationships of variables.
    Any suggestions you give should be on a case-by-case basis, meaning you will suggest code for one test at a time. In any case scenario, you MUST stick to one statistical test at a time. 
    Be assertive in confirming if a single piece of code works before seeing if further results or plots should be made. 
    Ask for a copy of the output if you are unsure that the user has failed in conducting the test accurately. 
    If the user asks for multiple pieces of code, you will start with the first request and ensure that there are no bugs and a useful outcome is obtained before moving to the next requested code. 
    If a user is asking for explanations of the outcomes for a statistical method they have already coded, please make sure you understand all of the input variables as well as any fine tuning that was done before creating a confident response of what the outcome means in terms of the project. 
    You will also ask if a writeup should be made in any particular format (bullet points, paragraph, outline) to illuminate the meaning behind the outcome. 
    Let's walk through an example. If a user sends the outputs of a regression model lm(), you should confirm what the user is looking to find and/or the variables included in the experiment before selecting a top few findings in the regression results to highlight in the context of the experiment. 
    That means creating a brief response with a very specific and confident main point centered around one or two of the regression findings. If the user asks for more explanations, you may give it but your initial response should be concise and confident. 
    When the user asks you to explain an output, you should focus about being direct and assertive towards what the numerical outputs in the linear model explain in regards to the variables tested. 
    This means immediately pointing out significance, fixed effects, or potential issues in the model before explaining anything regarding definitions, or how a linear regression actually works. 
    Your focus should be on brevity with smart suggestions and intelligent insights. Try your best to be witty but useful.
    If the user does something incorrectly, you should be brief and responsive, highlighting the immediate fix before asking clarification questions if you are unsure what the exact issue is.
    Your questions and responses should be assertive. Do not explain anything that was not asked to be explain and try to keep up with the expected knowledge of the user before explaining introductory or advanced concepts. 
    You may ask an introductory question about the users experience level with the requested analysis or code or dataset if you are unsure whether or not you are explaining concepts that are already understood. To make it clear, a user could ask for assistance with a piece of code but still be completely knowledgeable about the concept or topic.'''

    # The specialty emphasis is spliced in right after this phrase of the
    # first sentence; keeping the anchor in one place avoids the four
    # near-identical replace() calls drifting apart.
    anchor = "specifically in R code"
    emphasis_by_specialty = {
        'lmer': ", with an emphasis on mixed models and the lme4/lmerTest packages",
        'ggplot': ", with a specialization in data visualization using ggplot2",
        'survey': ", with a focus on complex survey design and the survey package",
        'lm_glm': ", with a particular emphasis on linear and generalized linear models",
    }
    emphasis = emphasis_by_specialty.get(specialty)
    if emphasis is None:
        # 'general' and any unknown label fall back to the generic prompt.
        return base
    return base.replace(anchor, anchor + emphasis)

def extract_code(text):
    """Return the R code blocks found in Markdown-formatted text.

    A block is anything between a fence opened with ```r (or ```R) and the
    next closing ``` fence. The previous pattern, r"r(.*?)", matched every
    letter "r" with an empty capture and therefore returned a list of empty
    strings rather than code.

    Args:
        text: Text that may contain fenced code blocks (e.g. a model reply).

    Returns:
        A list with the stripped contents of each non-empty R block, in
        order of appearance; an empty list when there are none.
    """
    if not text:
        return []
    # \b keeps fences tagged with other languages starting with "r"
    # (ruby, rust, ...) from being treated as R code.
    matches = re.findall(r"```[rR]\b(.*?)```", text, re.DOTALL)
    stripped = (code.strip() for code in matches)
    return [code for code in stripped if code]

# Propose code edits with confirm logic
def replace_lines(code, replacement_text, start_line, end_line):
    """Replace a 1-indexed, inclusive range of lines in code.

    Args:
        code: Original source text.
        replacement_text: Text to put in place of the range; surrounding
            whitespace is stripped before it is split into lines.
        start_line: First line to replace (1-indexed). Values below 1 are
            treated as 1.
        end_line: Last line to replace (inclusive). Values below
            start_line - 1 are treated as start_line - 1, which turns the
            call into a pure insertion before start_line.

    Returns:
        The updated text, lines joined with "\n".
    """
    lines = code.splitlines()
    replacement_lines = replacement_text.strip().splitlines()
    # Clamp the range: a start below 1 would make the prefix slice negative
    # (counting from the end), and an end before the start would duplicate
    # the lines in between.
    start = max(start_line, 1)
    end = max(end_line, start - 1)
    updated_lines = lines[:start - 1] + replacement_lines + lines[end:]
    return "\n".join(updated_lines)

def confirm_code(current_code, proposed_code):
    """Append the proposed snippet to the editor code and hide the confirm button.

    The stripped snippet is remembered in past_code_snippets and the action
    is recorded in agent_logs.

    Returns:
        A tuple of (editor code followed by a "# Suggested by RAgent" header
        and the snippet, a Gradio update that hides the button).
    """
    snippet = proposed_code.strip()
    past_code_snippets.append(snippet)
    combined = "\n\n# Suggested by RAgent\n".join([current_code, snippet])
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "action": "confirm_code_addition",
        "inserted_code": snippet,
    }
    agent_logs.append(log_entry)
    hide_button = gr.update(visible=False)
    return combined, hide_button

def confirm_replacement(current_code, replacement_code, start_line, end_line):
    """Swap a line range of the editor code for replacement_code and hide the button.

    start_line and end_line are converted with int() before being handed to
    replace_lines; the action is recorded in agent_logs with the range as
    originally supplied.

    Returns:
        A tuple of (updated code, a Gradio update that hides the button).
    """
    first, last = int(start_line), int(end_line)
    rewritten = replace_lines(current_code, replacement_code, first, last)
    agent_logs.append(dict(
        timestamp=datetime.now().isoformat(),
        action="replace_lines",
        lines=f"{start_line}-{end_line}",
        replacement_code=replacement_code,
    ))
    hide_button = gr.update(visible=False)
    return rewritten, hide_button

# Agent code

def ask_ragent(user_msg, history, current_code, console_output):
    """Send the user's message to the model and prepare the UI updates.

    Args:
        user_msg: Text typed into the chat box.
        history: List of {"role", "content"} dicts for the conversation so
            far. It is extended in place with the new user/assistant turn.
        current_code: R code from the last run.
        console_output: Console output from the last run.

    Returns:
        A 4-tuple: an empty string (clears the chat box), the updated
        history, the first non-empty code block proposed in the reply (or
        ""), and a Gradio update showing the confirm button only when code
        was proposed.
    """
    if history is None:
        history = []

    # Up to three previously accepted snippets are prepended as extra context.
    context_code = "\n\n".join(past_code_snippets[-3:]) + "\n\n" + current_code
    specialty = detect_pkgs(context_code + " " + user_msg)

    messages = [{"role": "system", "content": system_prompt(specialty)}]
    # Replay recent turns so the model can follow up on its own clarifying
    # questions; previously history was accepted but never sent, making
    # every request stateless. Only plain role/content pairs are forwarded,
    # and the window is bounded to keep the request size in check.
    max_replayed_turns = 10
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in history[-max_replayed_turns:]
        if isinstance(turn, dict)
        and turn.get("role") in ("user", "assistant")
        and isinstance(turn.get("content"), str)
    )
    messages.append({
        "role": "user",
        "content": f"Here's my current code:\n{context_code}\nConsole output:\n{console_output}\n{user_msg}",
    })

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        n=1,
        temperature=0.7
    )
    # content may be None; normalise so the string handling below is safe.
    reply = completion.choices[0].message.content or ""

    # Offer the confirm button only for a real, non-empty code block.
    proposed = next((block for block in extract_code(reply) if block), "")
    show_confirm = bool(proposed)
    if show_confirm:
        reply += "\n\n**Would you like to add or replace code?**"

    agent_logs.append({
        "timestamp": datetime.now().isoformat(),
        "specialty": specialty,
        "user_msg": user_msg,
        "proposed_code": proposed,
        "code_detected": bool(proposed)
    })
    history.append({"role": "user", "content": user_msg})
    history.append({"role": "assistant", "content": reply})
    return "", history, proposed, gr.update(visible=show_confirm)

def run_r(code):
    """Run R code with Rscript and collect its console output and plot.

    The code is written to temp_code.R in the working directory, followed by
    a statement that saves the last ggplot2 plot to plot.png when ggplot2 is
    loaded and a plot exists.

    Args:
        code: R source to execute; None is treated as an empty script.

    Returns:
        A 4-tuple (output, code, output, plot_path): stdout and stderr joined
        by a newline (or an error message if the script could not be run or
        timed out), the code that was run, the same output again, and
        "plot.png" if this run produced a plot, else None.
    """
    code = code or ""
    script_path = "temp_code.R"
    plot_path = "plot.png"

    # Drop any plot left over from an earlier run so it cannot be mistaken
    # for the result of this one.
    try:
        os.remove(plot_path)
    except FileNotFoundError:
        pass

    # Only save when ggplot2 is loaded and has a plot; an unconditional
    # ggsave() raises an R error for every script that does not use ggplot2.
    save_plot = (
        "\n\nif (\"ggplot2\" %in% loadedNamespaces() && !is.null(ggplot2::last_plot())) "
        "ggplot2::ggsave('plot.png', width=6, height=4)"
    )
    with open(script_path, "w") as f:
        f.write(code + save_plot)

    try:
        result = subprocess.run(["Rscript", script_path], capture_output=True, text=True, timeout=10)
    except Exception as e:
        # UI boundary: report any failure (Rscript missing, timeout, ...) in
        # the console pane instead of crashing the handler.
        output = f"Error running R script: {e}"
        plot_exists = False
    else:
        output = result.stdout + "\n" + result.stderr
        plot_exists = os.path.exists(plot_path)
    return output, code, output, plot_path if plot_exists else None

# Export agent logs
def export_logs():
    """Dump the collected agent log entries to agent_logs.csv.

    Returns:
        The CSV file name, or None when nothing has been logged yet (no file
        is written in that case).
    """
    if agent_logs:
        csv_path = "agent_logs.csv"
        pd.DataFrame(agent_logs).to_csv(csv_path, index=False)
        return csv_path
    return None

# gradio
# Two-column UI: R code editor, console and plot preview on the left; the
# assistant chat on the right; log export underneath.
with gr.Blocks() as app:
    gr.Markdown("RAgent")
    # Opening assistant message. It seeds both the visible chat and the stored
    # history: ask_ragent returns the stored history to the chatbot, so an
    # empty initial history made the greeting vanish after the first reply.
    greeting = {"role": "assistant", "content": "Hello! How can I assist your coding today?"}

    code_history = gr.State("")      # code from the last run
    console_history = gr.State("")   # console output from the last run
    proposed_code = gr.State("")     # snippet awaiting user confirmation
    # NOTE(review): chat_state is never listed as an output, so persistence of
    # the conversation relies on ask_ragent mutating this list in place --
    # confirm this holds for the installed Gradio version.
    chat_state = gr.State([dict(greeting)])

    with gr.Row():
        with gr.Column(scale=1):
            code_input = gr.Code(language="r", label="Enter R code")
            run_button = gr.Button("Run Code")
            code_output = gr.Textbox(label="Console Output", lines=10)
            plot_output = gr.Image(type="filepath", label="Plot Preview")

        with gr.Column(scale=1):
            chatbot = gr.Chatbot(label="RAgent Assistant", type="messages", value=[dict(greeting)])
            chat_input = gr.Textbox(label="Ask a question or request help")
            send_button = gr.Button("Send Message")
            confirm_button = gr.Button("✅ Add Code", visible=False)
            replace_button = gr.Button("♻️ Replace Lines", visible=False)

    run_button.click(run_r, inputs=code_input, outputs=[code_output, code_history, console_history, plot_output])
    send_button.click(ask_ragent, inputs=[chat_input, chat_state, code_history, console_history], outputs=[chat_input, chatbot, proposed_code, confirm_button])
    confirm_button.click(confirm_code, inputs=[code_input, proposed_code], outputs=[code_input, confirm_button])
    # TODO: replace_button is not wired up. confirm_replacement takes the
    # current code, the replacement code and a start/end line, but the UI has
    # no inputs for the line range yet.
    #replace_button.click(confirm_replacement, inputs=[code_input], outputs=[code_input, replace_button])

    with gr.Row():
        log_export = gr.Button("📊 Export Logs")
        log_output = gr.File(label="Download Logs")
        log_export.click(fn=export_logs, inputs=[], outputs=log_output)

# NOTE(review): share=True requests a public URL, and the Run Code button
# executes whatever R code is submitted through Rscript on this machine, so
# anyone holding the link can run code on the host. Confirm this is intended
# or restrict access before sharing.
app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7865
* Running on public URL: https://817cb41f871f824fc0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


