In [10]:
# RAgent: AI Co-pilot for R Code
"""
This notebook contains a Gradio application that interfaces with both an AI chatbot and an R code environment.
The AI Co-Pilot can read your code and outputs without having to copy paste your code. It can also write suggested code, debug, and analyze your outputs for flaws.
Additional use-cases include:

- Detecting statistical packages for specialized suggestions.
- Generating proposed diff code from suggested changes.
- Running R script and previewing generated content.
- Handling R script file uploads.

RAgent is perfect for beginner and expert coders alike, helping new students learn the R language, and seasoned professionals polish off some ggplot aesthetics.
"""

import pandas as pd
import os
import re
import gradio as gr
import sys
import io
import subprocess
from openai import OpenAI
from dotenv import load_dotenv
from datetime import datetime
import logging

# Load API key
# load_dotenv() reads variables from a .env file into the process environment;
# its return value is not needed, hence the throwaway name.
_ = load_dotenv()

# Configure AI client
# `model` is the chat model used by ask_ragent; `client` is shared by every
# function in this file that calls the chat completions API.
model = "gpt-4o"
client = OpenAI()

# Agent logs and code history context
# agent_logs: one dict per agent action (appended by ask_ragent and apply_diff,
#             written to CSV by export_logs).
# past_code_snippets: every script version accepted through apply_diff.
agent_logs = []
past_code_snippets = []

# ------------------------
# Package Detection
# ------------------------
def detect_pkgs(text):
    '''
    Classify a code snippet or chat message by the R package family it mentions.

    Matching is a case-insensitive substring search. Rules are checked in a
    fixed order and the first rule with any keyword present decides the result.

    Args:
        text (str): User-provided code snippet or chat message

    Returns:
        str: One of 'lmer', 'ggplot', 'survey', 'lm_glm', or 'general'
    '''
    lowered = text.lower()

    # Ordered (label, keywords) rules: earlier entries take priority.
    rules = (
        ('lmer', ("lmer", "lme4", "lmertest")),
        ('ggplot', ("ggplot", "aes", "geom_")),
        ('survey', ("survey",)),
        ('lm_glm', ("lm(", "glm(", "regression")),
    )
    for label, keywords in rules:
        if any(keyword in lowered for keyword in keywords):
            return label

    # Nothing recognised: fall back to the generic assistant.
    return 'general'

# ------------------------
# System Prompt Generation
# ------------------------
def system_prompt(specialty):
    '''
    Build the assistant's system prompt, optionally tailored to a specialty.

    The prompt restricts the assistant to R-related questions and asks for
    concise answers. For a recognised specialty, the generic opening phrase
    "specifically in R code" is extended with that specialty's focus; any other
    value returns the generic prompt unchanged.

    Args:
        specialty (str): Key indicating RAgent focus area.

    Returns:
        str: Full system prompt with specialty.
    '''
    prompt = '''You are a helpful assistant that specializes in statistical methods, specifically in R code. 
    This includes knowledge on the content in standard packages like tidyverse and dplyer as well as more advanced packages like lmerTest.
    Your job is to help beginners to experts and students to professors alike in choosing accurate statistical methods and packages for their projects. 
    If the user is unsure what piece of code to write or statistical method to use, you will ask for context on their project, including dataset information and outcome expectations, as well as desired significance, applications, and relationships of variables.
    Any suggestions you give should be on a case-by-case basis, meaning you will suggest code for one test at a time. In any case scenario, you MUST stick to one statistical test at a time. 
    Be assertive in confirming if a single piece of code works before seeing if further results or plots should be made. 
    Ask for a copy of the output if you are unsure that the user has failed in conducting the test accurately. 
    If the user asks for multiple pieces of code, you will start with the first request and ensure that there are no bugs and a useful outcome is obtained before moving to the next requested code. 
    If a user is asking for explanations of the outcomes for a statistical method they have already coded, please make sure you understand all of the input variables as well as any fine tuning that was done before creating a confident response of what the outcome means in terms of the project. 
    You will also ask if a writeup should be made in any particular format (bullet points, paragraph, outline) to illuminate the meaning behind the outcome. 
    Let's walk through an example. If a user sends the outputs of a regression model lm(), you should confirm what the user is looking to find and/or the variables included in the experiment before selecting a top few findings in the regression results to highlight in the context of the experiment. 
    That means creating a brief response with a very specific and confident main point centered around one or two of the regression findings. If the user asks for more explanations, you may give it but your initial response should be concise and confident. 
    When the user asks you to explain an output, you should focus about being direct and assertive towards what the numerical outputs in the linear model explain in regards to the variables tested. 
    This means immediately pointing out significance, fixed effects, or potential issues in the model before explaining anything regarding definitions, or how a linear regression actually works. 
    Your focus should be on brevity with smart suggestions and intelligent insights. Try your best to be witty but useful.
    If the user does something incorrectly, you should be brief and responsive, highlighting the immediate fix before asking clarification questions if you are unsure what the exact issue is.
    Your questions and responses should be assertive. Do not explain anything that was not asked to be explain and try to keep up with the expected knowledge of the user before explaining introductory or advanced concepts. 
    You may ask an introductory question about the users experience level with the requested analysis or code or dataset if you are unsure whether or not you are explaining concepts that are already understood. To make it clear, a user could ask for assistance with a piece of code but still be completely knowledgeable about the concept or topic.
    If the user asks about ANYTHING other than R code related questions, INCLUDING THAT OF OTHER CODING LANGUAGES, you MUST response with 'I'm sorry, do you have any questions related to your R code that I can help with?'. The user is permitted to ask about the R coding language and any packages without code existing as context (ex. "What is the shiny library", "What is ggplot", "What is R").
    You shall not engage in any misconduct related to behavior outside of R code questions. If you figure out you have been mislead to talk about a different topic, please immediately revert to the apology statement asking if you can help with any R code.
    If you are unsure, it's better to stay on the side of caution rather than engage in misconduct. However, the user will get annoyed if you are refusing to answer their questions about R or R packages. 

    At the end of the day, please emphasize CONCISE answers. THAT MEANS ONE OR TWO SENTENCES PER ANSWER WHERE AT ALL POSSIBLE! LESS IS BETTER! The user has some of the answers and does not need you to answer beyond what their basic ask is. If they want a solution, provide the most simple and efficient one without any fluff of language or description. 
    The user can and will ask for elaboration but does not need you to explain in depth unless asked for.
    '''

    # (specialty key, text that replaces the generic opening phrase)
    tailored_openings = (
        ('lmer', "specifically in R code, with an emphasis on mixed models and the lme4/lmerTest packages"),
        ('ggplot', "specifically in R code, with a specialization in data visualization using ggplot2"),
        ('survey', "specifically in R code, with a focus on complex survey design and the survey package"),
        ('lm_glm', "specifically in R code, with a particular emphasis on linear and generalized linear models"),
    )
    for key, opening in tailored_openings:
        if specialty == key:
            return prompt.replace("specifically in R code", opening)

    # Unknown or 'general' specialty: generic prompt as-is.
    return prompt

# ------------------------
# Code Extraction and Diffs
# ------------------------
# Any Markdown fenced block: group 1 is the info string that follows the
# opening fence, group 2 is the block body.
_FENCE_RE = re.compile(r"```([^\n`]*)\n(.*?)```", re.DOTALL)
# Info strings that name R: "r", "R", "{r}", "{r, echo=FALSE}" (not "ruby"/"rust").
_R_FENCE_INFO_RE = re.compile(r"\{?\s*r\b", re.IGNORECASE)
# Heuristic for replies without fences: a few common R calls and pipe chains.
_R_COMMAND_RE = re.compile(r"(library\(.*?\)|ggplot\(.*?\)|data\(.*?\)|.+?%>%)")


def extract_code(text):
    """
    Extract R code snippets from RAgent-generated text.

    Fenced Markdown blocks are preferred: a block is kept when its fence is
    tagged as R (```r, ```R, ```{r}, ```{r, ...}) or carries no language tag.
    Blocks tagged with another language are ignored. Only when no usable
    fenced block exists does the function fall back to a regex heuristic that
    picks out common R commands (library(), ggplot(), data(), pipe chains).

    Args:
        text (str): RAgent's message.

    Returns:
        list[str]: List of extracted code snippets, whitespace-stripped, in
        order of appearance. Empty when `text` is empty/None or contains no
        recognisable code.
    """
    if not text:
        return []

    snippets = []
    for info, body in _FENCE_RE.findall(text):
        info = info.strip()
        # Skip blocks explicitly tagged as some other language.
        if info and not _R_FENCE_INFO_RE.match(info):
            continue
        body = body.strip()
        if body:
            snippets.append(body)
    if snippets:
        return snippets

    # Extract common R commands
    return [m.strip() for m in _R_COMMAND_RE.findall(text)]

# diff agent
def llm_generate_diff(old_code, new_code):
    """
    Ask the LLM to describe what changed between two versions of an R script.

    Both scripts are embedded in a single prompt and sent through the shared
    module-level `client`. This call uses its own model name rather than the
    module-level `model`.

    Args:
        old_code (str): Original R script content.
        new_code (str): Proposed updated R script content.

    Returns:
        str: Summary of differences, stripped of surrounding whitespace.
    """
    diff_model = "gpt-4.1"
    system_turn = {
        "role": "system",
        "content": "You compare R code versions and detect meaningful changes.",
    }
    user_turn = {
        "role": "user",
        "content": f"""You are an assistant that detects and summarizes the important changes between two versions of R code.

Compare the following two R scripts:

OLD CODE:
{old_code}

NEW CODE:
{new_code}

First, list the MAJOR differences clearly.
Then, suggest a compact R code diff if possible.

Be precise, and focus on meaning changes, not just formatting. Ensure that all suggested code is accurately captured in the diff changes.
If the two scripts are completely different, just explain the high-level changes instead of showing line-by-line diff.
""",
    }

    # Low temperature keeps the comparison close to deterministic.
    response = client.chat.completions.create(
        model=diff_model,
        messages=[system_turn, user_turn],
        n=1,
        temperature=0.1,
    )
    summary = response.choices[0].message.content
    return summary.strip()

def apply_diff(current_code, proposed_code):
    """
    Accept the proposed script: it becomes the new editor content.

    The accepted code is recorded in `past_code_snippets` and an "apply_diff"
    entry is appended to `agent_logs`. `current_code` is accepted for the
    caller's convenience but is not read; the proposal replaces it wholesale.

    Args:
        current_code (str): Existing R script.
        proposed_code (str): Full updated R script.

    Returns:
        tuple[str, gr.Update]: The stripped proposed script and a UI update
        that hides the button bound to the second output.
    """
    accepted = proposed_code.strip()

    # Remember the accepted version, then log the action with a timestamp.
    past_code_snippets.append(accepted)
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "action": "apply_diff",
        "inserted_code": accepted,
    }
    agent_logs.append(log_entry)

    hide_button = gr.update(visible=False)
    return accepted, hide_button

# ------------------------
# RAgent Core
# ------------------------
# Upper bound on how many earlier chat messages are replayed to the model, so
# a long session cannot grow the request without limit.
_MAX_CONTEXT_MESSAGES = 20


def ask_ragent(user_msg, history, current_code, console_output):
    """
    Send user message and context to RAgent, receive advice or code suggestions.

    The request contains the specialty-specific system prompt, the most recent
    turns of `history` (so follow-up questions keep their conversational
    context), and a final user turn bundling the current script, the last
    console output and the new message. If the reply contains code, the first
    snippet becomes the proposal and an LLM-written diff summary against the
    current script is recorded in `agent_logs`.

    Args:
        user_msg (str): Current user question or request.
        history (list): Chat history for context, as a list of
            {"role", "content"} dicts. Mutated in place with the new turns.
        current_code (str): The R script currently in the editor.
        console_output (str): Last run output from the R console.

    Returns:
        tuple[str, list, str, gr.Update]: Cleared input box, updated history, proposed snippet, and confirm button visibility.
    """
    # Normalise optional inputs so string formatting never renders "None".
    user_msg = user_msg or ""
    console_output = console_output or ""
    if history is None:
        history = []
    old_code = current_code.strip() if current_code else ""

    specialty = detect_pkgs(old_code + " " + user_msg)
    agent_prompt = system_prompt(specialty)

    # Replay earlier turns; keep only role/content so any extra keys a UI
    # layer may attach to history entries are not forwarded to the API.
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history[-_MAX_CONTEXT_MESSAGES:]
        if isinstance(turn, dict)
        and turn.get("role") in ("user", "assistant")
        and isinstance(turn.get("content"), str)
    ]
    messages = [
        {"role": "system", "content": agent_prompt},
        *prior_turns,
        {"role": "user", "content": f"Here's my current code:\n{old_code}\nConsole output:\n{console_output}\n{user_msg}"},
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        n=1,
        temperature=0.7
    )
    # The API types `content` as optional; treat a missing reply as empty.
    reply = completion.choices[0].message.content or ""

    # Generate diffs
    diff_summary = ""
    proposed = ""
    code_blocks = extract_code(reply)
    if code_blocks:
        proposed = code_blocks[0]
        diff_summary = llm_generate_diff(old_code, proposed)

    show_confirm = bool(proposed)

    # Agent hand-off logs
    agent_logs.append({
        "timestamp": datetime.now().isoformat(),
        "specialty": specialty,
        "user_msg": user_msg,
        "proposed_code": proposed,
        "diff_summary": diff_summary,
        "code_detected": bool(proposed)
    })

    # Update chat history and UI. The history list is extended in place
    # because the caller's state object is not among this function's outputs.
    history.append({"role": "user", "content": user_msg})
    history.append({"role": "assistant", "content": reply})
    return "", history, proposed, gr.update(visible=show_confirm)

# ------------------------
# R Script
# ------------------------
def run_r(code):
    """
    Execute R code via Rscript and save any plots.

    Writes the provided R code to 'temp_code.R' in the current working
    directory, appends a ggsave call to export a plot to 'plot.png', then runs
    the script using Rscript with a 10 second timeout. Any 'plot.png' left
    over from an earlier run is deleted first, so the returned plot path always
    belongs to this run. Failures (Rscript missing, timeout, file errors) are
    reported in the output text rather than raised.

    Args:
        code (str): R code to execute. None is treated as an empty script.

    Returns:
        tuple:
            output (str): Combined stdout and stderr from Rscript, or an error message.
            code (str): Original R code.
            output (str): Duplicate console output for compatibility.
            plot_path (str | None): Path to 'plot.png' if generated, else None.
    """
    code = code or ""
    script_path = "temp_code.R"
    plot_path = "plot.png"

    try:
        # Drop a stale plot so a script that draws nothing is not shown the
        # previous run's image.
        try:
            os.remove(plot_path)
        except FileNotFoundError:
            pass

        with open(script_path, "w") as f:
            f.write(code + "\n\nggsave('plot.png', width=6, height=4)")

        result = subprocess.run(["Rscript", script_path], capture_output=True, text=True, timeout=10)
        output = result.stdout + "\n" + result.stderr
        plot_exists = os.path.exists(plot_path)
    except Exception as e:
        # Best-effort boundary for the UI: surface the problem as console text.
        output = f"Error running R script: {e}"
        plot_exists = False
    return output, code, output, plot_path if plot_exists else None

# Handle setting working directory
def set_working_dir(path):
    """
    Switch the Python process's working directory to `path`.

    temp_code.R is written and run relative to the working directory, so this
    controls where the script and its outputs land.

    Args:
        path (str): Directory to switch to.

    Returns:
        str: Status message for the UI, either the new working directory or
        an "Invalid directory" notice when `path` is empty or not a directory.
    """
    usable = bool(path) and os.path.isdir(path)
    if not usable:
        return f"Invalid directory: {path!r}"

    os.chdir(path)
    return f"Working directory set to:\n{os.getcwd()}"

# upload R files
def handle_files(file):
    """
    Load an uploaded R script so it can be shown in the editor.

    Args:
        file: The uploaded file, either an object exposing the path as `.name`
            or a plain path string. None (the upload was cleared) is accepted.

    Returns:
        tuple[str, str]: (code, code) on success. ("", "") when `file` is
        None. ("", error message) when the extension is not .R/.r or the file
        cannot be read.
    """
    # The change event also fires when the upload is cleared.
    if file is None:
        return "", ""

    # Accept both file-like objects carrying `.name` and bare path strings.
    path = getattr(file, "name", file)
    if not str(path).lower().endswith(".r"):
        return "", "Error: Only .R files are supported."

    try:
        # Context manager so the handle is closed even if read() fails.
        with open(path) as f:
            code = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers undecodable content (UnicodeDecodeError).
        return "", f"Failed to read file: {str(e)}"
    return code, code

def confirm_file(file):
    """
    Read a confirmed upload and return its contents plus UI updates.

    Args:
        file: Uploaded file object exposing its path as `.name`, or None.

    Returns:
        tuple: (code, second output, None). The second output is a
        `gr.update(visible=False)` on success or when `file` is None, and an
        error message string when reading fails. The code is "" unless the
        file was read successfully.
    """
    if file is None:
        return "", gr.update(visible=False), None

    try:
        with open(file.name, "r") as handle:
            contents = handle.read()
        # Trace which upload was processed.
        print("File received:", file.name if file else "None")
        outcome = (contents, gr.update(visible=False), None)
        return outcome
    except Exception as err:
        failure = f"Failed to confirm file: {str(err)}"
        return "", failure, None

# Export for agent logs
def export_logs():
    """
    Write the collected agent logs to 'agent_logs.csv'.

    Returns:
        str | None: The CSV file name, or None when there is nothing to export.
    """
    if agent_logs:
        csv_name = "agent_logs.csv"
        # One row per log entry; the DataFrame index is not written.
        pd.DataFrame(agent_logs).to_csv(csv_name, index=False)
        return csv_name
    return None

# UI
# UI
# Two-column layout: the R editor, console and plot preview on the left
# (scale=2) and the chat assistant on the right (scale=1).
with gr.Blocks() as app:
    gr.Markdown("RAgent")
    # State holders passed between callbacks.
    code_history = gr.State("")      # latest editor content / last executed code
    console_history = gr.State("")   # console output of the last run
    proposed_code = gr.State("")     # snippet proposed by ask_ragent
    chat_state = gr.State([])        # chat history list handed to ask_ragent
    file = gr.State()                # NOTE(review): not referenced by any handler below

    with gr.Row():
        with gr.Column(scale=2):
            file_input = gr.File(label="Upload R Script", file_types=[".R"])
            code_input = gr.Code(language="r", label="Enter R code")
            run_button = gr.Button("Run Code")
            code_output = gr.Textbox(label="Console Output", lines=10)
            plot_output = gr.Image(type="filepath", label="Plot Preview")
            # Uploading a script fills the editor and the code_history state.
            file_input.change(
                fn=handle_files, 
                inputs=file_input,
                outputs=[code_input, code_history]
            )
            # Mirror every editor change into code_history so the assistant
            # sees the current code without it having been run.
            code_input.change(
                fn=lambda code: code,
                inputs=[code_input],
                outputs=[code_history]
            )

        with gr.Column(scale=1):
            chatbot = gr.Chatbot(label="RAgent Assistant", type="messages", value=[
                {"role": "assistant", "content": "Hello! How can I assist your coding today?"}])
            chat_input = gr.Textbox(label="Ask a question or request help")
            send_button = gr.Button("Send Message")
            # Hidden until ask_ragent returns a proposal.
            confirm_button = gr.Button("Add Code", visible=False)
            # NOTE(review): replace_button has no click handler in this file.
            replace_button = gr.Button("Replace Lines", visible=False)

    # Run: run_r's four return values map onto console text, code_history,
    # console_history and the plot preview.
    run_button.click(run_r, inputs=code_input, outputs=[code_output, code_history, console_history, plot_output])
    # Send: ask_ragent receives chat_state as its history; the history it
    # returns is displayed in `chatbot`. chat_state itself is not an output here.
    send_button.click(ask_ragent, inputs=[chat_input, chat_state, code_history, console_history], outputs=[chat_input, chatbot, proposed_code, confirm_button])
    # Add Code: apply_diff puts the proposal in the editor and hides the button.
    confirm_button.click(apply_diff, inputs=[code_input, proposed_code], outputs=[code_input, confirm_button])

    # Agent logs
    #with gr.Row():
    #    log_export = gr.Button("Export Logs")
    #    log_output = gr.File(label="Download Logs")
    #    log_export.click(fn=export_logs, inputs=[], outputs=log_output)

# share=True additionally requests a temporary public URL for the app.
app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7869
* Running on public URL: https://3c943e773293b3a096.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


