# Integration of Prebuilt Code using RAG and LLM

## Project Breakdown
### Overview
This project leverages Retrieval-Augmented Generation (RAG) to enhance educational modules by dynamically integrating tested, prebuilt code snippets from the "stable_properties" folder. This approach ensures the generated modules are more reliable, functional, and controlled, by embedding established code directly where needed.

Use Case Examples:
- Unit Conversion: Automatically integrate a unit converter  to facilitate calculations, 
practical application.
- Chemical Properties: Enhance chemistry modules by embedding code to fetch and display current chemical properties of elements or compounds.
- Thermodynamic Tables: Integrate code for accessing thermodynamic tables into modules

In [2]:
# Standard library imports
import asyncio
import os
import re
import warnings
from pprint import pprint

# Suppress warnings
warnings.filterwarnings("ignore")

# Third-party library imports
import bs4

# langchain imports
from langchain import hub
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# langchain_community imports
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_community.vectorstores import Chroma

# langchain_core imports
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

# Utility imports
from util_code_generation import PromptFormatterFromRepository, builder_server_js
from util_semantic_search import SemanticSearch
from util_example_based_prompt import ExampleBasedPromptFormatter

# Get current working directory
os.getcwd()


'c:\\Users\\lberm\\OneDrive\\Desktop\\GitHub_Repository\\educational_code_gen'

In [3]:
# Insert API Key
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass()

# Adding History

In [4]:

# Load documents from the filesystem with specific criteria (JS files)
loader = GenericLoader.from_filesystem(
    r"C:\Users\lberm\OneDrive\Desktop\GitHub_Repository\educational_code_gen\stable_properties",
    glob="*",
    suffixes=[".js"],
    parser=LanguageParser(language=Language.JS)
)
docs = loader.load()
for document in docs:
    print(docs)
# Text Splitting
# Split loaded documents into smaller chunks for processing

# Vector Store and Retriever Initialization
# Initialize vector store with documents and set up a retriever
vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Large Language Model (LLM) Initialization
# Initialize ChatGPT model with specific parameters
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0)

# Prompt Template
# Define a custom prompt template for use in retrieval
prompt = hub.pull("rlm/rag-prompt")

# Formatting Documents
# Utility function to format documents for presentation
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Question-Answering System Setup
# Define the template and chain for processing questions and generating answers
template = """As a developer with access to our extensive code database, your goal is to effectively find and utilize information for the tasks at hand. This concise guide outlines a streamlined approach to optimize your workflow:
1. Understanding the Query: Grasp the essence of the provided question or task.
2. Accessing the Code Database: Conduct a targeted search within the database.
3. Evaluating the Results: Assess the relevance and utility of each potential match.
4. Handling Unknown Answers: Acknowledge the absence of direct answers without speculation.
5. Only use the context provided nothing else aka only code from the codebase
{context}


Answer:"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", template),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

contextualize_q_system_prompt = """Given a chat history and the latest user question, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# Chain for Contextualizing Questions
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

def contextualized_question(input: dict):
    if input.get("chat_history"):
        return contextualize_q_chain
    else:
        return input["question"]

# RAG Chain Setup
# Setup for Retrieval-Augmented Generation for answering questions
rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | llm
)

[Document(page_content='const math = require(\'mathjs\');\n/**\n * Object containing properties of various fluids.\n * Each fluid is represented as a key with an object containing its properties:\n * - sg: Specific gravity\n * - mu: Dynamic viscosity (Pa.s)\n * - Cp: Specific heat capacity (kJ/kg.K)\n * - Tfreeze: Freezing temperature (°C)\n * - Tboil: Boiling temperature (°C)\n */\nconst fluidProperties = {\n    "water": {\n        "sg": 1,\n        "mu": 0.001,\n        "Cp": 1,\n        "Tfreeze": 0,\n        "Tboil": 100\n    },\n    "gasoline": {\n        "sg": 0.72,\n        "mu": 0.00029,\n        "Cp": 2.22,\n        "Tfreeze": -50,\n        "Tboil": 150\n    },\n    "diesel": {\n        "sg": 0.8,\n        "mu": 0.0022,\n        "Cp": 2.05,\n        "Tfreeze": -60,\n        "Tboil": 300\n    },\n    "benzene": {\n        "sg": 0.88,\n        "mu": 0.0006,\n        "Cp": 1.19,\n        "Tfreeze": 5.5,\n        "Tboil": 80\n    },\n    "ethanol": {\n        "sg": 0.79,\n        

## Simple Implementation
We will demonstrate how to merge code retrieval and generation. Initially, I'll request the LLM to examine the question and identify suitable modules from our dataset for integration. Following the analysis, I'll instruct it to create a basic JavaScript module. The objective is to enable it to export the necessary values efficiently

In [5]:
question = "A ball is thrown vertically upward and reaches a height of 80 ft. What is its initial velocity (in ft/s)?"
html_question = """
<pl-question-panel>
  <p> A ball is thrown vertically upwards and continues to rise for {{params.t}} seconds before it starts to fall. </p>
  <p> Determine the initial velocity of the ball. </p>
</pl-question-panel>

<pl-number-input answers-name="u" comparison="sigfig" digits="3" label="Initial velocity $u=$ (in {{params.unitsSpeed}})"></pl-number-input>
"""

analyze_question_prompt = f"""
Your task involves analyzing a question that will be transformed into a dynamic module. This module will feature varying units, values, and parameters. Carefully inspect the question to determine which modules, classes, or functions from our code database could be integrated to provide a solution.

Guidelines:
Dynamic Placeholder Values: Understand that the placeholders (e.g., {{params.placeholder_value}}, {{params.placeholder_units}}) will contain dynamically generated values. These placeholders are designed to adapt the question to various scenarios and units.

Module Integration: Focus on identifying modules, classes, or functions from our database that are relevant to the question's requirements. Consider the mathematical or logical operations needed to solve the problem based on the dynamic parameters provided. Do not include any modules that do not contribute to the original quesiton

Database Search: If our database does not offer a direct answer or relevant code snippet for the question, clearly state that the information is not available. Avoid making speculative suggestions outside of our database's scope.

Code Implementation Proposals: Sumbit max 3 sentences concise and to-the-point ideas for integrating our code database resources into the dynamic module. Only mention code that is currently in our database
Do not attempt to solve the question, only provide information about code implementation

Formatted HTML Question: {html_question}
"""


In [6]:
chat_history = []
first_response = rag_chain.invoke({"question": analyze_question_prompt, "chat_history": chat_history})
first_response_dict = first_response.dict()
content = first_response_dict["content"]
print(content)
chat_history.extend([HumanMessage(content=question), first_response])


Number of requested results 20 is greater than number of elements in index 3, updating n_results = 3


To address the question regarding determining the initial velocity of a ball thrown vertically upwards, the `UnitConverter` class from our codebase can be utilized for unit conversions if needed. Additionally, the `generateRandomValueForUnit` method within the `UnitConverter` class can assist in generating random values for time ({params.t}) if required. Lastly, the `convert` method from the `UnitConverter` class can be employed to handle any necessary unit conversions for velocity calculations.


In [7]:
new_question = "Based on the implementation create a javascript module for the question"
second_response = rag_chain.invoke({"question": new_question, "chat_history": chat_history})
second_response_dict = second_response.dict()
content = second_response_dict["content"]
print(content)

Number of requested results 20 is greater than number of elements in index 3, updating n_results = 3


```javascript
const { UnitConverter } = require('./UnitConverter');

/**
 * Calculates the initial velocity of a ball thrown vertically upward.
 * @param {number} height - The height reached by the ball (in ft).
 * @returns {number} The initial velocity of the ball (in ft/s).
 */
const calculateInitialVelocity = (height) => {
    // Constants
    const accelerationDueToGravity = 32.174; // ft/s^2 (acceleration due to gravity)

    // Calculate initial velocity using the equation: v^2 = u^2 + 2as
    // where v = 0 (final velocity at the peak), a = -32.174 ft/s^2 (acceleration due to gravity), s = height
    const initialVelocity = Math.sqrt(2 * accelerationDueToGravity * height);

    return initialVelocity;
}

// Example calculation for a height of 80 ft
const height = 80;
const initialVelocity = calculateInitialVelocity(height);
console.log(`The initial velocity of the ball thrown vertically upward to reach a height of ${height} ft is approximately ${initialVelocity.toFixed(2)} ft/s.

## More Complex Implementation


In [8]:
csv_path = r"Question_Embedding_20240128.csv"
semantic_search_instance = SemanticSearch(
    csv_path=csv_path,
    embedding_column_name="question_embedding",
    embedding_engine="text-embedding-ada-002",
    api_key=os.environ["OPENAI_API_KEY"])

def _extract_examples(question: str) -> str:
    examples = semantic_search_instance.extract_examples(
        input_string=question,
        search_column="question.html",
        output_column="server.js",
        n_examples=1
    )
    pretty_examples = semantic_search_instance.pretty_print_extracted_examples(
        input_string=question,
        search_column="question.html",
        output_column="server.js",
        n_examples=1
    )
    return examples,pretty_examples

In [9]:
question = """<pl-question-panel>
  <p> A Boeing 747 with a take-off weight of {{params.weight}} {{params.unitsWeight}} has 4 engines that each generate a maximum thrust of {{params.maxThrust}} {{params.unitsWeight}}. </p>
  <p> If the plane is to start from rest and accelerate to a take-off speed of {{params.speed}} {{params.unitsSpeed}} in {{params.time}} seconds, determine the percentage of maximum thrust each engine was operating at. </p>
</pl-question-panel>

<pl-number-input answers-name="percentageThrust" comparison="sigfig" digits="3" label="Percentage of Maximum Thrust"></pl-number-input>"""
examples,pretty_examples = _extract_examples(question)
print(pretty_examples)

Extracted Examples for: '<pl-question-panel>
  <p> A Boeing 747 with a take-off weight of {{params.weight}} {{params.unitsWeight}} has 4 engines that each generate a maximum thrust of {{params.maxThrust}} {{params.unitsWeight}}. </p>
  <p> If the plane is to start from rest and accelerate to a take-off speed of {{params.speed}} {{params.unitsSpeed}} in {{params.time}} seconds, determine the percentage of maximum thrust each engine was operating at. </p>
</pl-question-panel>

<pl-number-input answers-name="percentageThrust" comparison="sigfig" digits="3" label="Percentage of Maximum Thrust"></pl-number-input>'

Input Example                                               Output Example                                              
------------------------------------------------------------------------------------------------------------------------
<pl-question-panel>
  <p> A truck traveling at {{params.v...const math = require('mathjs');



/**
 * The SpeedConver...


None


In [10]:
base_template = f"""
        Design a robust JavaScript module adept at generating computational problems for various STEM disciplines. This module will ingest an HTML file containing a structured query and will output a JavaScript snippet that carries out the calculation for the problem described. The JavaScript code must conform to the following outline:

        const generate = () => {{
            // 1. Dynamic Parameter Selection:
            // - Thoroughly analyze the HTML or data source to identify an extensive range of categories and units for computation.
            // - Ensure the inclusion of a wide variety of units and values, covering different global measurement systems.
            // - Develop a randomized selection algorithm to fairly choose a category or unit system, ensuring equitable representation.

            // 2. Appropriate Transformations:
            // - Implement precise and tailored conversion processes for each unit, ensuring accuracy in the transformation.
            // - Adapt the computations to maintain integrity, taking into account the specific characteristics of each unit system.
            // - Include robust validation and error handling to manage unusual or incorrect unit inputs, preserving computation reliability.

            // 3. Value Generation:
            // Produce random values relevant to the problem's context.
            // These values are the basis for the problem's arithmetic or logical operations.

            // 4. Solution Synthesis:
            // Utilize the selected parameters and the generated values to formulate the solution.
            // The computation should honor the problem's context, limitations, and nature to guarantee its validity.

            return {{
                params: {{
                    // Input parameters relevant to the problem's context and the chosen category/unit system.
                    // Export all calculated variables
                }},
                correct_answers: {{
                    // Calculate the correct answer(s) using the selected parameters and generated values.
                }},
                nDigits: 3,  // Define the number of digits after the decimal place.
                sigfigs: 3   // Define the number of significant figures for the answer.
            }};
        }}

        module.exports = {{
            generate
        }}

        Your mission is to flesh out the 'generate' function within this framework. It should dynamically select parameters and units, apply necessary transformations, spawn values, and deduce a legitimate solution. The function must return an object containing 'params' and 'correct_answers' properties, which abide by the prescribed structure. This alignment ensures that the HTML and JavaScript components integrate seamlessly. Below is a sample illustration:
        
        ```insert code here```
        """
        
template = ExampleBasedPromptFormatter.run(examples,base_template)
complete_template = f"{template}\ninput: {question}"
print(complete_template)


        Design a robust JavaScript module adept at generating computational problems for various STEM disciplines. This module will ingest an HTML file containing a structured query and will output a JavaScript snippet that carries out the calculation for the problem described. The JavaScript code must conform to the following outline:

        const generate = () => {{
            // 1. Dynamic Parameter Selection:
            // - Thoroughly analyze the HTML or data source to identify an extensive range of categories and units for computation.
            // - Ensure the inclusion of a wide variety of units and values, covering different global measurement systems.
            // - Develop a randomized selection algorithm to fairly choose a category or unit system, ensuring equitable representation.

            // 2. Appropriate Transformations:
            // - Implement precise and tailored conversion processes for each unit, ensuring accuracy in the transformation.
            /

## Utilizing Agent

In [11]:
from langchain.tools.retriever import create_retriever_tool
from langchain_openai import ChatOpenAI

retriever_tool = create_retriever_tool(
    retriever,
    "Database_Retriever",
    "Whenever you need to Search Code Database for relevant code including modules, functions, and classes. You will return any information about the code base",
)
tools = [retriever_tool]


llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)

# Get the prompt to use - you can modify this!
from langchain import hub

prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages

from langchain.agents import create_openai_functions_agent

agent = create_openai_functions_agent(llm, tools, prompt)

from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


In [12]:
code =""" const math = require('mathjs');

/**
 * The ForceMassTimeCalculator module calculates the time required for a vehicle to accelerate to a certain speed given its mass and the force applied.
 */
const ForceMassTimeCalculator = {
    /**
     * Calculates the acceleration from force and mass using Newton's second law.
     * 
     * @param {number} force - The force applied in newtons.
     * @param {number} mass - The mass of the vehicle in kilograms.
     * @return {number} The acceleration in meters per second squared.
     */
    calculateAcceleration: function(force, mass) {
        return force / mass;
    },

    /**
     * Calculates the time required to reach a certain velocity from acceleration.
     * 
     * @param {number} velocity - The final velocity in meters per second.
     * @param {number} acceleration - The acceleration in meters per second squared.
     * @return {number} The time in seconds.
     */
    calculateTime: function(velocity, acceleration) {
        return velocity / acceleration;
    }
};

const generate = () => {
    // Value Generation
    const mass = math.randomInt(1000, 3001); // Mass in kilograms
    const v = math.randomInt(10, 101); // Final velocity in meters per second
    const f = math.randomInt(1000, 10001); // Force in newtons

    // Calculating Acceleration
    const a = ForceMassTimeCalculator.calculateAcceleration(f, mass);

    // Calculating Time
    const t = ForceMassTimeCalculator.calculateTime(v, a);

    // Rounding off the final value
    const timeRequired = math.round(t, 3); // rounding to 3 decimal places

    // Return the structured data
    return {
        params: {
            mass: mass,
            v: v,
            f: f,
            unitsMass: 'kg',
            unitsSpeed: 'm/s',
            unitsForce: 'N',
            unitsTime: 's'
        },
        correct_answers: {
            t: timeRequired
        },
        nDigits: 3,
        sigfigs: 3
    };
};

module.exports = {
    generate
};

// Example usage:
const result = generate();
console.log(result);"""
text = f"""
Carefully review the provided code snippet with the objective of enhancing its functionality and performance. If you identify any modules from our code base that can be integrated to achieve this, focus on importing these modules rather than copying and pasting their contents directly into the new code. Your task is to seamlessly incorporate these enhancements through imports, ensuring the original code is optimized. Please deliver the fully refined and improved code, distinctly enclosed within insert_code markers
old code {code}"""
agent_executor.invoke({"input": text})



[1m> Entering new AgentExecutor chain...[0m


Number of requested results 20 is greater than number of elements in index 3, updating n_results = 3


[32;1m[1;3m
Invoking: `Database_Retriever` with `{'query': 'mathjs'}`


[0m[36;1m[1;3mconst math = require('mathjs');
/**
 * Object containing properties of various fluids.
 * Each fluid is represented as a key with an object containing its properties:
 * - sg: Specific gravity
 * - mu: Dynamic viscosity (Pa.s)
 * - Cp: Specific heat capacity (kJ/kg.K)
 * - Tfreeze: Freezing temperature (°C)
 * - Tboil: Boiling temperature (°C)
 */
const fluidProperties = {
    "water": {
        "sg": 1,
        "mu": 0.001,
        "Cp": 1,
        "Tfreeze": 0,
        "Tboil": 100
    },
    "gasoline": {
        "sg": 0.72,
        "mu": 0.00029,
        "Cp": 2.22,
        "Tfreeze": -50,
        "Tboil": 150
    },
    "diesel": {
        "sg": 0.8,
        "mu": 0.0022,
        "Cp": 2.05,
        "Tfreeze": -60,
        "Tboil": 300
    },
    "benzene": {
        "sg": 0.88,
        "mu": 0.0006,
        "Cp": 1.19,
        "Tfreeze": 5.5,
        "Tboil": 80
    },
    "ethanol": {
    

{'input': "\nCarefully review the provided code snippet with the objective of enhancing its functionality and performance. If you identify any modules from our code base that can be integrated to achieve this, focus on importing these modules rather than copying and pasting their contents directly into the new code. Your task is to seamlessly incorporate these enhancements through imports, ensuring the original code is optimized. Please deliver the fully refined and improved code, distinctly enclosed within insert_code markers\nold code  const math = require('mathjs');\n\n/**\n * The ForceMassTimeCalculator module calculates the time required for a vehicle to accelerate to a certain speed given its mass and the force applied.\n */\nconst ForceMassTimeCalculator = {\n    /**\n     * Calculates the acceleration from force and mass using Newton's second law.\n     * \n     * @param {number} force - The force applied in newtons.\n     * @param {number} mass - The mass of the vehicle in kil