In [6]:
import os
import pandas as pd
from dotenv import load_dotenv
from langchain_ibm import WatsonxLLM
from chromadb import PersistentClient
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import shutil
import torch

# Load environment variables
load_dotenv()

WATSONX_APIKEY = os.getenv("WATSONX_APIKEY")
PROJECT_ID = os.getenv("PROJECT_ID")

def initialize_llm():
    """
    Initialize the WatsonxLLM model.
    """
    print("Initializing LLM...")
    llm = WatsonxLLM(
        model_id="ibm/granite-13b-instruct-v2",
        url="https://eu-gb.ml.cloud.ibm.com",
        project_id=PROJECT_ID,
        params={
            "max_new_tokens": 300,
            "min_new_tokens": 50,
            "decoding_method": "greedy",
            "temperature": 0.7,
        },
    )
    print("Done with LLM initialization.")
    return llm

def create_dataframe_from_project(project_path):
    """
    Step 1: Create a DataFrame of the project files.
    """
    print("Step 1: Creating DataFrame from project...")
    data = []
    for root, _, files in os.walk(project_path):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                readable = True
            except Exception:
                content = ""
                readable = False
            data.append({
                "path": file_path,
                "content": content,
                "readable": "Yes" if readable else "No",
                "extension": os.path.splitext(file)[-1],
            })
    df = pd.DataFrame(data)
    print("Done with Step 1.")
    return df

def enhance_metadata(df, llm):
    """
    Step 2: Enhance metadata with descriptions generated by LLM.
    """
    print("Step 2: Enhancing metadata...")
    
    def generate_description(row):
        if row['readable'] == "No":
            return "Unreadable file."
        prompt = f"Describe the purpose and content of this file:\n{row['content'][:500]}"
        try:
            return llm.invoke(prompt)
        except Exception as e:
            return f"Error generating description: {str(e)}"
    
    df['description'] = df.apply(generate_description, axis=1)
    print("Done with Step 2.")
    return df

def create_vector_database(df, db_path):
    """
    Step 3: Create a vector database from the DataFrame.
    """
    print("Step 3: Creating vector database...")
    chroma_client = PersistentClient(path=db_path)
    collection_name = "project_vectors"
    if collection_name in [col.name for col in chroma_client.list_collections()]:
        chroma_client.delete_collection(name=collection_name)
    collection = chroma_client.create_collection(name=collection_name)
    
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': "cuda" if torch.cuda.is_available() else "cpu"}
    )
    documents = df['description'].tolist()
    ids = df.index.astype(str).tolist()
    embeddings = hf.embed_documents(documents)
    collection.add(documents=documents, ids=ids, embeddings=embeddings)
    
    print("Done with Step 3.")
    return collection

def infer_modifications(llm, collection_name, persist_directory, feature_instructions):
    """
    Step 4-6: Infer project modifications based on instructions.

    Args:
        llm: The WatsonxLLM instance for inference.
        collection_name: Name of the Chroma collection to use.
        persist_directory: Directory where the Chroma database is stored.
        feature_instructions: Instructions describing the modifications to be made.

    Returns:
        Response from the LLM with the inferred modifications and additions.
    """
    print("Step 4-6: Inferring modifications...")

    # Initialize the embedding model
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': "cuda" if torch.cuda.is_available() else "cpu"},
        encode_kwargs={'normalize_embeddings': True},
    )
    print("Embedding model initialized.")

    # Initialize the Chroma Persistent Client
    chroma_client = PersistentClient(path=persist_directory)
    print(f"Connected to ChromaDB at {persist_directory}")

    # Check if the collection exists
    if collection_name not in [col.name for col in chroma_client.list_collections()]:
        raise ValueError(f"Collection '{collection_name}' does not exist. Ensure it is created before calling this function.")

    # Get the existing collection
    collection = chroma_client.get_collection(name=collection_name)
    print(f"Using collection: {collection_name}")

    # Initialize the vector store with the Chroma client and embedding function
    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=hf,
        client=chroma_client,  # Use PersistentClient directly
    )

    # Create a retriever from the vector store
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    print("Retriever initialized.")

    # Build the RetrievalQA chain
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="question",
    )
    print("RetrievalQA chain initialized.")

    # Construct the prompt for LLM inference
    prompt = (
        f"Given the following project descriptions and instructions:\n\n{feature_instructions}\n\n"
        "Based on the current project components, provide the following:\n"
        "1. Suggested modifications to existing components.\n"
        "2. New components or features to add.\n"
        "3. A brief explanation of why these changes are necessary.\n"
    )
    print("Prompt constructed for LLM inference.")

    # Run the RetrievalQA chain
    try:
        response = chain.invoke({"question": prompt})
        print("Inference completed successfully.")
    except Exception as e:
        raise RuntimeError(f"Error during LLM inference: {str(e)}")

    print("Done with Step 4-6.")
    return response


import os

import os

def update_project_tree(modifications, new_project_path):
    """
    Step 7-9: Apply modifications to create a new project.

    Args:
        modifications (list): List of modifications, where each item can be a string or a dictionary.
        new_project_path (str): The root directory for the new project.

    Returns:
        None
    """
    print("Step 7-9: Updating project tree...")

    # Ensure the base directory for the new project exists
    os.makedirs(new_project_path, exist_ok=True)

    for index, item in enumerate(modifications):
        # Check if the item is a string or a dictionary
        if isinstance(item, str):
            # Treat string as a direct project-wide update (e.g., a general suggestion)
            relative_path = f"update_{index + 1}.txt"  # Save it as a text file for reference
            content = item
        elif isinstance(item, dict):
            # Treat dictionary as a specific file update
            relative_path = item.get('path')
            content = item.get('content', '')
        else:
            print(f"Warning: Skipping modification #{index + 1} due to invalid type: {type(item)}")
            continue

        if not relative_path:
            print(f"Warning: Skipping modification #{index + 1} due to missing 'path'.")
            continue

        # Create the full path for the new file
        file_path = os.path.join(new_project_path, relative_path)

        try:
            # Ensure the directory structure exists
            os.makedirs(os.path.dirname(file_path), exist_ok=True)

            # Write the content to the file
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)

            print(f"File created: {file_path}")

        except Exception as e:
            print(f"Error creating file '{file_path}': {e}")
            continue

    print("Done with Step 7-9. Project tree updated successfully.")



def generate_project(old_project_path, new_project_path, feature_instructions):
    """
    Main function to generate a new project based on feature instructions.
    """
    # Initialize LLM
    llm = initialize_llm()
    
    # Step 1: Create a DataFrame of the project
    df = create_dataframe_from_project(old_project_path)
    
    # Step 2: Enhance metadata
    df = enhance_metadata(df, llm)
    df.to_pickle("enhanced_project.pkl")
    
    # Step 3: Create a vector database
    collection = create_vector_database(df, "./chroma_db")
    collection_name = "project_vectors"
    persist_directory="./chroma_db"
    # Step 4-6: Infer modifications
   
    modifications = infer_modifications(llm, collection_name, persist_directory, feature_instructions)
    # Step 7-9: Update the project tree and create new project
    update_project_tree(modifications, new_project_path)
    print(f"Project successfully generated at {new_project_path}")

# Example Usage
if __name__ == "__main__":
    old_path = "./project_old"
    new_path = "./project_new"
    instructions = "Add logging to all major modules and improve error handling."
    generate_project(old_path, new_path, instructions)


Initializing LLM...
Done with LLM initialization.
Step 1: Creating DataFrame from project...
Done with Step 1.
Step 2: Enhancing metadata...
Done with Step 2.
Step 3: Creating vector database...
Done with Step 3.
Step 4-6: Inferring modifications...


Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


Embedding model initialized.
Connected to ChromaDB at ./chroma_db
Using collection: project_vectors
Retriever initialized.
RetrievalQA chain initialized.
Prompt constructed for LLM inference.
Inference completed successfully.
Done with Step 4-6.
Step 7-9: Updating project tree...
File created: ./project_new/update_1.txt
File created: ./project_new/update_2.txt
Done with Step 7-9. Project tree updated successfully.
Project successfully generated at ./project_new


In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from langchain_ibm import WatsonxLLM
from chromadb import PersistentClient
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import shutil
import torch

# Load environment variables
load_dotenv()

WATSONX_APIKEY = os.getenv("WATSONX_APIKEY")
PROJECT_ID = os.getenv("PROJECT_ID")


def initialize_llm():
    """
    Initialize the WatsonxLLM model.
    """
    print("Initializing LLM...")
    llm = WatsonxLLM(
        model_id="ibm/granite-13b-instruct-v2",
        url="https://eu-gb.ml.cloud.ibm.com",
        project_id=PROJECT_ID,
        params={
            "max_new_tokens": 300,
            "min_new_tokens": 50,
            "decoding_method": "greedy",
            "temperature": 0.7,
        },
    )
    print("LLM initialized successfully.")
    return llm


def create_dataframe_from_project(project_path):
    """
    Step 1: Create a DataFrame of the project files.
    """
    print("Step 1: Creating DataFrame from project files...")
    data = []
    for root, _, files in os.walk(project_path):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                readable = True
            except Exception:
                content = ""
                readable = False
            data.append({
                "path": file_path,
                "content": content,
                "readable": "Yes" if readable else "No",
                "extension": os.path.splitext(file)[-1],
            })
    df = pd.DataFrame(data)
    print("Project DataFrame created.")
    return df


def enhance_metadata(df, llm):
    """
    Step 2: Enhance metadata with descriptions generated by LLM.
    """
    print("Step 2: Enhancing metadata...")
    
    def generate_description(row):
        if row['readable'] == "No":
            return "Unreadable file."
        prompt = f"Describe the purpose and content of this file:\n{row['content'][:500]}"
        try:
            return llm.invoke(prompt)
        except Exception as e:
            return f"Error generating description: {str(e)}"
    
    df['description'] = df.apply(generate_description, axis=1)
    print("Metadata enhancement completed.")
    return df


def create_vector_database(df, db_path):
    """
    Step 3: Create a vector database from the DataFrame.
    """
    print("Step 3: Creating vector database...")
    chroma_client = PersistentClient(path=db_path)
    collection_name = "project_vectors"
    if collection_name in [col.name for col in chroma_client.list_collections()]:
        chroma_client.delete_collection(name=collection_name)
    collection = chroma_client.create_collection(name=collection_name)
    
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': "cuda" if torch.cuda.is_available() else "cpu"}
    )
    documents = df['description'].tolist()
    ids = df.index.astype(str).tolist()
    embeddings = hf.embed_documents(documents)
    collection.add(documents=documents, ids=ids, embeddings=embeddings)
    
    print("Vector database created successfully.")
    return collection


def infer_modifications(llm, collection_name, persist_directory, feature_instructions):
    """
    Step 4-6: Infer modifications using LLM and vector database.
    """
    print("Inferring modifications...")
    
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': "cuda" if torch.cuda.is_available() else "cpu"},
    )
    chroma_client = PersistentClient(path=persist_directory)
    collection = chroma_client.get_collection(name=collection_name)
    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=hf,
        client=chroma_client,
    )
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="question",
    )
    prompt = (
        f"Based on these project components and the instruction:\n{feature_instructions}\n\n"
        "Provide:\n"
        "1. Components to modify\n"
        "2. New components to add\n"
        "3. Reasoning for each change\n"
    )
    response = chain.invoke({"question": prompt})
    return response


def update_project_tree(modifications, old_project_path, new_project_path):
    """
    Step 7-9: Update the project tree with modifications.
    """
    print("Updating project tree...")
    shutil.copytree(old_project_path, new_project_path, dirs_exist_ok=True)
    for mod in modifications:
        if isinstance(mod, dict) and 'path' in mod and 'content' in mod:
            new_file_path = os.path.join(new_project_path, mod['path'])
            os.makedirs(os.path.dirname(new_file_path), exist_ok=True)
            with open(new_file_path, 'w', encoding='utf-8') as f:
                f.write(mod['content'])
    print(f"New project generated at {new_project_path}.")


def generate_project(old_project_path, new_project_path, feature_instructions):
    """
    Main function to generate the new project.
    """
    llm = initialize_llm()
    df = create_dataframe_from_project(old_project_path)
    df = enhance_metadata(df, llm)
    df.to_pickle("enhanced_project.pkl")
    create_vector_database(df, "./chroma_db")
    modifications = infer_modifications(
        llm, "project_vectors", "./chroma_db", feature_instructions
    )
    update_project_tree(modifications, old_project_path, new_project_path)


# Example Usage
if __name__ == "__main__":
    old_path = "./project_old"
    new_path = "./project_new"
    feature_request = "Add logging to all modules and create a utils module."
    generate_project(old_path, new_path, feature_request)


Initializing LLM...
LLM initialized successfully.
Step 1: Creating DataFrame from project files...
Project DataFrame created.
Step 2: Enhancing metadata...
Metadata enhancement completed.
Step 3: Creating vector database...


  from .autonotebook import tqdm as notebook_tqdm


Vector database created successfully.
Inferring modifications...


  vector_store = Chroma(
Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


Updating project tree...
New project generated at ./project_new.


In [56]:
import os
import pandas as pd
from dotenv import load_dotenv
from langchain_ibm import WatsonxLLM
from chromadb import PersistentClient
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import shutil
import torch

# Load environment variables
load_dotenv()

WATSONX_APIKEY = os.getenv("WATSONX_APIKEY")
PROJECT_ID = os.getenv("PROJECT_ID")


def initialize_llm():
    """
    Initialize the WatsonxLLM model.
    """
    print("Initializing LLM...")
    llm = WatsonxLLM(
        model_id="mistralai/mixtral-8x7b-instruct-v01",
        url="https://eu-gb.ml.cloud.ibm.com",
        project_id=PROJECT_ID,
        params={
            "max_new_tokens": 300,
            "min_new_tokens": 50,
            "decoding_method": "greedy",
            "temperature": 0.7,
        },
    )
    print("LLM initialized successfully.")
    return llm


def create_dataframe_from_project(project_path):
    """
    Step 1: Create a DataFrame of the project files.
    """
    print("Step 1: Creating DataFrame from project files...")
    data = []
    for root, _, files in os.walk(project_path):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                readable = True
            except Exception:
                content = ""
                readable = False
            data.append({
                "path": file_path,
                "content": content,
                "readable": "Yes" if readable else "No",
                "extension": os.path.splitext(file)[-1],
            })
    df = pd.DataFrame(data)
    print("Project DataFrame created.")
    return df


def enhance_metadata(df, llm):
    """
    Step 2: Enhance metadata with descriptions generated by LLM.
    """
    print("Step 2: Enhancing metadata...")
    
    def generate_description(row):
        if row['readable'] == "No":
            return "Unreadable file."
        prompt = f"Please generate a small description about the content of this file  \n{row['path'][:500]} and what it does with content:\n{row['content'][:500]}"
        try:
            return llm.invoke(prompt)
        except Exception as e:
            return f"Error generating description: {str(e)}"
    
    df['description'] = df.apply(generate_description, axis=1)
    print("Metadata enhancement completed.")
    return df


def create_vector_database(df, db_path):
    """
    Step 3: Create a vector database from the DataFrame.
    """
    print("Step 3: Creating vector database...")
    chroma_client = PersistentClient(path=db_path)
    collection_name = "project_vectors"
    if collection_name in [col.name for col in chroma_client.list_collections()]:
        chroma_client.delete_collection(name=collection_name)
    collection = chroma_client.create_collection(name=collection_name)
    
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': "cuda" if torch.cuda.is_available() else "cpu"}
    )
    documents = df['description'].tolist()
    ids = df.index.astype(str).tolist()
    embeddings = hf.embed_documents(documents)
    collection.add(documents=documents, ids=ids, embeddings=embeddings)
    
    print("Vector database created successfully.")
    return collection


def infer_modifications(llm, collection_name, persist_directory, feature_instructions):
    """
    Step 4-6: Infer modifications using LLM and vector database.
    """
    print("Inferring modifications...")
    
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': "cuda" if torch.cuda.is_available() else "cpu"},
    )
    chroma_client = PersistentClient(path=persist_directory)
    collection = chroma_client.get_collection(name=collection_name)
    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=hf,
        client=chroma_client,
    )
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="question",
    )
    prompt = (
        f"Based on these project components and the instruction:\n{feature_instructions}\n\n"
        "Provide:\n"
        "1. Components to modify\n"
        "2. New components to add\n"
        "3. Reasoning for each change\n"
    )
    response = chain.invoke({"question": prompt})
    return response


def update_project_tree(modifications, old_project_path, new_project_path):
    """
    Step 7-9: Update the project tree with modifications and return report data.
    """
    print("Updating project tree...")
    shutil.copytree(old_project_path, new_project_path, dirs_exist_ok=True)
    report = []

    for mod in modifications:
        if isinstance(mod, dict) and 'path' in mod and 'content' in mod:
            new_file_path = os.path.join(new_project_path, mod['path'])
            old_file_path = os.path.join(old_project_path, mod['path'])
            os.makedirs(os.path.dirname(new_file_path), exist_ok=True)

            # Write the new content
            with open(new_file_path, 'w', encoding='utf-8') as f:
                f.write(mod['content'])

            # Check if the file existed in the old project
            exists_in_old = os.path.exists(old_file_path)
            report.append({
                "old_path": old_file_path if exists_in_old else None,
                "new_path": new_file_path,
                "status": "Modified" if exists_in_old else "Created"
            })

    print(f"New project generated at {new_project_path}.")
    return report


def generate_report(report, output_path):
    """
    Step 10: Generate a report of modifications and save to a file.
    """
    print("Generating report...")
    report_df = pd.DataFrame(report)
    report_file = os.path.join(output_path, "modification_report.csv")
    report_df.to_csv(report_file, index=False)
    print(f"Report generated and saved to {report_file}.")


def generate_project(old_project_path, new_project_path, feature_instructions):
    """
    Main function to generate the new project.
    """
    llm = initialize_llm()
    df = create_dataframe_from_project(old_project_path)
    df = enhance_metadata(df, llm)
    df.to_pickle("enhanced_project.pkl")
    create_vector_database(df, "./chroma_db")
    modifications = infer_modifications(
        llm, "project_vectors", "./chroma_db", feature_instructions
    )
    report = update_project_tree(modifications, old_project_path, new_project_path)
    generate_report(report, new_project_path)


# Example Usage
if __name__ == "__main__":
    old_path = "./project_old"
    new_path = "./project_new"
    feature_request = "Add logging to all modules and create a utils module."
    generate_project(old_path, new_path, feature_request)


Initializing LLM...
LLM initialized successfully.
Step 1: Creating DataFrame from project files...
Project DataFrame created.
Step 2: Enhancing metadata...
Metadata enhancement completed.
Step 3: Creating vector database...
Vector database created successfully.
Inferring modifications...


Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


Updating project tree...
New project generated at ./project_new.
Generating report...
Report generated and saved to ./project_new/modification_report.csv.


In [57]:
old_project_path = "./project_old"
new_path = "./project_new"
feature_instructions = "Add logging to all modules and create a utils module."
llm = initialize_llm()
df = create_dataframe_from_project(old_project_path)

Initializing LLM...
LLM initialized successfully.
Step 1: Creating DataFrame from project files...
Project DataFrame created.


In [58]:
df

Unnamed: 0,path,content,readable,extension
0,./project_old/app.py,from utils.helpers import greet\n\ndef main():...,Yes,.py
1,./project_old/requirements.txt,# Python dependencies\nflask\n,Yes,.txt
2,./project_old/utils/helpers.py,"def greet(name):\n """"""\n Returns a greet...",Yes,.py


In [59]:
df = enhance_metadata(df, llm)

Step 2: Enhancing metadata...
Metadata enhancement completed.


In [60]:
df["description"].values

array(['\nThis file path  ./project_old/app.py is the main entry point for the application. It imports a greet function from a utils.helpers module and uses it in the main function to greet the user. The user is prompted to enter their name, and the greet function is called with the entered name as an argument. The result is then printed to the console.',
       'flask_sqlalchemy\nflask_migrate\nflask_login\nflask_wtf\nflask_bootstrap\nflask_socketio\ngevent\ngevent-websocket\npsycopg2\ngunicorn\n\nThis file path ./project_old/requirements.txt is a list of Python dependencies for a Flask web application. Each line in the file specifies a Python package that is required to run the application. The packages are used for various purposes such as database management, user authentication, form handling, and real-time communication.\n\nHere is a brief description of each package:\n\n* flask: A lightweight web framework for building web applications in Python.\n* flask_sqlalchemy: An extensio

In [61]:
df.to_pickle("enhanced_project.pkl")

In [62]:
create_vector_database(df, "./chroma_db")

Step 3: Creating vector database...
Vector database created successfully.


Collection(name=project_vectors)

In [63]:
modifications = infer_modifications(
llm, "project_vectors", "./chroma_db", feature_instructions
    )

Inferring modifications...


Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


In [64]:
modifications

{'question': 'Based on these project components and the instruction:\nAdd logging to all modules and create a utils module.\n\nProvide:\n1. Components to modify\n2. New components to add\n3. Reasoning for each change\n',
 'result': '\n\n1. Components to modify:\n   - All modules: Add logging functionality to all modules. This can be done by importing the logging module and adding logging statements throughout the code.\n\n2. New components to add:\n   - utils module: Create a new module called utils.py and add the get_file_extension, is_valid_file_type, read_file, and write_file functions to this module. This will help to centralize and reuse common functionality across the application.\n\n3. Reasoning for each change:\n   - Adding logging to all modules: Logging is an important tool for debugging and monitoring applications. By adding logging to all modules, we can easily track the flow of execution and identify any errors or issues that may arise.\n   - Creating a utils module: By cr

In [32]:
feature_instructions

'Add logging to all modules and refactor utilities to improve performance.'

In [65]:
import os
import json
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ibm import WatsonxLLM
from chromadb import PersistentClient
import torch

# Load credentials from .env file
load_dotenv()
WATSONX_APIKEY = os.getenv("WATSONX_APIKEY")
PROJECT_ID = os.getenv("PROJECT_ID")

if not WATSONX_APIKEY or not PROJECT_ID:
    raise ValueError("API key or Project ID is missing. Please check your .env file.")


def get_lang_chain_model(model_type, max_tokens, min_tokens, decoding_method, temperature):
    """
    Initializes and returns a WatsonxLLM instance with the specified parameters.
    """
    print("Initializing WatsonxLLM model...")
    return WatsonxLLM(
        model_id=model_type,
        url="https://eu-gb.ml.cloud.ibm.com",
        project_id=PROJECT_ID,
        params={
            "max_new_tokens": max_tokens,
            "min_new_tokens": min_tokens,
            "decoding_method": decoding_method,
            "temperature": temperature,
        },
    )


def infer_modifications(
    collection_name, persist_directory, feature_instructions, output_json_path
):
    """
    Step 4-6: Infer modifications using WatsonxLLM and vector database.

    Args:
        collection_name (str): Name of the ChromaDB collection.
        persist_directory (str): Directory path for ChromaDB persistence.
        feature_instructions (str): Feature request instructions for modifications.
        output_json_path (str): Path to save the JSON report of modifications.

    Returns:
        List[dict]: A structured list of modifications, including paths and actions.
    """
    print("Inferring modifications with WatsonxLLM...")

    # WatsonxLLM configuration
    model_type = "meta-llama/llama-3-1-70b-instruct"
    max_tokens = 300
    min_tokens = 100
    decoding_method = "greedy"
    temperature = 0.7

    # Initialize the WatsonxLLM model
    llm = get_lang_chain_model(
        model_type, max_tokens, min_tokens, decoding_method, temperature
    )

    # Initialize the embedding model
    model_name = "sentence-transformers/all-mpnet-base-v2"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    hf_embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )

    # Initialize ChromaDB Persistent Client
    chroma_client = PersistentClient(path=persist_directory)
    if collection_name not in [col.name for col in chroma_client.list_collections()]:
        raise ValueError(f"Collection '{collection_name}' does not exist. Ensure it is created.")
    collection = chroma_client.get_collection(name=collection_name)

    # Use Chroma vectorstore with the given collection
    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=hf_embeddings,
        client=chroma_client,
    )

    # Create a retriever from the vectorstore
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})

    # Build the RetrievalQA chain
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="question",
    )

    # Construct the prompt
    prompt = (
        f"Based on the project descriptions and these instructions:\n{feature_instructions}\n\n"
        "Provide a list of:\n"
        "1. Paths to files that should be modified.\n"
        "2. The specific modifications required for each file.\n"
        "3. Any new files or directories to be created and their purposes.\n\n"
        "Return the output in JSON format for further analysis."
        '''For example:
        "Generate a JSON response with the following structure:\n"
        [
        {"path": "./src/main.py", "modification": "Add logging"},
        {"path": "./utils/helpers.py", "modification": "Create the logins function"},
        {"path": "requirements.txt", "modification": "Add Python requirements compatible with pip install"}
        ]
        '''
 
    )

    # Run the chain with the prompt
    response_text = chain.invoke({"question": prompt})

    print("--------------------------------- Generated response -----------------------------------")
    print(response_text)
    print("*********************************************************************************************")


    return response_text






In [None]:
feature_instructions = (
    "Add logging to all modules and refactor utilities to improve performance."
)
collection_name = "project_vectors"
persist_directory = "./chroma_db"
output_json_path = "./modifications.json"

modifications = infer_modifications(
    collection_name, persist_directory, feature_instructions, output_json_path
)

#print("Modifications:")
#print(json.dumps(modifications, indent=4))

Inferring modifications with WatsonxLLM...
Initializing WatsonxLLM model...


Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


--------------------------------- Generated response -----------------------------------
{'question': 'Based on the project descriptions and these instructions:\nAdd logging to all modules and refactor utilities to improve performance.\n\nProvide a list of:\n1. Paths to files that should be modified.\n2. The specific modifications required for each file.\n3. Any new files or directories to be created and their purposes.\n\nReturn the output in JSON format for further analysis.For example:\n        "Generate a JSON response with the following structure:\n"\n        [\n        {"path": "./src/main.py", "modification": "Add logging"},\n        {"path": "./utils/helpers.py", "modification": "Create the logins function"},\n        {"path": "requirements.txt", "modification": "Add Python requirements compatible with pip install"}\n        ]\n        ', 'result': ' \n[\n{"path": "./project_old/requirements.txt", "modification": "Add Python requirements compatible with pip install"},\n{"path":

In [67]:
modifications

{'question': 'Based on the project descriptions and these instructions:\nAdd logging to all modules and refactor utilities to improve performance.\n\nProvide a list of:\n1. Paths to files that should be modified.\n2. The specific modifications required for each file.\n3. Any new files or directories to be created and their purposes.\n\nReturn the output in JSON format for further analysis.For example:\n        "Generate a JSON response with the following structure:\n"\n        [\n        {"path": "./src/main.py", "modification": "Add logging"},\n        {"path": "./utils/helpers.py", "modification": "Create the logins function"},\n        {"path": "requirements.txt", "modification": "Add Python requirements compatible with pip install"}\n        ]\n        ',
 'result': ' \n[\n{"path": "./project_old/requirements.txt", "modification": "Add Python requirements compatible with pip install"},\n{"path": "./project_old/app.py", "modification": "Add logging"},\n{"path": "./project_old/utils/

In [26]:
response=modifications

In [27]:
# Extract relevant content
try:
    if isinstance(response, dict) and "question" in response:
        # Extract and clean the response text
        response_text = response.get("question", "")
        start_index = response_text.find("[")
        end_index = response_text.rfind("]") + 1

        if start_index != -1 and end_index != -1:
            relevant_response = response_text[start_index:end_index]
        else:
            raise ValueError("Could not extract JSON response from the LLM output.")
    else:
        raise ValueError("Unexpected response format from LLM.")

    # Parse the JSON content
    modifications = json.loads(relevant_response)

    if isinstance(modifications, list):
        # Save the modifications to a JSON file
        with open(output_json_path, "w", encoding="utf-8") as f:
            json.dump(modifications, f, indent=4)
        print(f"Modifications saved to {output_json_path}.")
    else:
        raise ValueError("Response is not in the expected JSON format.")

except (json.JSONDecodeError, ValueError) as e:
    raise RuntimeError(f"Failed to parse LLM response into JSON: {str(e)}")

RuntimeError: Failed to parse LLM response into JSON: Could not extract JSON response from the LLM output.

In [22]:
print(modifications)

{'question': 'Based on these project components and the instruction:\nAdd logging to all modules and create a utils module.\n\nProvide:\n1. Components to modify\n2. New components to add\n3. Reasoning for each change\n', 'result': '1. psycopg2\n2. utils\n3. Add logging to all modules and create a utils module.\n\n\n\n psycopg2: psycopg2 is a Python PostgreSQL database adapter. psycopg2 is a fork of the PostgreSQL adapter psycopg, and is the most actively maintained and most widely used PostgreSQL adapter for Python. psycopg2 is a drop-in replacement for psycopg, and can be used with any Python code that uses psycopg. psycopg2 is a mature and stable project, and has been tested with Python 2.6, 2.7, and 3.3 through 3.7. psycopg2 is a mature and stable project, and has been tested with Python 2.6, 2.7, and 3.3 through 3.7. psycopg2 is a mature and stable project, and has been tested with Python 2.6, 2.7, and 3.3 through 3.7. psycopg2 is a mature and stable project, and has been tested wi

In [None]:
def display_first_five_documents(collection):
    """
    Reads the first 5 documents from the given ChromaDB collection and displays them.

    Args:
        collection (chromadb.Collection): The ChromaDB collection to read from.

    Returns:
        None
    """
    # Retrieve all documents from the collection
    results = collection.get()
    
    # Extract documents and IDs
    documents = results.get("documents", [])
    ids = results.get("ids", [])
    metadatas = results.get("metadatas", [])

    # Display the first 5 documents
    print(f"Displaying the first {min(len(documents), 5)} documents:")
    for i in range(min(len(documents), 5)):
        print(f"Document ID: {ids[i]}")
        print(f"Content: {documents[i]}")
        if metadatas:
            print(f"Metadata: {metadatas[i]}")
        print("-" * 80)

# Usage Example
display_first_five_documents(collection)def display_first_five_documents(collection):
    """
    Reads the first 5 documents from the given ChromaDB collection and displays them.

    Args:
        collection (chromadb.Collection): The ChromaDB collection to read from.

    Returns:
        None
    """
    # Retrieve all documents from the collection
    results = collection.get()
    
    # Extract documents and IDs
    documents = results.get("documents", [])
    ids = results.get("ids", [])
    metadatas = results.get("metadatas", [])

    # Display the first 5 documents
    print(f"Displaying the first {min(len(documents), 5)} documents:")
    for i in range(min(len(documents), 5)):
        print(f"Document ID: {ids[i]}")
        print(f"Content: {documents[i]}")
        if metadatas:
            print(f"Metadata: {metadatas[i]}")
        print("-" * 80)

# Usage Example
display_first_five_documents(collection)