In [1]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import StrOutputParser
from operator import itemgetter
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA


In [2]:
import os
from dotenv import load_dotenv

# Copy the variables defined in a local .env file into os.environ.
load_dotenv()

# API key used by the OpenAI clients below; None when the variable is not set.
MY_API_KEY = os.environ.get("MY_API_KEY")


In [3]:
def file_loader(folder, glob="**/*.py", encoding="utf-8"):
    """Load source files found under ``folder`` as LangChain documents.

    Args:
        folder: Directory to scan.
        glob: Pattern selecting the files to load. The default matches every
            ``.py`` file in ``folder`` and its subdirectories.
        encoding: Text encoding handed to ``TextLoader``. Defaults to UTF-8,
            Python's default source encoding, instead of the platform locale,
            so files with non-ASCII characters load the same way on every OS.

    Returns:
        The list produced by ``DirectoryLoader.load()``; each entry carries the
        file text in ``page_content`` and its path in ``metadata['source']``.
    """
    print("- Loading source code files...")
    loader = DirectoryLoader(
        folder,
        glob=glob,
        loader_cls=TextLoader,
        # Forwarded to every TextLoader instance created for a matched file.
        loader_kwargs={"encoding": encoding},
        use_multithreading=True,
    )
    return loader.load()

In [4]:
# Load every .py file under my_project, then echo the resulting list as the cell output.
docs = file_loader('my_project')
docs

- Loading source code files...


[Document(metadata={'source': 'my_project\\main.py'}, page_content='def calculate_area(width, height):\n    return width + height\n\ndef greet_user(name):\n    print(f"Hello, {name2}!")\n\nif __name__ == "__main__":\n    greet_user("Viktor")\n\n    area = calculate_area(5, 10)\n    print(f"The area is: {area}")\n'),
 Document(metadata={'source': 'my_project\\utils.py'}, page_content='# utils.py\n\ndef list_to_string(input_list):\n    """Converts a list of items to a comma-separated string."""\n    result = ""\n    for item in input_list:\n        result += str(item) + ", "\n    return result.strip().strip(\',\')\n\ndef add_numbers(a, b):\n    return a + b')]

In [5]:
# Docs to vector store
def split_n_vectirize(docs, chunk_size=1000, chunk_overlap=100,
                      embedding_model="text-embedding-3-small"):
    """Split documents into Python-aware chunks and index them in FAISS.

    Args:
        docs: Documents to index (objects accepted by
            ``RecursiveCharacterTextSplitter.split_documents``).
        chunk_size: ``chunk_size`` passed to the splitter.
        chunk_overlap: ``chunk_overlap`` passed to the splitter.
        embedding_model: Name of the OpenAI embedding model.

    Returns:
        The retriever obtained from the FAISS vector store via
        ``as_retriever()``.

    Raises:
        ValueError: If ``docs`` is empty or splitting yields no chunks.
    """
    # Fail early with a clear message instead of letting the vector store
    # build fail later on an empty collection.
    if not docs:
        raise ValueError("No documents to index: 'docs' is empty.")

    print("- Splitting documents...")
    splitter = RecursiveCharacterTextSplitter.from_language(
        language="python",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs_split = splitter.split_documents(docs)
    if not docs_split:
        raise ValueError("No chunks to index: splitting produced no content.")

    # The original message began with a Cyrillic look-alike of "C"; plain
    # ASCII keeps the log line searchable.
    print("- Creating a Vector Storage (FAISS)...")
    embeddings = OpenAIEmbeddings(model=embedding_model, openai_api_key=MY_API_KEY)
    vectorstore = FAISS.from_documents(docs_split, embeddings)

    return vectorstore.as_retriever()

In [6]:
# Build the retriever from the loaded documents and echo its repr as the cell output.
retriever = split_n_vectirize(docs)
retriever

- Splitting documents...
- Сreating a Vector Storage (FAISS)...


VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001591C2B8A00>, search_kwargs={})

In [7]:
# LLM initialization: chat model client handed to the QA chain below.
# The API key is the MY_API_KEY value read from the environment.
llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=MY_API_KEY)

In [8]:
# Prompt for the QA chain. {context} and {question} are the two placeholders
# declared in input_variables below. The template text is part of runtime
# behavior (it is what the model receives), so edit its wording deliberately.
prompt_template = """
    You are an experienced software engineer who helps find and fix bugs in Python code and add new features.
Use only the information provided below from the source files to find bugs, suggest fixes, or write new code.
If the information provided does not contain an answer, say you cannot help with it.

Provided code:
{context}

Please answer the following prompt:
Question: {question}
Answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [9]:
# Echo the PromptTemplate to check its template text and input variables.
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    You are an experienced software engineer who helps find and fix bugs in Python code and add new features.\nUse only the information provided below from the source files to find bugs, suggest fixes, or write new code.\nIf the information provided does not contain an answer, say you cannot help with it.\n\nProvided code:\n{context}\n\nPlease answer the following prompt:\nQuestion: {question}\nAnswer:\n')

In [10]:
# RAG chain: combines the LLM, the retriever and the custom prompt into one
# RetrievalQA object that is invoked with {"query": ...}.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True # for debugging: get_code_help prints these documents
)

In [11]:
#Start RAG-system
def get_code_help(query):
    """Send ``query`` through ``qa_chain`` and print a small report.

    The report contains the question framed by two 50-character rules, the
    chain's answer (``result``), and then, for every entry of
    ``source_documents``, a 20-dash separator, the file label taken from
    ``metadata['source']`` and the document text. Nothing is returned.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Your question: {query}")
    print(banner)

    outcome = qa_chain.invoke({"query": query})

    print("Answer:")
    print(outcome["result"])

    print("\nSource code:")
    separator = "-" * 20
    for source_doc in outcome["source_documents"]:
        origin = source_doc.metadata["source"]
        # One call, three lines: separator, file label, file content.
        print(separator, f"Файл: {origin}", source_doc.page_content, sep="\n")

In [12]:
# Demo 1: ask the assistant to locate and fix the bug in calculate_area (main.py).
get_code_help("There is an error in the calculate_area function in main.py. Find it and suggest a fix.")


Your question: There is an error in the calculate_area function in main.py. Find it and suggest a fix.
Answer:
The error in the `calculate_area` function is that it incorrectly calculates the area by adding the `width` and `height` instead of multiplying them. To fix this, you should update the function to multiply the two parameters. 

Here is the corrected function:

```python
def calculate_area(width, height):
    return width * height
```

With this change, the function will correctly calculate the area of a rectangle.

Source code:
--------------------
Файл: my_project\main.py
def calculate_area(width, height):
    return width + height

def greet_user(name):
    print(f"Hello, {name2}!")

if __name__ == "__main__":
    greet_user("Viktor")

    area = calculate_area(5, 10)
    print(f"The area is: {area}")
--------------------
Файл: my_project\utils.py
# utils.py

def list_to_string(input_list):
    """Converts a list of items to a comma-separated string."""
    result = ""
    

In [13]:
# Demo 2: feature request - add a multiplication helper next to add_numbers() and use it in calculate_area().
get_code_help("There is add_numbers() functiokn in utils.py file. Create same function for multiplication of variables and use it in calculate_area()")


Your question: There is add_numbers() functiokn in utils.py file. Create same function for multiplication of variables and use it in calculate_area()
Answer:
To create a function for multiplication of variables and use it in the `calculate_area()` function, you can follow these steps:

1. Define a new function called `multiply_numbers` in `utils.py`.
2. Modify the `calculate_area` function to use the new multiplication function, instead of adding the width and height.

Here's the updated code:

```python
# utils.py

def list_to_string(input_list):
    """Converts a list of items to a comma-separated string."""
    result = ""
    for item in input_list:
        result += str(item) + ", "
    return result.strip().strip(',')

def add_numbers(a, b):
    return a + b

def multiply_numbers(a, b):
    return a * b  # New multiplication function

# main.py

def calculate_area(width, height):
    return multiply_numbers(width, height)  # Use multiplication for area calculation

def greet_use

In [14]:
# Demo 3: paste a traceback and ask for a fix.
# Raw string on purpose: the Windows paths in the traceback contain backslash
# sequences (\w, \A, \L, \m) that are invalid escapes in a normal literal and
# trigger a DeprecationWarning/SyntaxWarning. The string value is unchanged.
get_code_help(r"""Fix a next bug: 
Traceback (most recent call last):
  File "F:\work\AI\LangChain_01\my_project\main.py", line 8, in <module>
    greet_user("Alice")
  File "F:\work\AI\LangChain_01\my_project\main.py", line 5, in greet_user
    print(f"Hello, {name2}!")
NameError: name 'name2' is not defined. Did you mean: 'name'?
""")



Your question: Fix a next bug: 
Traceback (most recent call last):
  File "F:\work\AI\LangChain_01\my_project\main.py", line 8, in <module>
    greet_user("Alice")
  File "F:\work\AI\LangChain_01\my_project\main.py", line 5, in greet_user
    print(f"Hello, {name2}!")
NameError: name 'name2' is not defined. Did you mean: 'name'?

Answer:
To fix the bug, you need to replace `name2` with `name` in the `greet_user` function. Here's the corrected code:

```python
def greet_user(name):
    print(f"Hello, {name}!")
```

Now, the function correctly uses the `name` parameter that is passed to it. After making this change, the code will work without raising a `NameError`.

Source code:
--------------------
Файл: my_project\main.py
def calculate_area(width, height):
    return width + height

def greet_user(name):
    print(f"Hello, {name2}!")

if __name__ == "__main__":
    greet_user("Viktor")

    area = calculate_area(5, 10)
    print(f"The area is: {area}")
--------------------
Файл: my_pr