In [26]:
import json
from minsearch import AppendableIndex
import chat_assistant_2
from chat_assistant_2 import IPythonChatInterface, Tools, ChatAssistant
from groq import Groq
import requests 
import os
from typing import Any, Dict, List
import inspect
import markdown

In [2]:
# Download the FAQ dump and flatten it into one list of documents.
# The payload is a list of courses, each holding a 'course' name and a
# 'documents' list; every flattened document gets its course name attached.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout prevents the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        # Append a copy so the raw payload in `documents_raw` is left unmodified.
        documents.append({**doc, 'course': course_name})

In [3]:
# Spot-check one flattened record to confirm the 'course' key was attached.
documents[2]

{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
 'section': 'General course-related questions',
 'question': 'Course - Can I still join the course after the start date?',
 'course': 'data-engineering-zoomcamp'}

In [4]:
# Build the search index over the flattened FAQ documents:
#   - text_fields are searched as free text,
#   - keyword_fields are used for exact-match filtering (e.g. by course).
index = AppendableIndex(text_fields=["question", "text", "section"], keyword_fields=["course"])

# Kept as the final expression so the fitted index is echoed as the cell output.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x797cb05bd6d0>

In [5]:
# Smoke-test the index: no course filter or boosts are passed here, so results
# from several courses can appear in the output.
index.search('Can i still join the course?')

[{'text': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',
  'section': 'General course-related questions',
  'question': 'The course has already started. Can I still join it?',
  'course': 'machine-learning-zoomcamp'},
 {'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
  'section': 'General course-related questions',
  'question': 'Course - Can I still join the course after the start date?',
  'course': 'data-engineering-zoomcamp'},
 {'text': "Here’s how you join a in Slack: https://slack.com/

In [6]:
# Groq API client; the key is read from the GROQ_API_KEY environment variable
# (None is passed through when the variable is not set).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [31]:
class CourseFAQTools:
    """
    FAQ tools (search and insertion) backed by a shared search index.

    The public methods of an instance are registered as LLM tools via
    ``Tools.add_tools``. Their signatures and docstrings are what the model
    sees as the tool schema and description, so keep both stable and avoid
    adding public methods that are not meant to be tools.

    Attributes:
        index: Index object providing ``search(...)`` and ``append(doc)``.
        course: Course name used to filter searches and to tag new entries.
        num_results: Maximum number of results requested per search.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp', num_results: int = 5):
        """
        Args:
            index: Index object providing ``search(...)`` and ``append(doc)``.
            course (str): Course to scope searches and new entries to.
                Defaults to the previously hard-coded course.
            num_results (int): Maximum number of results per search.
                Defaults to the previously hard-coded value.
        """
        self.index = index
        self.course = course
        self.num_results = num_results

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        # Matches in the question weigh more than the default; section matches weigh less.
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            # Restrict results to the course this tool instance is configured for.
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=self.num_results,
            output_ids=True
        )

        return results


    def add_entry(self, question: str, answer: str):
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        doc = {
            'question': question,
            'text': answer,
            # Marks the entry as added at runtime rather than loaded from the FAQ dump.
            'section': 'user added',
            # Tagged with the same course that `search` filters on, so the new
            # entry is retrievable by subsequent searches from this instance.
            'course': self.course
        }
        self.index.append(doc)




In [8]:
# Bind the FAQ tools to the index built above, so entries added through the
# tools land in the same index that is searched.
faq_tools = CourseFAQTools(index)

In [9]:
# Register the methods of `faq_tools` as callable tools, then display the
# generated tool schemas (names, descriptions, parameters) for inspection.
tools = Tools()
tools.add_tools(faq_tools)
tools.get_tools()

[{'type': 'function',
  'function': {'name': 'add_entry',
   'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
   'parameters': {'type': 'object',
    'properties': {'question': {'type': 'string',
      'description': 'question parameter'},
     'answer': {'type': 'string', 'description': 'answer parameter'}},
    'required': ['question', 'answer'],
    'additionalProperties': False}}},
 {'type': 'function',
  'function': {'name': 'search',
   'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
   'parameters': {'type': 'object',
    'properties': {'query': {'type': 'string',
      'description': 'query parameter'}},
    'required': ['query

In [17]:
# Instructions given to the model for every conversation. Typos in this text
# reach the model verbatim, so the wording is kept clean ("differently",
# "ask the user").
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Before making any function calls, explain your reasoning why you want to perform something.

When searching in our FAQ, perform multiple search queries with differently phrased questions.

At the end, ask the user a question to make it more engaging
""".strip()

# Notebook-based chat front end.
interface = IPythonChatInterface()

# Wire the registered tools, prompt, interface and Groq client together.
chat = chat_assistant_2.ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt,
    interface=interface,
    groq_client=client
)

In [28]:
# Start the interactive chat session.
# NOTE(review): presumably blocks until the user ends the conversation — confirm in chat_assistant_2.
chat.run()

Chat ended


In [32]:
# Verify that entries added through the `add_entry` tool reached the index:
# keep only documents tagged 'user added' for the data engineering zoomcamp.
user_de_entries = list(filter(
    lambda doc: doc.get('section') == 'user added'
    and doc.get('course') == 'data-engineering-zoomcamp',
    index.docs,
))

print(f"Total user-added data engineering entries: {len(user_de_entries)}")

if not user_de_entries:
    print("No user-added entries found for data engineering zoomcamp")
else:
    # Show the last matching entry in index order.
    last_entry = user_de_entries[-1]
    print("\nLast user-added entry:")
    print(f"Question: {last_entry['question']}")
    print(f"Answer: {last_entry['text']}")
    print(f"Section: {last_entry['section']}")
    print(f"Course: {last_entry['course']}")

Total user-added data engineering entries: 0
No user-added entries found for data engineering zoomcamp


In [33]:
# List every registered tool by name with its description, to confirm that
# both FAQ tools are exposed to the assistant.
print("Registered tools:")
for tool in tools.get_tools():
    spec = tool['function']
    print(f"- {spec['name']}: {spec['description']}")

Registered tools:
- add_entry: Add a new entry to the FAQ database.

Args:
    question (str): The question to be added to the FAQ database.
    answer (str): The corresponding answer to the question.
- search: Search the FAQ database for entries matching the given query.

Args:
    query (str): Search query text to look up in the course FAQ.

Returns:
    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
