In [21]:
!pip install -q langchain langchain-community chromadb sentence-transformers openai tiktoken python-dotenv langchain_openai

In [22]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [23]:
import os
from typing import Dict, Any

In [24]:
from openai import OpenAI
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
api_key = os.environ.get("API_KEY")
project_id = os.environ.get("PROJECT_ID")

# Single OpenAI client shared by the chat-completion helpers in this notebook.
client = OpenAI(
    api_key=api_key,
    project=project_id,
)

In [25]:
# pypdf is the PDF parsing backend for the PyPDF document loaders imported in the next cell.
%pip install -qU pypdf

Note: you may need to restart the kernel to use updated packages.


In [26]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader


# Read every PDF found under data/ into a flat list of Document objects.
file_path = "data/"
loader = PyPDFDirectoryLoader(file_path)
docs = loader.load()
print(len(docs))

# Spot-check one arbitrary entry to confirm that text and metadata were extracted.
docs[144]

1218


Document(metadata={'producer': 'PyPDF', 'creator': 'Google', 'creationdate': '', 'title': 'Introduction to Conda & File I/O Review.pptx', 'source': 'data/introtoconda_fileIO-P1W3D1-3-24.pdf', 'total_pages': 44, 'page': 7, 'page_label': '8'}, page_content='Demonstration of opening VSCode')

In [27]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(api_key=api_key)

# Open (or create) the on-disk collection first. Calling Chroma.from_documents
# unconditionally re-embeds every page and appends another copy of it to the
# persisted collection each time this cell is re-run, so retrieval starts
# returning duplicates and every run pays for the embeddings again.
solutions_db = Chroma(persist_directory="./solutions", embedding_function=embeddings)
if not solutions_db.get(limit=1)["ids"]:
    # Empty store: embed the slide pages once. Delete ./solutions to force a rebuild.
    solutions_db = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./solutions")
# No explicit persist() call: Chroma >= 0.4 writes to persist_directory
# automatically and the wrapper's persist() method is deprecated.

In [28]:
# Similarity retriever over the slide store; k=3 caps each lookup at three documents.
solutions_retriever = solutions_db.as_retriever(
    search_kwargs=dict(k=3),
)

In [35]:
def solution_proposer(student_bug: str) -> str:
    """Draft a reply that points a student to class material relevant to their bug.

    Documents matching the bug report are fetched from ``solutions_retriever``
    and appended to the prompt as context for a single ``gpt-4o-mini`` chat
    completion.

    Args:
        student_bug: The student's code and/or error message.

    Returns:
        The text content of the model's first choice.
    """
    # Retrievers are Runnables: ``invoke`` is the supported replacement for the
    # deprecated ``get_relevant_documents`` and returns the same list of Documents.
    context = solutions_retriever.invoke(student_bug)
    # The Document list is interpolated as-is, so each entry's metadata
    # (title, source, page) is visible to the model alongside the page text.
    prompt = f"""
        Student Error Message: {student_bug} 
        You are a master expert at teaching, implementing code and debugging code for students at a data science fellowship.
        Use the solutions context which contains class slides and PDF to point students to where they can refer to
        to find the solution or topic to their question. If possible include phase, week and day and page number - extract this information from the title of the pdf and display it like Phase 1, Week 1, Day 1 (P represents Phase, W represents week and D represents day)
        {context}
        """

    resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
    return resp.choices[0].message.content

In [None]:
def solution_checker(student_bug: str, solution: str) -> str:
    """Ask the model to rewrite a drafted reply so it guides rather than solves.

    Args:
        student_bug: The student's original code and/or error message.
        solution: The drafted reply to be reviewed.

    Returns:
        The text content of the model's first choice.
    """
    review_prompt = f"""
        Review this response and make sure the answer was not directly given to the student.
        If it does, please update it and remove any given solutions.
        You should point to one or more resource(s) and only guide student and give hints to the solution. Be sure that the phase, week and day is included if it was provided by the slides. 
        Follow this format:
        
        This is the student's error message: {student_bug}
        This is the response {solution}
        Direct this message to the student.
    """

    # Low temperature keeps the reviewed wording close to deterministic.
    request = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": review_prompt}],
        "temperature": 0.1,
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [53]:
def handle_bug(issue: str):
    """Run a bug report through the propose-then-review steps and print the result.

    Args:
        issue: The student's code and/or error message.
    """
    # Step 1: draft a reply from the retrieved class material.
    proposed = solution_proposer(issue)
    # Step 2: have the reviewer strip any directly given solution from the draft.
    reviewed = solution_checker(issue, proposed)
    print("Debugger:", reviewed, "\n")

In [54]:
# Plain (non-f) string: the sample has no placeholders, and an f-string would
# misparse any literal braces that appear in a pasted snippet.
student_issue_1 = """
   def divide(a, b):
    return a / b
print(divide(10, 0))
Error: ZeroDivisionError: division by zero
"""

In [55]:
# Run the ZeroDivisionError example through the proposer and checker; the reply is printed.
handle_bug(student_issue_1)

Debugger: It looks like you're encountering a `ZeroDivisionError` in your code when trying to divide by zero. This is a common error in programming, and it's important to handle such cases to prevent your program from crashing.

To guide you in resolving this issue, I recommend reviewing the materials from your fellowship. Specifically, you can check the following resources:

- **Phase 1, Week 1, Day 1** (First Steps in Programming, Page 46): This document covers basic programming concepts, including how to handle mathematical operations. While it may not directly address exceptions, it provides foundational knowledge that is crucial for understanding how to manage errors in your code.

Additionally, consider exploring the concept of `try` and `except` blocks in Python. This will help you catch and handle errors gracefully. If you have any questions about this or need further hints, feel free to ask! 



Evaluation for student issue 1: 

In [51]:
# Plain (non-f) string: the sample has no placeholders, and an f-string would
# misparse any literal braces that appear in a pasted snippet.
student_issue_2 = """
    SELECT name, COUNT(*) 
    FROM customers 
    WHERE signup_date > '2023-01-01' 
    ORDER BY COUNT(*) DESC;
    Error: column "customers.name" must appear in the GROUP BY clause or be used in an aggregate function.
    """

In [56]:
# Run the SQL GROUP BY example through the proposer and checker; the reply is printed.
handle_bug(student_issue_2)

Debugger: It looks like you're encountering an error related to the use of the `COUNT()` function in your SQL query. The error message indicates that when you use aggregate functions like `COUNT()`, all non-aggregated columns must be included in the `GROUP BY` clause.

To resolve this, think about how you can structure your query to group the results appropriately. Consider what you need to include in the `GROUP BY` clause to ensure that your query runs without errors.

For additional guidance, I recommend reviewing the class slides and PDF. Specifically, look at **Phase 1, Week 9, Day 1** of the **"Introduction to Structured Databases I"** document. This section should provide you with valuable insights into SQL grouping and aggregation functions that will help you understand how to adjust your query correctly. 



In [None]:
student_issue_3 = f"""
