In [42]:
from dotenv import load_dotenv
import pickle
import os
from langchain.retrievers import TFIDFRetriever
from langchain.schema import Document

In [43]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is absent: assigning None
# into os.environ would otherwise raise an opaque TypeError.
API_KEY = os.getenv("OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Re-export explicitly so libraries that read the key from the environment see it.
os.environ["OPENAI_API_KEY"] = API_KEY

In [44]:
# Location of the pickled vector store, relative to this notebook.
DB_PICKLE_PATH = "../../data/dev/db_cs_with_sources.pkl"

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that were produced by this project / a trusted source.
with open(DB_PICKLE_PATH, "rb") as pickle_file:
    db = pickle.load(pickle_file)

In [59]:
# Fetch up to 12 matches for the question together with their relevance scores.
query = "On what days does Deep Learning meet?"
res = db.similarity_search_with_relevance_scores(
    query,
    k=12,  # no score_threshold is applied in this call
)

In [None]:
# Display whatever is currently bound to `res`.
# NOTE(review): this cell has no execution count and no saved output.
res

In [68]:
# Same question, this time through the store's generic `search` entry point
# using the "mmr" search type; results here are plain documents (no scores).
query = "On what days does Deep Learning meet?"
res = db.search(
    query,
    search_type="mmr",
    k=12,  # no score_threshold is applied in this call
)

In [69]:
# Display the documents currently bound to `res`.
res

[Document(page_content="CSCI 381 (Deep Learning) Section 01 year: 2024, semester: Spring, courseID: 022329, sectionType: in-person, peoplesoftNumber: 3244, classType: Lecture, instructors: [{'id': 13529, 'name': 'Mark Hopkins'}], meetings: [{'days': 'MWF', 'start': '11:00', 'end': '11:50', 'facility': ''}]\n", metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=381&crsid=022329&req_year=0'}),
 Document(page_content="CSCI 374 (Machine Learning) classReqEval: Presentations, problem sets, programming exercises, empirical analyses of algorithms, critical analysis of current literature; the final two weeks are focused on a project of the student's design.\n", metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=374&crsid=017427&req_year=0'}),
 Document(page_content='CSCI 381 (Deep Learning) prereqs: CSCI 136 and fulfillment of the Discrete Mathematics Proficiency requirement\n', metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=38

In [65]:
# Display the (document, score) pairs currently bound to `res`.
# NOTE(review): this cell's execution count (65) is lower than that of the
# search cells shown before it (68/69), so the saved output below was produced
# by an earlier run — restart the kernel and run all cells to refresh it.
res

[(Document(page_content='CSCI 361 (Theory Of Computation) prereqs: CSCI 256 or both a 300-level MATH course and permission of instructor\n', metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=361&crsid=010814&req_year=0'}),
  0.8150098322326814),
 (Document(page_content='MAJOR REQUIREMENTS Core Courses: Computer Science 237 Computer Organization, Computer Science 256 Algorithm Design and Analysis, Computer Science 334 Principles of Programming Languages, Computer Science 361 Theory of Computation \n', metadata={'source': 'https://catalog.williams.edu/pdf/csci.pdf'}),
  0.8128031496851965),
 (Document(page_content='MAJOR REQUIREMENTS Introductory Courses: Computer Science 134 Introduction to Computer Science, Computer Science 136 Data Structures and Advanced Programming \n', metadata={'source': 'https://catalog.williams.edu/pdf/csci.pdf'}),
  0.8122888106193938),
 (Document(page_content='CSCI 256 (Algorithm Design And Analysis) prereqs: CSCI 136 and fulfillment of t

In [66]:
# Scored similarity search for a department-level question, passing a
# score_threshold of 0.75 along with the result limit of 12.
query = "How many computer labs does the Computer Science Department have?"
res = db.similarity_search_with_relevance_scores(
    query,
    score_threshold=0.75,
    k=12,
)

In [67]:
# Display the (document, score) pairs currently bound to `res`.
res

[(Document(page_content='LABORATORY FACILITIES: The Computer Science Department maintains five departmental computer laboratories for students taking Computer Science courses, as well as a lab that can be configured for teaching specialized topics such as robotics. The workstations in these laboratories also support student and faculty research in computer science.\n', metadata={'source': 'https://catalog.williams.edu/pdf/csci.pdf'}),
  0.861918494908609),
 (Document(page_content='INTRODUCTORY COURSES: The department offers a choice of introductory courses; Computer Science 102 The Socio-Techno Web, 103 Electronic Textiles, 104 Data Science and Computation for All, and 134 Introduction to Computer Science.\n', metadata={'source': 'https://catalog.williams.edu/pdf/csci.pdf'}),
  0.7952909866149502),
 (Document(page_content='Alternate Year Courses: Computer Science 102, 103, 104, and our electives are each usually offered at least every other year. All other Computer Science courses are 

In [3]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding function handed to the vector store.
embeddings = OpenAIEmbeddings()

# Open the Chroma store persisted under the project's dev data directory.
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory="../../data/dev/cs_courses_vectorstore",
)

Using embedded DuckDB with persistence: data will be stored in: ../../data/dev/cs_courses_vectorstore


In [38]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Metadata attributes the self-query retriever is allowed to filter on.
# The declared types must match what is stored on the documents: the stored
# metadata holds `course_number` and `year` as integers (e.g. 136, 2024),
# so they are declared as "integer" rather than "string" to keep generated
# filter values type-compatible with the store.
metadata_field_info = [
    AttributeInfo(
        name="department",
        description="The academic department",
        type="string",
    ),
    AttributeInfo(
        name="course_number",
        description="The number that comes after the department in the course name",
        type="integer",
    ),
    AttributeInfo(
        name="year",
        description="The year this course is being offered",
        type="integer",
    ),
    AttributeInfo(
        name="semester",
        description="The semester this course is being offered (Fall or Spring)",
        type="string",
    ),
]

# One-line description of the document contents, given to the query constructor.
document_content_description = "Information about a college department's courses and sections of each course"

In [39]:
# temperature=0 keeps the generated structured query as stable as possible.
llm = OpenAI(temperature=0)

# Build the self-query retriever with its search settings supplied up front.
# The previous `kwargs={"k": 6}` argument is not a recognised option of
# `from_llm` (it was forwarded as a field literally named "kwargs"), so the
# limit and search type are now passed through the real `search_kwargs` and
# `search_type` fields instead of being patched onto the instance afterwards.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
    verbose=True,
    search_type="mmr",
    search_kwargs={"k": 6},
)

In [40]:
# Ask a natural-language question; with verbose=True the retriever prints the
# structured query (search text + metadata filter) it derived from it.
res = retriever.get_relevant_documents(
    "Which CSCI course taught in the Fall of 2024 will teach me about dynamic programming?"
)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


query='dynamic programming' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='department', value='CSCI'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='year', value='2024'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='semester', value='Fall')])


In [41]:
# Display the documents returned by the self-query retriever.
res

[Document(page_content="CSCI 136: Data Structures And Advanced Programming, Section 01\n Year: 2024, Semester: Fall, Section type: in-person, Class type: Lecture, Meetings: [{'days': 'MWF', 'start': '09:00', 'end': '09:50', 'facility': ''}], Instructors: ['James M. Bern']", metadata={'source_url': 'https://catalog.williams.edu/csci/detail/?strm=&cn=136&crsid=010803&req_year=0', 'department': 'CSCI', 'course_number': 136, 'year': 2024, 'semester': 'Fall'}),
 Document(page_content="CSCI 104: Data Science And Computing For All, Section 01\n Year: 2024, Semester: Fall, Section type: in-person, Class type: Lecture, Meetings: [{'days': 'MWF', 'start': '10:00', 'end': '10:50', 'facility': ''}], Instructors: ['Stephen N. Freund']", metadata={'source_url': 'https://catalog.williams.edu/csci/detail/?strm=&cn=104&crsid=022117&req_year=0', 'department': 'CSCI', 'course_number': 104, 'year': 2024, 'semester': 'Fall'}),
 Document(page_content="CSCI 334: Principles Of Programming Languages, Section 0