In [35]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is missing or empty;
# assigning None into os.environ would otherwise raise an opaque TypeError.
API_KEY = os.getenv('OPENAI_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = API_KEY

In [36]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.vectorstores.faiss import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI
import pickle
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

In [95]:
def pretty_print_res_qa(res, include_sources=True):
    """Print a QA chain result as a short chat-style transcript.

    Args:
        res: Mapping with the keys "question" and "output_text". When
            include_sources is true it must also contain "input_documents",
            an iterable of objects exposing a ``page_content`` attribute.
        include_sources: When true (the default), also list the consulted
            passages, numbered from 1. When false, only the question and
            the answer are printed.
    """
    print(f"[USER]: {res['question']}")
    print(f"[PenDragon]: {res['output_text']}\n")

    # Honour the flag: callers that only want the answer can skip the passages.
    if not include_sources:
        return

    print("[Passages Consulted]:")
    for idx, source in enumerate(res["input_documents"], start=1):
        print(f"--- [{idx}] {source.page_content}")

def ask_n_print(retriever, chain, query):
    """Answer `query` with `chain` over retrieved passages and print the result.

    Args:
        retriever: Object whose ``get_relevant_documents(query)`` returns the
            passages to feed to the chain.
        chain: Callable invoked with the inputs mapping and
            ``return_only_outputs=False``; its result is passed on to
            ``pretty_print_res_qa``.
        query: The question text.
    """
    chain_inputs = {
        "input_documents": retriever.get_relevant_documents(query),
        "question": query,
    }
    pretty_print_res_qa(chain(chain_inputs, return_only_outputs=False))

In [74]:
# Embedding client; in the visible cells it is referenced only by the
# commented-out FAISS index-building step.
embeddings = OpenAIEmbeddings()

In [75]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
# presumably a sequence of (text, source) pairs — the next cell unzips it that way.
with open("./data/everything_ultra_processed_w_sources.pkl", "rb") as f:
    data_w_sources = pickle.load(f)

In [76]:
# Unzip the loaded records into two parallel tuples: passage texts and their sources.
texts, sources = zip(*data_w_sources)

In [77]:
# Create & save the vectorstore (one-time build step; uncomment to regenerate ./db_courses_processed_w_sources.pkl)
# db = FAISS.from_texts(texts, embeddings, metadatas=[{"source": source} for source in sources])
# with open("./db_courses_processed_w_sources.pkl", "wb") as f:
#     pickle.dump(db, f)

In [78]:
# Load the previously pickled vectorstore instead of re-embedding every text.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
with open("./db_courses_processed_w_sources.pkl", "rb") as f:
    db = pickle.load(f)

# Return the 6 best-matching passages per query.
retriever = db.as_retriever(search_kwargs={"k": 6})

In [79]:
# QA-with-sources chain using the "stuff" chain type.
# NOTE(review): temperature=0.5 makes answers vary between runs of the same
# question — confirm that is intended for factual catalog lookups.
chain = load_qa_with_sources_chain(
    OpenAI(temperature=0.5),
    chain_type="stuff",
)

In [80]:
# Retrieve supporting passages first, then let the QA chain answer from them.
query = "Which CS professors are on leave?"
docs = retriever.get_relevant_documents(query)
res = chain(
    dict(input_documents=docs, question=query),
    return_only_outputs=False,
)

In [86]:
# Render the result computed in the previous cell.
# NOTE(review): the saved output's passages header differs from the string the
# current helper prints, so this output looks stale — re-run to refresh it.
pretty_print_res_qa(res)

[USER]: Which CS professors are on leave?
[PenDragon]:  Samuel McCauley, Aaron M. Williams, and Shikha Singh are CS professors on leave. 
SOURCES: https://catalog.williams.edu/pdf/csci.pdf

[PenDragon Sources Consulted]:
--- [1] Samuel McCauley, Assistant Professor of Computer Science; on leave 2022-2023 

--- [2] Aaron M. Williams, Associate Professor of Computer Science; on leave Spring 2023 

--- [3] Shikha Singh, Assistant Professor of Computer Science; on leave 2022-2023 

--- [4] Iris Howley, Assistant Professor of Computer Science 

--- [5] Mark Hopkins, Assistant Professor of Computer Science 

--- [6] Bill K. Jannen, Assistant Professor of Computer Science 



In [89]:
# Use the shared helper so retrieval, chain invocation and printing stay in one place.
query = "What are the prereqs for CS 379?"
ask_n_print(retriever, chain, query)

[USER]: What are the prereqs for CS 379?
[PenDragon]:  The prerequisites for CS 379 are CSCI 136, and either CSCI 256 or STAT 201/202.
SOURCES: https://catalog.williams.edu/csci/detail/?strm=&cn=379&crsid=021861&req_year=0

[Passages Consulted]:
--- [1] CSCI 379 prereqs: CSCI 136, and either CSCI 256 or STAT 201/202.

--- [2] CSCI 378 prereqs: CSCI 136, and at least one of CSCI 237, 256, or 334

--- [3] CSCI 345 prereqs: CSCI 237

--- [4] CSCI 397 prereqs: Permission of department

--- [5] CSCI 331 prereqs: CSCI 237

--- [6] CSCI 432 prereqs: CSCI 237 and either CSCI 256 or 334



In [90]:
# Use the shared helper so retrieval, chain invocation and printing stay in one place.
query = "On what days does CSCI 381 meet?"
ask_n_print(retriever, chain, query)

[USER]: On what days does CSCI 381 meet?
[PenDragon]:  CSCI 381 meets on Mondays, Wednesdays, and Fridays.
SOURCES: https://catalog.williams.edu/csci/detail/?strm=&cn=381&crsid=022329&req_year=0

[Passages Consulted]:
--- [1] CSCI 381 departmentNotes: 

--- [2] CSCI 331 classFormat: This Course Has Twice-Weekly Lecture Meetings As Well As A Weekly Lab Meeting., extraInfo: , crossListing: ['CSCI 331'], components: ['Laboratory', 'Lecture']

--- [3] CSCI 381 Section 01 year: 2024, semester: Spring, courseID: 022329, sectionType: in-person, peoplesoftNumber: 3244, classType: Lecture, instructors: [{'id': 13529, 'name': 'Mark Hopkins'}], meetings: [{'days': 'MWF', 'start': '11:00', 'end': '11:50', 'facility': ''}]

--- [4] CSCI 381 Section 02 year: 2024, semester: Spring, courseID: 022329, sectionType: in-person, peoplesoftNumber: 3932, classType: Lecture, instructors: [{'id': 13529, 'name': 'Mark Hopkins'}], meetings: [{'days': 'MWF', 'start': '12:00', 'end': '12:50', 'facility': ''}]

--

In [91]:
# Use the shared helper so retrieval, chain invocation and printing stay in one place.
query = "Can I take Intro To Computer Security as a fifth course?"
ask_n_print(retriever, chain, query)

[USER]: Can I take Intro To Computer Security as a fifth course?
[PenDragon]:  Yes, you can take Intro To Computer Security as a fifth course.
SOURCES: https://catalog.williams.edu/csci/detail/?strm=&cn=331&crsid=020794&req_year=0

[Passages Consulted]:
--- [1] CSCI 331 titleLong: Introduction To Computer Security, titleShort: Intro To Computer Security

--- [2] CSCI 134 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [3] CSCI 498 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [4] CSCI 497 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [5] CSCI 494 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [6] CSCI 398 gradingBasisDesc: Pass/Fail Available, Fifth Course Available



In [97]:
# Use the shared helper so retrieval, chain invocation and printing stay in one place.
query = "Can I take CSCI 331 as a fifth course?"
ask_n_print(retriever, chain, query)

[USER]: Can I take CSCI 331 as a fifth course?
[PenDragon]:  No, you cannot take CSCI 331 as a fifth course.
SOURCES: https://catalog.williams.edu/csci/detail/?strm=&cn=331&crsid=020794&req_year=0

[Passages Consulted]:
--- [1] CSCI 134 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [2] CSCI 331 gradingBasisDesc: No Pass/Fail and No Fifth Course

--- [3] CSCI 497 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [4] CSCI 398 gradingBasisDesc: Pass/Fail Available, Fifth Course Available

--- [5] CSCI 136 gradingBasisDesc: Pass/Fail Available, Fifth Course Unavailable

--- [6] CSCI 493 gradingBasisDesc: Pass/Fail Available, Fifth Course Available



In [98]:
# Use the shared helper so retrieval, chain invocation and printing stay in one place.
query = "Is CSCI 345 offered in the fall, spring, or both in 2024?"
ask_n_print(retriever, chain, query)

[USER]: Is CSCI 345 offered in the fall, spring, or both in 2024?
[PenDragon]:  CSCI 345 is offered in the spring semester in 2024.
SOURCES: https://catalog.williams.edu/csci/detail/?strm=&cn=345&crsid=022328&req_year=0

[Passages Consulted]:
--- [1] CSCI 345 Section 01 year: 2024, semester: Spring, courseID: 022328, sectionType: in-person, peoplesoftNumber: 3206, classType: Lecture, instructors: [{'id': 13530, 'name': 'James M. Bern'}], meetings: [{'days': 'TR', 'start': '09:55', 'end': '11:10', 'facility': ''}]

--- [2] CSCI 345 enrolmentPreferences: Current or expected Computer Science majors

--- [3] CSCI 345 Section 03 year: 2024, semester: Spring, courseID: 022328, sectionType: in-person, peoplesoftNumber: 3208, classType: Laboratory, instructors: [{'id': 13530, 'name': 'James M. Bern'}], meetings: [{'days': 'T', 'start': '14:30', 'end': '16:00', 'facility': ''}]

--- [4] CSCI 345 Section 02 year: 2024, semester: Spring, courseID: 022328, sectionType: in-person, peoplesoftNumber:

In [99]:
# Same question, asking the chain to return only its outputs.
query = "Is CSCI 345 offered in the fall, spring, or both in 2024?"
docs = retriever.get_relevant_documents(query)
res = chain(
    dict(input_documents=docs, question=query),
    return_only_outputs=True,
)
res

{'output_text': ' CSCI 345 is offered in the spring semester of 2024.\nSOURCES: https://catalog.williams.edu/csci/detail/?strm=&cn=345&crsid=022328&req_year=0'}

In [100]:
# Same question with return_only_outputs=False, so the raw result also carries the inputs.
query = "Is CSCI 345 offered in the fall, spring, or both in 2024?"
docs = retriever.get_relevant_documents(query)
res = chain(
    dict(input_documents=docs, question=query),
    return_only_outputs=False,
)
res

{'input_documents': [Document(page_content="CSCI 345 Section 01 year: 2024, semester: Spring, courseID: 022328, sectionType: in-person, peoplesoftNumber: 3206, classType: Lecture, instructors: [{'id': 13530, 'name': 'James M. Bern'}], meetings: [{'days': 'TR', 'start': '09:55', 'end': '11:10', 'facility': ''}]\n", metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=345&crsid=022328&req_year=0'}),
  Document(page_content='CSCI 345 enrolmentPreferences: Current or expected Computer Science majors\n', metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=345&crsid=022328&req_year=0'}),
  Document(page_content="CSCI 345 Section 03 year: 2024, semester: Spring, courseID: 022328, sectionType: in-person, peoplesoftNumber: 3208, classType: Laboratory, instructors: [{'id': 13530, 'name': 'James M. Bern'}], meetings: [{'days': 'T', 'start': '14:30', 'end': '16:00', 'facility': ''}]\n", metadata={'source': 'https://catalog.williams.edu/csci/detail/?strm=&cn=3