In [1]:
import getpass
import os

# Set the LANGSMITH_TRACING flag so LangSmith tracing is switched on for this session.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt only when the key is missing or empty, so re-running the cell (or
# launching the kernel with the key already exported) does not ask again.
# The explicit prompt text tells the user which secret is being requested;
# a bare getpass() would only show the generic "Password: " prompt.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")

In [2]:
# Collect the API keys this notebook needs. Each prompt is skipped when the
# variable is already set to a non-empty value, so a key entered in an earlier
# cell (or exported before starting the kernel) is not requested a second time
# and re-running this cell is harmless.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your openai API key: ")

In [3]:
from langchain_openai import ChatOpenAI

# Chat model used by the `generate` step to produce the final answer.
llm = ChatOpenAI(model="gpt-4o-mini")

In [4]:
import getpass
import os

# Ask for the OpenAI key interactively only when the environment does not
# already hold a non-empty value for it.
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

In [5]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

# Embedding model shared by indexing and querying.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

# In-memory vector index; its contents live only as long as the kernel does.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [8]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

In [None]:
# # Load and chunk contents of the blog
# from langchain_community.document_loaders import WebBaseLoader

# loader = WebBaseLoader("https://cs2240.graphics")
# docs = loader.load()

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# all_splits = text_splitter.split_documents(docs)

# # Index chunks
# _ = vector_store.add_documents(documents=all_splits)

In [14]:
# from langchain_unstructured import UnstructuredLoader
# file_paths = ["csci_courses.txt"]
# loader = UnstructuredLoader(file_paths)
# docs = loader.load()

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
# all_splits = text_splitter.split_documents(docs)

# # Index chunks
# _ = vector_store.add_documents(documents=all_splits)

In [13]:
from langchain_community.document_loaders import JSONLoader
import json
from pathlib import Path


# Scraped course catalogue, given relative to the notebook's working directory.
path = "web_scrape/web_scrape_output/csci_courses_with_descriptions.json"

# NOTE(review): '.courses' selects the whole value under the "courses" key, so
# the loader presumably yields one large Document that the splitter below cuts
# at arbitrary character offsets. If "courses" is a list of records, '.courses[]'
# would give one Document per course instead -- confirm against the JSON file.
loader = JSONLoader(
    file_path=path,
    jq_schema='.courses',
    # The selected value is not a plain string, so string-only content
    # checking is turned off and the loader serialises it instead.
    text_content=False,
)

docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index chunks.
# Deterministic ids make this cell idempotent: re-running it overwrites the
# same entries rather than appending duplicates under fresh random ids, which
# would otherwise make similarity_search return the same chunk several times.
# (If the source file later produces fewer chunks, stale higher-numbered
# entries remain until the vector store is re-created.)
chunk_ids = [f"{path}#chunk-{i}" for i in range(len(all_splits))]
_ = vector_store.add_documents(documents=all_splits, ids=chunk_ids)

INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [15]:
# Fetch the "rlm/rag-prompt" template from the hub; `generate` fills in its
# "question" and "context" inputs.
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    """State dictionary passed between the graph steps.

    The caller supplies ``question``; ``retrieve`` adds ``context`` and
    ``generate`` adds ``answer``.
    """

    # The user's question; read by both `retrieve` and `generate`.
    question: str
    # Documents returned by the vector-store search in `retrieve`.
    context: List[Document]
    # Text content of the LLM response produced in `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up documents similar to the question in the vector store.

    Reads ``state["question"]`` and runs it through
    ``vector_store.similarity_search`` with the store's default search
    settings.

    Returns:
        A partial state update of the form ``{"context": [...documents...]}``.
    """
    return {"context": vector_store.similarity_search(state["question"])}


def generate(state: State):
    """Answer the question with the LLM, using the retrieved documents.

    Joins the ``page_content`` of every document in ``state["context"]``
    with blank lines, fills the RAG prompt with that text and the question,
    and sends the result to ``llm``.

    Returns:
        A partial state update of the form ``{"answer": <response content>}``.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_inputs = {"question": state["question"], "context": context_text}
    llm_reply = llm.invoke(prompt.invoke(prompt_inputs))
    return {"answer": llm_reply.content}


# Build and compile the application graph: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # nodes run in list order
graph_builder.add_edge(START, "retrieve")  # entry point of the graph
graph = graph_builder.compile()

In [16]:
# Run the full retrieve -> generate pipeline for a topic-based question.
response = graph.invoke(
    {"question": "Tell me good courses on machine learning?"}
)
print(response["answer"])

INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Some good courses on machine learning include "Deep Learning," which covers key techniques such as convolutional and recurrent neural networks, and offers hands-on practice with implementations in Python. Another course focuses on the theoretical foundations and algorithm design in machine learning with provable guarantees. These courses provide a comprehensive understanding of both practical applications and theoretical concepts in the field.


In [17]:
# Run the pipeline for an instructor-based question.
response = graph.invoke(
    {"question": "What is Instructor: K. Fisler teaching?"}
)
print(response["answer"])

INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Instructor K. Fisler is teaching a course focused on proving properties about systems and programs. The course includes studying the distinction between programs and specifications, alongside the use of tools like model constructors and proof assistants. It emphasizes real-world applications through problems and projects.


In [18]:
# response = graph.invoke({"question": "If I want to pursue a career as an AI Engineer in graphics, is this course good?"})
# print(response["answer"])

In [19]:
# Run the pipeline for a yes/no question about a specific instructor.
response = graph.invoke(
    {"question": "Is D. Ritchie teaching any course?"}
)
print(response["answer"])

INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Yes, D. Ritchie is teaching a course titled "Advanced Computer Graphics" (CSCI 2240) that meets MWF from 11-11:50 AM. The course is scheduled to run from January 22, 2025, to May 16, 2025.
