In [19]:
from glob import glob
from utils import to_markdown

In [20]:
files = glob("../data/text files/* www.stevens.edu program *")
print(len(files))
files

186


['../data/text files\\https_  www.stevens.edu program accelerated-chemical-biology-bachelor-degree.txt',
 '../data/text files\\https_  www.stevens.edu program actuarial-mathematics-quantitative-risk-masters.txt',
 '../data/text files\\https_  www.stevens.edu program Advanced-Systems-Engineering-graduate-certificate.txt',
 '../data/text files\\https_  www.stevens.edu program algorithmic-trading-strategies.txt',
 '../data/text files\\https_  www.stevens.edu program analytics-mba-and-graduate-management-program-dual-degree-masters-program.txt',
 '../data/text files\\https_  www.stevens.edu program analytics-mba.txt',
 '../data/text files\\https_  www.stevens.edu program application-machine-learning-pharmaceutical-development-graduate-certificate.txt',
 '../data/text files\\https_  www.stevens.edu program applied-artificial-intelligence-masters-program-online.txt',
 '../data/text files\\https_  www.stevens.edu program applied-artificial-intelligence-masters.txt',
 '../data/text files\\http

In [21]:
for file in files:
    if "machine" in file:
        print(file)

../data/text files\https_  www.stevens.edu program application-machine-learning-pharmaceutical-development-graduate-certificate.txt
../data/text files\https_  www.stevens.edu program machine-learning-masters-ses-grad-online.txt
../data/text files\https_  www.stevens.edu program machine-learning-masters.txt
../data/text files\https_  www.stevens.edu program ssb-machine-learning-finance-certificate.txt


In [22]:
def extract_program_name(path):
    return path.split(" ")[-1].split(".")[0].replace("-", " ").title()

In [23]:
def assign_program_type(path):
    if "master" in path: return "Masters/MS"
    if "bachelor" in path: return "Bachelors/BS"
    return "Certificate"

In [24]:
def assign_program_availability(path):
    if "online" in path: return "Online and On Campus"
    return "On Campus"

In [25]:
main = {}
for path in files:
    program_type = assign_program_type(path)
    program_availability = assign_program_availability(path)
    program_name = extract_program_name(path)
    with open(path, "r") as fp:
        page_content = fp.read()
    main[path] = {"program_type": program_type,
                 "program_availability": program_availability,
                 "program_name": program_name,
                 "page_content": page_content}

In [26]:
main["../data/text files\https_  www.stevens.edu program machine-learning-masters.txt"]

{'program_type': 'Masters/MS',
 'program_availability': 'On Campus',
 'program_name': 'Machine Learning Masters',
 'page_content': 'Get on the fast track to make an impact in one of today\'s fastest growing fields with a machine learning master\'s degree.\nThe machine learning master\'s program establishes the theoretical and practical foundations necessary to be at the forefront of progress in the next technological revolution. Advancements made in machine learning and related disciplines will soon touch every piece of technology, making an advanced degree an essential asset for a successful career.\nThe rapidly expanding field of machine learning has a multitude of applications in diverse areas such as intelligent systems, computer vision, speech recognition, natural language processing, robotics, finance, information retrieval, bioinformatics, healthcare, and weather prediction.\nOur unique machine learning master\'s program develops a thorough understanding of deep learning theory 

In [27]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [28]:
model_kwargs = {
    "trust_remote_code": True,
    # "device": "cpu"
    }
encode_kwargs={"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

In [29]:
docs = [
    Document(
        page_content=main[doc]["page_content"],
        metadata={"program_type": main[doc]["program_type"],
                "program_availability": main[doc]["program_availability"],
                "program_name": main[doc]["program_name"]}
    )
    for doc in main
]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(docs, embeddings)

In [30]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

metadata_field_info = [
    AttributeInfo(
        name="program_type",
        description="The type of the program offered by the university ['Masters/MS', 'Bachelors/BS', 'Certificate']",
        type="string",
    ),
    AttributeInfo(
        name="program_availability",
        description="The mode in which the program will be available and delivered to the students['Online and On Campus', 'On Campus']",
        type="string",
    ),
    AttributeInfo(
        name="program_name",
        description="The name of the Program",
        type="string",
    ),
]
document_content_description = "Details regarding the course"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
)

In [33]:
question = "What is Masters in Machine Learning alll about?"
context = retriever.invoke(question)
context

[Document(page_content='Get on the fast track to make an impact in one of today\'s fastest growing fields with a machine learning master\'s degree.\nThe machine learning master\'s program establishes the theoretical and practical foundations necessary to be at the forefront of progress in the next technological revolution. Advancements made in machine learning and related disciplines will soon touch every piece of technology, making an advanced degree an essential asset for a successful career.\nThe rapidly expanding field of machine learning has a multitude of applications in diverse areas such as intelligent systems, computer vision, speech recognition, natural language processing, robotics, finance, information retrieval, bioinformatics, healthcare, and weather prediction.\nOur unique machine learning master\'s program develops a thorough understanding of deep learning theory and familiarizes you with the most important paradigms. This allows you to apply or develop the appropriate 

In [34]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [   
        ("system", "You are an assistant for question-answering tasks related to Stevens Institute Of Technology."),
        ("human", """ 
        Use the following pieces of retrieved context to answer the question.
        If you don't know the answer, just say that you don't know.
        If the topic is related to a course then ensure to mention to course numbers and display the result as a table.
        Answer in markdown format and render tables without code 
        Question: {question}
        Context: {context}
        Answer:"""),
    ]
)
chat_template

ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an assistant for question-answering tasks related to Stevens Institute Of Technology.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=" \n        Use the following pieces of retrieved context to answer the question.\n        If you don't know the answer, just say that you don't know.\n        If the topic is related to a course then ensure to mention to course numbers and display the result as a table.\n        Answer in markdown format and render tables without code \n        Question: {question}\n        Context: {context}\n        Answer:"))])

In [35]:
combine_docs_chain = create_stuff_documents_chain(llm, chat_template)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [36]:
response = retrieval_chain.invoke({"input": question, "question": question})
to_markdown(response["answer"])

> ### Masters in Machine Learning at Stevens Institute of Technology
> 
> The Masters in Machine Learning program at Stevens Institute of Technology is designed to equip students with the knowledge and skills to be at the forefront of progress in the rapidly expanding field of machine learning. Here are some key points about the program:
> 
> - **Program Overview**:
>   - Establishes theoretical and practical foundations necessary for success in the next technological revolution.
>   - Develops a thorough understanding of deep learning theory and important paradigms.
>   - Offers a flexible curriculum with part-time or full-time enrollment options.
>   - Provides thesis and non-thesis tracks, as well as internship opportunities.
>   - Available fully online with Curricular Practical Training (CPT) for international students.
> 
> - **Admissions Requirements**:
>   - Bachelor's degree from an accredited institution with a minimum GPA of 3.0.
>   - Official college transcripts, two letters of recommendation, and TOEFL/IELTS scores for international students.
>   - Competitive GRE or GMAT score (not required for part-time students).
> 
> - **Career Opportunities**:
>   - Graduates can pursue roles such as Research Scientist, Machine Learning Engineer, Data Scientist, Business Intelligence Developer, and more.
>   - Alumni have been hired at top companies like Amazon, Bloomberg, Facebook, Google, IBM, and Intel.
> 
> - **Location and Industry Connections**:
>   - Located in the heart of the New York City metropolitan area with networking opportunities at over 7,500 tech companies.
>   - Strong emphasis on practical skills needed for industry work.
> 
> - **Course Sequence**:
>   - The program offers a suggested term-by-term sequence of courses, including foundational courses in machine learning, deep learning, and artificial intelligence.
>   - Elective courses cover topics like Natural Language Processing, Deep Learning, and Applied Machine Learning.
> 
> For more detailed information on specific courses and elective options, students can refer to the Academic Catalog for the most up-to-date offerings.
> 
> Stevens Institute of Technology's Machine Learning Master's program aims to prepare students for successful careers in industry, academia, or research by providing a comprehensive and practical education in this rapidly evolving field.