In [27]:
from langchain.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://unswfounders.com/startup-year")
data = loader.load()

In [28]:
data

[Document(page_content='\n\n400 Bad Request\n\n\n\n\n\n400 Bad Request\nPlease visit status.squarespace.com for updates\n\n\nmt1sLB6d/plEWwYvN @ Wed, 20 Sep 2023 05:00:48 GMT\nSEC-43\n\n\n\n', metadata={'source': 'https://unswfounders.com/startup-year', 'title': '400 Bad Request'})]

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(data)

#
Add Embedding model from HuggingFace

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


#
Add Vector Store from Chroma

In [5]:
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

#
Add Chat Model (Jina Chat)

In [6]:
import os
jina_api_key = os.environ['JINA_API_KEY']

In [7]:
from langchain.chat_models import JinaChat
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import AIMessage, HumanMessage, SystemMessage

In [8]:
chat = JinaChat(temperature=0, jinachat_api_key=jina_api_key)

#
Create memory for the conversation

In [9]:
from langchain.memory import ConversationSummaryMemory
memory = ConversationSummaryMemory(llm=chat,memory_key="chat_history",return_messages=True)

#
Creater Retriever from Vector Store

In [10]:
from langchain.chains import ConversationalRetrievalChain

retriever = vectorstore.as_retriever()


#
Promp Template

In [20]:
template = (
    r"""You are a helpful English speaking assistant. Use the following pieces of context to answer the users question. If you cannot find the answer from the pieces of context, just say that you don't know, don't try to make up an answer. 
	---------------- 
	{context}
	"""
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{question}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [21]:
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

#
Create a chain (ConversationalRetrieverChain) to link the Chat model, Retriever, and Memory together

In [22]:
qa = ConversationalRetrievalChain.from_llm(chat, retriever=retriever, memory=memory, combine_docs_chain_kwargs={'prompt': chat_prompt})

# 
Answer Queries

In [23]:
qa("What are the negatives of COMP3411 at UNSW?")

{'question': 'What are the negatives of COMP3411 at UNSW?',
 'chat_history': [SystemMessage(content='', additional_kwargs={})],
 'answer': 'The disadvantages of taking COMP3411 (Artificial Intelligence) at UNSW include:\n\n1. Poor organization and confusing assessments. \n   - The course structure is poorly organized, making it difficult for students to understand the material and assessments.\n   - Assessments are often irrelevant and confusing, which adds to the overall frustration.\n\n2. Outdated content. \n   - The course content is outdated and does not provide much value in terms of practical knowledge and skills.\n\n3. Lack of responsiveness from course staff. \n   - Despite being aware of the problems, the course staff did not take any action to address them.\n   - Last-minute changes were made without informing the students, leading to further confusion.\n\n4. Negative learning experience. \n   - Many students felt that they did not learn much from the course, resulting in an 