In [1]:
from langchain.document_loaders import WebBaseLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders.image import UnstructuredImageLoader

loader = UnstructuredImageLoader("context/img/course_preq.png")
data = loader.load()

In [2]:
data

[Document(page_content='9322\n\n96\n\n48\n\n4336 4337 6733 9332 9312 9243 9313 3801 9242 3331 3311 9315 3211 <— 3222 32312111 9319 9318 48 UOC OR 6841 [ | 6441 2121 1521 eA 6752 * 3153 3161 2041 9334 6752 6324 2521 3121 3821 OR +— 9321 <— 1531 PD 8 UOC UOC 4920 2511 4141 6714 3131 3900 6721 6451 3421 6452 3411 4418 6771 6080 9844 9444 3431 9491 9433 9517 UOC 3511 4511 9418 «—— 9417 6451 6443 6843 6445 6845 6447 6448 6449 9447 4121 4128 6741 O Systems Theory Al & ML Security', metadata={'source': 'context/img/course_preq.png'})]

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(data)

#
Add Embedding model from HuggingFace

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


#
Add Vector Store from Chroma

In [5]:
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

#
Add Chat Model (Jina Chat)

In [1]:
import os
# jina_api_key = os.environ['JINA_API_KEY']

In [2]:
os.getenv("AI21_API_KEY")

'M207Fk9vvQ6chFNFrGJX5Gip8hlrIYqD'

In [3]:
from langchain.llms import AI21
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [4]:
llm = AI21(ai21_api_key=os.getenv("AI21_API_KEY"))

In [5]:
llm

AI21(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, model='j2-jumbo-instruct', temperature=0.7, maxTokens=256, minTokens=0, topP=1.0, presencePenalty=AI21PenaltyData(scale=0, applyToWhitespaces=True, applyToPunctuations=True, applyToNumbers=True, applyToStopwords=True, applyToEmojis=True), countPenalty=AI21PenaltyData(scale=0, applyToWhitespaces=True, applyToPunctuations=True, applyToNumbers=True, applyToStopwords=True, applyToEmojis=True), frequencyPenalty=AI21PenaltyData(scale=0, applyToWhitespaces=True, applyToPunctuations=True, applyToNumbers=True, applyToStopwords=True, applyToEmojis=True), numResults=1, logitBias=None, ai21_api_key='M207Fk9vvQ6chFNFrGJX5Gip8hlrIYqD', stop=None, base_url=None)

In [1]:
import os
os.getenv("ANYSCALE_API_KEY")


'esecret_wnk9gzt14bcyn1eb7gdgsc1yxk'

In [2]:
from langchain.chat_models import ChatAnyscale

chat = ChatAnyscale(model_name='meta-llama/Llama-2-7b-chat-hf', temperature=1.0, anyscale_api_key=os.environ["ANYSCALE_API_KEY"])


dict_keys(['meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-7b-chat-hf', 'meta-llama/Llama-2-70b-chat-hf', 'codellama/CodeLlama-34b-Instruct-hf'])


In [7]:
from langchain.chat_models import JinaChat
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import AIMessage, HumanMessage, SystemMessage

In [8]:
# chat = JinaChat(temperature=0, jinachat_api_key=jina_api_key)

#
Create memory for the conversation

In [9]:
from langchain.memory import ConversationSummaryMemory
memory = ConversationSummaryMemory(llm=chat,memory_key="chat_history",return_messages=True)

#
Creater Retriever from Vector Store

In [10]:
from langchain.chains import ConversationalRetrievalChain

retriever = vectorstore.as_retriever()


#
Promp Template

In [20]:
template = (
    r"""You are a helpful English speaking assistant. Use the following pieces of context to answer the users question. If you cannot find the answer from the pieces of context, just say that you don't know, don't try to make up an answer. 
	---------------- 
	{context}
	"""
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{question}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [21]:
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

#
Create a chain (ConversationalRetrieverChain) to link the Chat model, Retriever, and Memory together

In [22]:
qa = ConversationalRetrievalChain.from_llm(chat, retriever=retriever, memory=memory, combine_docs_chain_kwargs={'prompt': chat_prompt})

# 
Answer Queries

In [23]:
qa("What are the negatives of COMP3411 at UNSW?")

{'question': 'What are the negatives of COMP3411 at UNSW?',
 'chat_history': [SystemMessage(content='', additional_kwargs={})],
 'answer': 'The disadvantages of taking COMP3411 (Artificial Intelligence) at UNSW include:\n\n1. Poor organization and confusing assessments. \n   - The course structure is poorly organized, making it difficult for students to understand the material and assessments.\n   - Assessments are often irrelevant and confusing, which adds to the overall frustration.\n\n2. Outdated content. \n   - The course content is outdated and does not provide much value in terms of practical knowledge and skills.\n\n3. Lack of responsiveness from course staff. \n   - Despite being aware of the problems, the course staff did not take any action to address them.\n   - Last-minute changes were made without informing the students, leading to further confusion.\n\n4. Negative learning experience. \n   - Many students felt that they did not learn much from the course, resulting in an 