In [1]:
# Load the project-brief PDF. The displayed result is a list of Document
# objects, each carrying its source file and page index in `metadata`.
from langchain_community.document_loaders import PyPDFLoader

PDF_PATH = 'NLP-2_Problem Statement-1.pdf'

loader = PyPDFLoader(PDF_PATH)
doc = loader.load()
doc

[Document(metadata={'source': 'NLP-2_Problem Statement-1.pdf', 'page': 0}, page_content=' \n \n©Great  Learning.  Proprietary  content.  All Rights  Reserved.  Unauthorised  use or distribution  prohibited  MODULE  \nPROJECT  AIML   \n'),
 Document(metadata={'source': 'NLP-2_Problem Statement-1.pdf', 'page': 1}, page_content=' \n \n• DOMAIN : Digital  content  and  entertainment  industry  \n• CONTEXT : The  objective  of this  project  is to build  a text  classification  model  that  analyses  the customer\'s  sentiments  \nbased  on their  reviews  in the IMDB  database.  The  model  uses  a complex  deep  learning  model  to build  an embedding  layer  \nfollowed  by a classification  algorithm  to analyse  the sentiment  of the customers.  \n• DATA  DESCRIPTION : The  Dataset  of 50,000  movie  reviews  from  IMDB,  labelled  by sentiment  (positive/negative).  Reviews  \nhave been  preprocessed,  and  each  review  is encoded  as a sequence  of word  indexes  (integers).  For con

In [2]:
# Break the loaded pages into smaller chunks before embedding them.
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
split_doc = splitter.split_documents(doc)

In [3]:
# Create the vector database: embed every chunk and index it in FAISS.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# No model name is passed, so the library's default embedding model is used.
embeddings = HuggingFaceEmbeddings()
db = FAISS.from_documents(split_doc, embeddings)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Instantiate the LLM wrapper for the `llama2` model served by Ollama.
from langchain_community.llms import Ollama

OLLAMA_MODEL = 'llama2'
llm = Ollama(model=OLLAMA_MODEL)

In [42]:
from langchain_core.messages import SystemMessage, trim_messages, HumanMessage, AIMessage


# Trimmer configuration: keep the most recent messages that fit within the
# token budget (counted with `llm`), always retain the system message, never
# cut a message in half, and make the kept window begin on a human turn.
_trim_settings = {
    "strategy": "last",
    "max_tokens": 65,
    "token_counter": llm,
    "include_system": True,
    "start_on": "human",
    "allow_partial": False,
}
trimmer = trim_messages(**_trim_settings)

# Sample conversation used to exercise the trimmer and, later, the chains.
# Each pair is (human turn, AI reply); the system message leads the list.
_sample_turns = [
    ("hi! I'm bob", "hi!"),
    ("I like vanilla ice cream", "nice"),
    ("whats 2 + 2", "4"),
    ("thanks", "no problem!"),
    ("having fun?", "yes!"),
]

messages = [SystemMessage(content="you're a good assistant")]
for _human_text, _ai_text in _sample_turns:
    messages.append(HumanMessage(content=_human_text))
    messages.append(AIMessage(content=_ai_text))

# Display what survives trimming.
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant"),
 HumanMessage(content="hi! I'm bob"),
 AIMessage(content='hi!'),
 HumanMessage(content='I like vanilla ice cream'),
 AIMessage(content='nice'),
 HumanMessage(content='whats 2 + 2'),
 AIMessage(content='4'),
 HumanMessage(content='thanks'),
 AIMessage(content='no problem!'),
 HumanMessage(content='having fun?'),
 AIMessage(content='yes!')]

In [43]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# Answer prompt for the stuff-documents chain.
#
# Message order, as the model reads it:
#   1. system  - answering instructions plus the retrieved documents ({context})
#   2. history - `chat_history` (the per-session history injected by a
#                message-history wrapper; optional, so chains invoked without
#                it still work) followed by `messages` (the trimmed list the
#                caller passes in)
#   3. human   - the current question ({input})
#
# The question is deliberately the LAST turn. Previously it was embedded in
# the system message with the history placeholder after it, so the rendered
# prompt ended on an old history turn rather than on the question, and the
# stored session history never reached the answering model at all.
#
# Required input variables are unchanged: context, input, messages.
prompt = ChatPromptTemplate.from_messages(
    [
        ('system', """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer.
Please give accurate and precise answer.
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>"""),
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        MessagesPlaceholder(variable_name='messages'),
        ('human', "Question: {input}"),
    ]
)

In [44]:
## Chain introduction
## Create the stuff-documents chain

from langchain.chains.combine_documents import create_stuff_documents_chain

# Stage 1: replace the incoming "messages" entry with its trimmed version,
# leaving every other key of the input dict untouched.
_trim_history = RunnablePassthrough.assign(
    messages=itemgetter("messages") | trimmer
)

# Stage 2: fill the prompt with the retrieved documents and call the LLM.
_answer_from_docs = create_stuff_documents_chain(llm, prompt)

document_chain = _trim_history | _answer_from_docs

In [45]:
# Expose the FAISS index through the retriever interface (default settings).
db_retriever = db.as_retriever()

In [46]:
# Create the retrieval chain: fetch documents with the retriever, then hand
# them to the document chain to produce the answer.
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    db_retriever,
    document_chain,
)

In [49]:
# Ask a question through the stateless retrieval chain; the sample
# conversation is supplied under "messages" because the document chain
# reads and trims that key.
_stateless_request = {
    "messages": messages,
    "input": "How many steps are there in the project?",
}
response = retrieval_chain.invoke(_stateless_request)

In [51]:
# Show only the generated answer text from the chain's result.
response["answer"]

'Based on the provided context, there are 8 steps in the project. They are:\n\n1. Import and analyze the data set.\n2. Perform relevant sequence adding on the data.\n3. Print the shape of features and labels.\n4. Print the value of any one feature and its label.\n5. Decode the feature value to get the original sentence.\n6. Design, train, tune, and test a sequential model.\n7. Get length of each sentence.\n8. Define parameters.'

# Adding Chat history

In [52]:
### Contextualize the question ###
# Build a retriever that first has the LLM rewrite the latest question into a
# standalone one (using the chat history) and then searches with that rewrite.
from langchain.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed "
    "and otherwise return it as is."
)

# Order: rewriting instructions, prior turns, then the new question.
_contextualize_parts = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(_contextualize_parts)

history_retriever = create_history_aware_retriever(
    llm,
    db_retriever,
    contextualize_q_prompt,
)

In [53]:
# Same answer chain as before, but retrieval now goes through the
# history-aware retriever.
rag_chain = create_retrieval_chain(
    history_retriever,
    document_chain,
)

In [54]:
#Managing state chat history
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
# In-memory registry of chat histories, keyed by session id.
store = {}


def get_message_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``store``.

    Returns:
        The history object stored under ``session_id``. A new, empty
        ``ChatMessageHistory`` is registered and returned when the id has
        not been seen before.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register a fresh history.
        fresh_history = ChatMessageHistory()
        store[session_id] = fresh_history
        return fresh_history

# Wrap the RAG chain with per-session message history:
#   - "input" is the key holding the new user message,
#   - "chat_history" is the key under which stored history is supplied,
#   - "answer" is the output key recorded as the reply.
conv_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_message_history,
    history_messages_key="chat_history",
    input_messages_key="input",
    output_messages_key="answer",
)

In [56]:
# First question in session "ad12".
_session_config = {"configurable": {"session_id": "ad12"}}
response = conv_rag_chain.invoke(
    {"messages": messages, "input": "How many steps are there in the project"},
    config=_session_config,
)

In [57]:
# Answer text for the first question in session "ad12".
response["answer"]

'Based on the provided context, there are a total of 8 steps in the project. These steps are:\n\n1. Import and analyse the data set.\n2. Perform relevant sequence adding on the data.\n3. Print the shape of features and labels.\n4. Print the value of any one feature and its label.\n5. Decode the feature value to get the original sentence.\n6. Design, train, tune, and test a sequential model.\n7. Get length of each sentence.\n8. Create features and labels.'

In [58]:
# Follow-up question in the same session ("ad12"), so the earlier exchange
# is available as chat history.
_session_config = {"configurable": {"session_id": "ad12"}}
response = conv_rag_chain.invoke(
    {"messages": messages, "input": "What is the objective of this"},
    config=_session_config,
)

In [59]:
# Answer text for the follow-up question in session "ad12".
response["answer"]

'The objective of the system is to build a sequential NLP classifier that can use input text parameters to determine customer sentiments. Specifically, the tasks and steps involved in the project are:\n\n1. Import and analyze the data set: 5 marks\n\t* Use `imdb.load_data()` method\n\t* Get train and test sets\n\t* Take the top 10000 most frequent words\n2. Perform relevant sequence adding on the data: 5 marks\n\t* Add sequential data to the input text parameters\n3. Perform following data analysis: 5 marks\n\t* Print shape of features and labels\n\t* Print value of any one feature and its label\n4. Decode the feature value to get original sentence: 5 marks\n\t* Use GloVe embeddings to decode the feature values\n5. Design, train, tune, and test a sequential model: 5 marks\n\t* Use LSTM model with GloVe embeddings as input features\n\t* Train the model using the data set\n\t* Tune the model by adjusting the hyperparameters\n\t* Test the model on a new dataset to evaluate its performance

In [60]:
# Same question asked in a different session ("ad15"), which has its own
# separate history in `store`.
_session_config = {"configurable": {"session_id": "ad15"}}
response = conv_rag_chain.invoke(
    {"messages": messages, "input": "What is the objective of this"},
    config=_session_config,
)

In [61]:
# Answer text for the question asked in session "ad15".
response["answer"]

'The objective of the project is to build a sequential NLP classifier that can use input text parameters to determine customer sentiments. The steps and tasks involved in the project are:\n\n1. Import and analyze the data set. (5 marks)\n2. Perform relevant sequence adding on the data. (5 marks)\n3. Perform data analysis, including printing the shape of features and labels, and printing the value of any one feature and its label. (5 marks)\n4. Decode the feature value to get the original sentence. (5 marks)\n5. Design, train, tune, and test a sequential model. (5 marks)\n\nThe project also provides a reference link for collecting supplementary data. (1 mark)\n\nTotal score: 30 marks'