In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage



In [2]:
# Pull the variables defined in the local .env file into the process environment.
load_dotenv()

# LangSmith picks up its tracing settings from environment variables, so the
# project name has to be exported: a plain Python variable is never seen by the
# tracer and runs would otherwise be filed under the default project.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = "chatraj"
os.environ["LANGCHAIN_PROJECT"] = LANGCHAIN_PROJECT

In [3]:
from langchain_openai import ChatOpenAI

# The key is read from the environment rather than being written into the notebook.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Chat model shared by every chain built further down.
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4")

In [4]:
# Confirm that a key was loaded WITHOUT echoing the secret into the saved output.
# SECURITY: an earlier version of this cell displayed the full key; that key must
# be treated as compromised and revoked/rotated.
bool(OPENAI_API_KEY)

True

In [5]:
# Ask the bare model (no retrieval) so its answer can be compared with the RAG answer later.
llm.invoke(input="What is Task Decomposition?")

AIMessage(content='Task decomposition is a process in project management and systems engineering, where a large task or project is broken down into smaller, more manageable sub-tasks. This is done to make the project more understandable, easier to manage and monitor. By decomposing tasks, teams can work on individual components without affecting the development of other components. This method also helps in identifying potential risks at the earliest stage.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 78, 'prompt_tokens': 13, 'total_tokens': 91, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4-0613', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-53e15b71-0543-4aa4-87a0-ff04d2e9ce6d-0', usage_metadata={'input_tokens': 13, 'output

In [11]:
# Shared parser instance; it is appended as the last step of the
# `prompt | llm | output_parser` chain built in a later cell.
output_parser = StrOutputParser()

In [8]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# --- Load -------------------------------------------------------------------
# Only the post title, header and body are parsed out of the page.
blog_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": blog_strainer},
)
docs = loader.load()

# --- Chunk ------------------------------------------------------------------
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# --- Index ------------------------------------------------------------------
blog_embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
vectorstore = Chroma.from_documents(documents=splits, embedding=blog_embeddings)

# --- Retrieval building blocks ------------------------------------------------
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# The incoming question fans out into the two prompt inputs: the retrieved
# chunks (flattened to text) and the untouched question itself.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is the process of breaking down a complex task into smaller, simpler steps. Models are instructed to "think step by step" to utilize more computation to decompose hard tasks. This technique transforms big tasks into multiple manageable tasks and provides insights into the model\'s thinking process.'

In [9]:
from langchain_core.prompts import ChatPromptTemplate
# One-variable prompt; invoking it alone shows the rendered messages.
joke_template = "Tell me a short joke about {topic}"
prompt = ChatPromptTemplate.from_template(joke_template)
prompt.invoke(dict(topic="programming"))

ChatPromptValue(messages=[HumanMessage(content='Tell me a short joke about programming', additional_kwargs={}, response_metadata={})])

In [14]:
# prompt -> chat model -> plain-string parser
joke_inputs = {"topic": "programer"}
chain = prompt | llm | output_parser
chain.invoke(joke_inputs)

"Why don't programmers like nature?\n\nBecause it has too many bugs."

In [None]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from typing import List
from langchain_core.documents import Document

# Splitter reused by the later cells: 1000-character chunks with 200 overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# The data folder can be overridden with the CHATRAJ_DATA_DIR environment
# variable so the notebook is not tied to one machine; the default is the
# original location, so existing setups behave exactly as before.
DATA_DIR = os.getenv("CHATRAJ_DATA_DIR", r"C:\Users\rajes\PycharmProjects\chatraj\Data")

docx_loader = Docx2txtLoader(os.path.join(DATA_DIR, "Case Studies GenAI.docx"))
documents = docx_loader.load()

print(len(documents))

splits = text_splitter.split_documents(documents)

print(f"Split the documents into {len(splits)} chunks.")

1
Split the documents into 10 chunks.


In [23]:
# Display the first loaded document.
documents[0]

Document(metadata={'source': 'C:\\Users\\rajes\\PycharmProjects\\chatraj\\Data\\Case Studies GenAI.docx'}, page_content="AI-Powered Blog Content Creator\n\nIndustry: Digital Marketing and Content Creation\n\nProblem: In the world of digital marketing, it's tough to keep up with the demand for fresh, interesting, and relevant blog content. Content creators often struggle with the pressure to constantly produce new material, which can lead to burnout and a drop in the quality of their work.\n\nSolution: Development of an AI-powered tool that helps writers and marketers generate original and engaging blog posts. This tool can come up with topic ideas, structure articles, write drafts, and even fine-tune the tone and style to better match the intended audience.\n\nAI Implementation:\n\nNatural Language Processing (NLP): This tool uses advanced GenAI techniques to create text that sounds natural and fits the style and complexity that the content needs.\n\nContent Personalization: It analyze

In [24]:
# Display the second chunk produced by the splitter.
splits[1]

Document(metadata={'source': 'C:\\Users\\rajes\\PycharmProjects\\chatraj\\Data\\Case Studies GenAI.docx'}, page_content='Content Personalization: It analyzes past successful posts and audience interactions to customize future content, making sure it resonates well with readers.\n\nBenefits to Client:\n\nConsistent Content Production: Keep up a continuous flow of high-quality blog posts, which is key for maintaining and growing reader interest and driving traffic to the website.\n\nEnhanced Content Quality: The AI helps make each post better and more attractive to both readers and search engines.\n\nScalability: It’s easy to increase content production without needing a lot more resources, which helps the business grow and reach more readers.\n\nTime and Cost Efficiency: Save time and money in content creation, allowing for quicker content turnaround and the opportunity to use resources for other important tasks.\n\nOutcome: This AI tool greatly cuts down the effort and time needed to c

In [26]:
# Display only the text of the first chunk (without its metadata).
splits[0].page_content

"AI-Powered Blog Content Creator\n\nIndustry: Digital Marketing and Content Creation\n\nProblem: In the world of digital marketing, it's tough to keep up with the demand for fresh, interesting, and relevant blog content. Content creators often struggle with the pressure to constantly produce new material, which can lead to burnout and a drop in the quality of their work.\n\nSolution: Development of an AI-powered tool that helps writers and marketers generate original and engaging blog posts. This tool can come up with topic ideas, structure articles, write drafts, and even fine-tune the tone and style to better match the intended audience.\n\nAI Implementation:\n\nNatural Language Processing (NLP): This tool uses advanced GenAI techniques to create text that sounds natural and fits the style and complexity that the content needs.\n\nContent Personalization: It analyzes past successful posts and audience interactions to customize future content, making sure it resonates well with reader

In [30]:
# 1. Function to load documents from a folder

def load_documents(folder_path: str) -> List[Document]:
    """Load every supported file found directly inside ``folder_path``.

    ``.pdf`` files are read with ``PyPDFLoader`` and ``.docx`` files with
    ``Docx2txtLoader``. Any other entry is reported on stdout and skipped.
    Sub-folders are not descended into.

    Args:
        folder_path: Directory to scan.

    Returns:
        The documents produced by all loaders, in file-name order.

    Raises:
        OSError: If ``folder_path`` cannot be listed (propagated from ``os.listdir``).
    """
    documents: List[Document] = []
    # os.listdir returns entries in arbitrary order; sorting keeps the document
    # (and therefore chunk) order reproducible between runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        # Match on the lower-cased name so "REPORT.PDF" or "Notes.DOCX" are not skipped.
        lowered = filename.lower()
        if lowered.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif lowered.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

# Load documents from a folder. The location can be overridden with the
# CHATRAJ_DATA_DIR environment variable so the notebook also runs on other
# machines; the default is the original location.
folder_path = os.getenv("CHATRAJ_DATA_DIR", r"C:\Users\rajes\PycharmProjects\chatraj\Data")
documents = load_documents(folder_path)

print(f"Loaded {len(documents)} documents from the folder.")
splits = text_splitter.split_documents(documents)
print(f"Split the documents into {len(splits)} chunks.")

Loaded 1 documents from the folder.
Split the documents into 10 chunks.


In [31]:
# Embed every chunk's text directly, to inspect the raw vectors.
embeddings = OpenAIEmbeddings()

chunk_texts = [chunk.page_content for chunk in splits]
document_embeddings = embeddings.embed_documents(chunk_texts)

print(f"Created embeddings for {len(document_embeddings)} document chunks.")

Created embeddings for 10 document chunks.


In [35]:
# Length of the first chunk's embedding vector.
len(document_embeddings[0])

1536

In [36]:
from langchain_chroma import Chroma

import hashlib

embedding_function = OpenAIEmbeddings()
collection_name = "my_collection"

# Give every chunk a deterministic id (source + position + text). Without
# explicit ids the store generates random ones, so each re-run of this cell
# would append another copy of every chunk to the persisted collection and
# similarity search would start returning duplicates. With stable ids a re-run
# overwrites the existing entries instead.
chunk_ids = [
    hashlib.sha256(
        f"{chunk.metadata.get('source', '')}:{position}:{chunk.page_content}".encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(splits)
]

vectorstore = Chroma.from_documents(
    collection_name=collection_name,
    documents=splits,
    embedding=embedding_function,
    persist_directory="./chroma_db",
    ids=chunk_ids,
)

print("Vector store created and persisted to './chroma_db'")

Vector store created and persisted to './chroma_db'


In [39]:
# Query the store directly and print the two closest chunks.
query = "What is the use of AI driven chatbot"
search_results = vectorstore.similarity_search(query, k=2)

print(f"\nTop 2 most relevant chunks for the query: '{query}'\n")
for rank, hit in enumerate(search_results, start=1):
    source = hit.metadata.get('source', 'Unknown')
    # The trailing empty string reproduces the blank line after each result.
    print(
        f"Result {rank}:",
        f"Source: {source}",
        f"Content: {hit.page_content}",
        "",
        sep="\n",
    )


Top 2 most relevant chunks for the query: 'What is the use of AI driven chatbot'

Result 1:
Source: C:\Users\rajes\PycharmProjects\chatraj\Data\Case Studies GenAI.docx
Content: Outcome: This AI tool greatly cuts down the effort and time needed to create high-quality content. 























AI-Driven Chatbot for Enhanced Customer Support

Industry: Customer Support and Operations

Problem: Standard chatbots often fail to resolve complex customer queries effectively, leading to a poor user experience and increased workload for human agents.

Solution: Implement a smart chatbot powered by Generative AI and Large Language Models, utilizing helpdesk articles and user profile data to provide accurate, personalized assistance. 

AI Implementation:

Content Integration:The chatbot utilizes an extensive database of helpdesk articles and user profiles to inform its responses, ensuring that answers are both accurate and highly relevant to each user's specific query. It uses Vector Datab

In [40]:
# Retriever view of the store, limited to the two closest chunks per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))
retriever.invoke("What is the use of AI driven chatbot")

[Document(metadata={'source': 'C:\\Users\\rajes\\PycharmProjects\\chatraj\\Data\\Case Studies GenAI.docx'}, page_content="Outcome: This AI tool greatly cuts down the effort and time needed to create high-quality content. \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAI-Driven Chatbot for Enhanced Customer Support\n\nIndustry: Customer Support and Operations\n\nProblem: Standard chatbots often fail to resolve complex customer queries effectively, leading to a poor user experience and increased workload for human agents.\n\nSolution: Implement a smart chatbot powered by Generative AI and Large Language Models, utilizing helpdesk articles and user profile data to provide accurate, personalized assistance. \n\nAI Implementation:\n\nContent Integration:The chatbot utilizes an extensive database of helpdesk articles and user profiles to inform its responses, ensuring that answers are both accurate and highly relevant to each user's specific query. It uses Vector Database such as Pinecone t

In [41]:
from langchain_core.prompts import ChatPromptTemplate
# Prompt that restricts the model to the retrieved context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer: "
)
prompt = ChatPromptTemplate.from_template(template)

In [44]:
def docs2str(docs):
    """Join the ``page_content`` of every document into one string.

    Consecutive documents are separated by a blank line; an empty input
    produces an empty string.
    """
    parts = []
    for doc in docs:
        parts.append(doc.page_content)
    return "\n\n".join(parts)

In [45]:
from langchain.schema.runnable import RunnablePassthrough
# Stop after the prompt step to inspect exactly what would be sent to the model.
prompt_inputs = {"context": retriever | docs2str, "question": RunnablePassthrough()}
rag_chain = prompt_inputs | prompt
rag_chain.invoke("What is the use of AI driven chatbot")

ChatPromptValue(messages=[HumanMessage(content="Answer the question based only on the following context:\nOutcome: This AI tool greatly cuts down the effort and time needed to create high-quality content. \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAI-Driven Chatbot for Enhanced Customer Support\n\nIndustry: Customer Support and Operations\n\nProblem: Standard chatbots often fail to resolve complex customer queries effectively, leading to a poor user experience and increased workload for human agents.\n\nSolution: Implement a smart chatbot powered by Generative AI and Large Language Models, utilizing helpdesk articles and user profile data to provide accurate, personalized assistance. \n\nAI Implementation:\n\nContent Integration:The chatbot utilizes an extensive database of helpdesk articles and user profiles to inform its responses, ensuring that answers are both accurate and highly relevant to each user's specific query. It uses Vector Database such as Pinecone to store, search 

In [47]:
# Full pipeline: retrieve -> fill prompt -> chat model -> plain string.
rag_inputs = {"context": retriever | docs2str, "question": RunnablePassthrough()}
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# `question` and `response` are kept as names; a later cell seeds the chat history from them.
question = "What is the use of AI driven chatbot??"
response = rag_chain.invoke(question)
print(response)

The AI-driven chatbot is used to handle customer service operations, particularly routine queries, thereby reducing the workload of human agents. It provides personalized support by using detailed user data and past interactions. It also offers the flexibility of handing off to human agents when necessary for more complex or sensitive issues. In the financial services industry, it can greatly cut down the effort and time needed to create high-quality content. The chatbot uses an extensive database of helpdesk articles and user profile data to provide accurate and personalized assistance.


# Conversational RAG

In [51]:
from langchain_core.messages import HumanMessage, AIMessage
# Seed the conversation with the previous question/answer pair.
chat_history = [
    HumanMessage(content=question),
    AIMessage(content=response),
]

In [53]:
from langchain_core.prompts import MessagesPlaceholder
# System instruction: rewrite a follow-up into a self-contained question.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can "
    "be understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Message order: instruction, prior turns, then the new user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()

followup = {"input": "Where are its uses?", "chat_history": chat_history}
contextualize_chain.invoke(followup)

'What are the applications of AI-driven chatbots?'

In [54]:
from langchain.chains import create_history_aware_retriever
# Retriever that is given the chat history together with the new input.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)
followup = {"input": "Where are its uses?", "chat_history": chat_history}
history_aware_retriever.invoke(followup)

[Document(metadata={'source': 'C:\\Users\\rajes\\PycharmProjects\\chatraj\\Data\\Case Studies GenAI.docx'}, page_content="Outcome: This AI tool greatly cuts down the effort and time needed to create high-quality content. \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAI-Driven Chatbot for Enhanced Customer Support\n\nIndustry: Customer Support and Operations\n\nProblem: Standard chatbots often fail to resolve complex customer queries effectively, leading to a poor user experience and increased workload for human agents.\n\nSolution: Implement a smart chatbot powered by Generative AI and Large Language Models, utilizing helpdesk articles and user profile data to provide accurate, personalized assistance. \n\nAI Implementation:\n\nContent Integration:The chatbot utilizes an extensive database of helpdesk articles and user profiles to inform its responses, ensuring that answers are both accurate and highly relevant to each user's specific query. It uses Vector Database such as Pinecone t

In [55]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering prompt: role instruction, retrieved context, prior turns, new input.
qa_messages = [
    ("system", "You are a helpful AI assistant. Use the following context to answer the user's question."),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Combine the retrieved documents into {context} and answer with the model.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# History-aware retrieval feeding the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [56]:
# Ask a follow-up that only makes sense together with the chat history.
followup = {"input": "Where are its uses?", "chat_history": chat_history}
rag_chain.invoke(followup)

{'input': 'Where are its uses?',
 'chat_history': [HumanMessage(content='What is the use of AI driven chatbot??', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The AI-driven chatbot is used to handle customer service operations, particularly routine queries, thereby reducing the workload of human agents. It provides personalized support by using detailed user data and past interactions. It also offers the flexibility of handing off to human agents when necessary for more complex or sensitive issues. In the financial services industry, it can greatly cut down the effort and time needed to create high-quality content. The chatbot uses an extensive database of helpdesk articles and user profile data to provide accurate and personalized assistance.', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={'source': 'C:\\Users\\rajes\\PycharmProjects\\chatraj\\Data\\Case Studies GenAI.docx'}, page_content="Outcome: This AI tool greatly cuts down the