In [1]:
import streamlit as st
from PIL import Image
from PyPDF2 import PdfReader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from fastapi import FastAPI
import os

In [2]:
import os
os.environ["OPENAI_API_KEY"] = ""
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [3]:
# provide the path of  pdf file/files.
pdfreader1 = PdfReader('sustainability-16-01864-v2.pdf')
pdfreader2 = PdfReader('AI Adoption Strategy.pdf')

In [4]:
from typing_extensions import Concatenate
# Read text from the first PDF
raw_text_1 = ''
for page in pdfreader1.pages:
    content = page.extract_text()
    if content:
        raw_text_1 += content

# Read text from the second PDF
raw_text_2 = ''
for page in pdfreader2.pages:
    content = page.extract_text()
    if content:
        raw_text_2 += content

# Combine raw text from both PDFs
raw_text_combined = raw_text_1 + ' ' + raw_text_2

In [5]:
raw_text_combined

"Citation: Badghish, S.; Soomro, Y.A.\nArtificial Intelligence Adoption by\nSMEs to Achieve Sustainable Business\nPerformance: Application of\nTechnology–Organization–\nEnvironment Framework.\nSustainability 2024 ,16, 1864.\nhttps://doi.org/10.3390/su16051864\nAcademic Editor: Alina Badulescu\nReceived: 28 September 2023\nRevised: 21 December 2023\nAccepted: 25 December 2023\nPublished: 24 February 2024\nCopyright: ©2024 by the authors.\nLicensee MDPI, Basel, Switzerland.\nThis article is an open access article\ndistributed under the terms and\nconditions of the Creative Commons\nAttribution (CC BY) license (https://\ncreativecommons.org/licenses/by/\n4.0/).\nsustainability\nArticle\nArtificial Intelligence Adoption by SMEs to Achieve Sustainable\nBusiness Performance: Application of Technology–Organization–\nEnvironment Framework\nSaeed Badghish\n and Yasir Ali Soomro *\nFaculty of Economics and Administration, King Abdulaziz University, Jeddah 21589, Saudi Arabia;\nsbadghish@kau.edu.

In [6]:
# Split the combined text
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text_combined)

In [8]:
# Download embeddings from OpenAI
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(texts, embeddings)
retriever = vectorstore.as_retriever()
chain = load_qa_chain(llm, chain_type="stuff")

In [9]:
### Contextualize question ###
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)


### Answer question ###
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [10]:
### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)


In [11]:
conversational_rag_chain.invoke(
    {"input": "Give me the summary of ai adoption strategy?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'The AI adoption strategy involves various approaches with advantages and disadvantages outlined in existing adoption frameworks. It is recommended that organizations adopt AI sooner rather than later, considering their specific requirements and concerns around adoption. The framework is applicable across industries and government sectors, with variations based on factors like industry dynamics, competitive intensity, and organization size.'

In [None]:
conversational_rag_chain.invoke(
    {"input": "What are his sayings?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

"I don't have information on specific sayings or quotes from Zoë Baird."

In [None]:
import pickle
# Pickle objects for later use in Streamlit
with open('chatbot.pkl', 'wb') as f:
    pickle.dump((vectorstore, chain), f)

TypeError: cannot pickle '_thread.RLock' object