In [1]:
# Installing Required Libraries
%pip install python-docx
%pip install python-pptx
%pip install PyPDF2
%pip install langchain
%pip install langchain_community
%pip install langchain_google_genai
%pip install langchain_text_splitters
%pip install sentence-transformers
%pip install faiss-cpu
%pip install cohere

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2
Note: you may need to restart the kernel to use updated packages.
Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading XlsxWriter-3.2.0-py3-none-any.whl.metadata (2.6 kB)
Downloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading XlsxWriter-3.2.0-py3-none-any.whl (159 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m10.3 MB/s[0m eta [36

In [2]:
# necessary Imports
from docx import Document
from PyPDF2 import PdfReader
from pptx import Presentation
from langchain_community.llms import Cohere
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts  import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder

In [8]:

pdf_file = open('/kaggle/input/history-book/NCERT-Class-10-History.pdf', 'rb')

In [10]:
pdf_text = ""
pdf_reader = PdfReader(pdf_file)
for page in pdf_reader.pages:
    pdf_text += page.extract_text()

In [12]:
all_text = pdf_text 
len(all_text)

425639

In [13]:
# splitting the text into chunks for embeddings creation

text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 1000, 
        chunk_overlap = 200, 
        length_function = len,
        separators=['\n', '\n\n', ' ', '']
    )
    
chunks = text_splitter.split_text(text = all_text)

In [14]:
len(chunks)

544

In [15]:
import os
os.environ['HuggingFaceHub_API_Token']= 'Your API'
os.environ['GOOGLE_API_KEY']= "Your API"
os.environ['cohere_api_key'] = "Your API"

In [17]:
# Initializing embeddings model

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [18]:
# Indexing the data using FAISS
vectorstore = FAISS.from_texts(chunks, embedding = embeddings)

In [19]:
# creating retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [20]:
retrieved_docs = retriever.invoke("How did the Swadeshi Movement influence Indian industries in the early 20th century?")

In [21]:
len(retrieved_docs)

6

In [22]:
print(retrieved_docs[0].page_content)

rates from the colonial government; and they invested in mining,indigo and jute. Most of these were products required primarily for
export trade and not for sale in India.
When Indian businessmen began setting up industries in the late
nineteenth century, they avoided competing with Manchester goods
in the Indian market. Since yarn was not an important part of British
imports into India, the early cotton mills in India produced coarsecotton yarn (thread) rather than fabric. When yarn was imported it
was only of the superior variety. The yarn produced in Indian spinning
mills was used by handloom weavers in India or exported to China.
By the first decade of the twentieth century a series of changes
affected the pattern of industrialisation. As the swadeshi movement
gathered momentum, nationalists mobilised people to boycott foreigncloth. Industrial groups organised themselves to protect their collective
interests, pressurising the government to increase tariff protection


In [23]:
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                not contained in the context, say "answer not available in context" \n\n
                Context: \n {context}?\n
                Question: \n {question} \n
                Answer:"""

prompt = PromptTemplate.from_template(template=prompt_template)

In [24]:
# function to create a single string of relevant documents given by Faiss.
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [25]:
# RAG Chain

def generate_answer(question):
    cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key = os.getenv('cohere_api_key'))
    
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | cohere_llm
        | StrOutputParser()
    )
    
    return rag_chain.invoke(question)

In [26]:
ans = generate_answer("How did the Swadeshi Movement influence Indian industries in the early 20th century?")
print(ans)

  cohere_llm = Cohere(model="command", temperature=0.1, cohere_api_key = os.getenv('cohere_api_key'))


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
 The Swadeshi Movement encouraged Indian businessmen to produce Indian substitutes for foreign goods and promote domestic industries. It led to the expansion of certain crafts and industries like cotton spinning, but the weavers and craftspeople who comprised this industry often struggled to thrive financially. In response to the movement, industrialists organized to protect their collective interests and pressure the government for increased tariff protection. 


In [27]:
ans = generate_answer("Who is virat kohli")
print(ans)

 answer not available in context


In [28]:
ans = generate_answer("How did the East India Company contribute to the opium trade with China in the 19th century?")
print(ans)

 The East India Company grew opium in India and exported it to China and, with the money earned through this sale, it financed its tea and other imports from China. Many Indians became junior players in this trade, providing finance, procuring supplies, and shipping consignments, and some of these businessmen had visions of developing industrial enterprises in India. In Bengal, Dwarkanath Tagore made his fortune in the China trade before he turned to industrial investment, setting up six joint-stock companies in the 1830s and 1840s, and in Bombay, Parsis like Dinshaw Petit and Jamsetjee Nusserwanjee Tata who built huge industrial empires in India, accumulated their initial wealth partly from exports to China. 


In [29]:
ans = generate_answer("What was the impact of British manufactured goods on the Indian market during the 19th century?")
print(ans)

 The introduction of British manufactured goods into the Indian market during the 19th century had a significant impact on India's existing textile industry, leading to its decline. Indian weavers and craftspeople faced difficulties competing with cheaper, machine-made goods, which led to many losing their livelihoods. This shift resulted in a decline in the share of cotton textiles from India, from 30% of exports around 1800 to 15% by 1815 and below 3% by the 1870s. 


In [30]:
ans = generate_answer("What is the primary goal of the project?")
print(ans)

 The primary goal of the project is to examine the history of industrialization and global interconnectedness,
specifically looking at the movement of trade, migration, and capital. The project aims to analyze the phases
through which the modern global world has emerged by examining the significant historical events and
factors that have contributed to its development. 


In [31]:
ans = generate_answer("Which machine learning algorithms are utilized in the project?")
print(ans)

 answer not available in context


In [32]:
ans = generate_answer("What preprocessing techniques are used in the project?")
print(ans)

 answer not available in context


In [34]:
ans = generate_answer("What significant political change occurred in France as a result of the French Revolution in 1789?")
print(ans)

 The French Revolution saw a transfer of sovereignty from the monarchy to the body of French citizens.  As a result, the people who constitute the nation would be able to shape its destiny.  This was a significant change from absolute monarchic control and contributed to the rise of nationalism.  This would pave the way for the spread of nationalist ideology abroad, most notably through the French armies.  
