In [None]:
!pip install requests beautifulsoup4



In [None]:
!pip install PyPDF2
!pip install langchain
!pip install openai
!pip install faiss-cpu
!pip install tiktoken
!pip install langchain-community

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m49.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Collecting tiktoken
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
!pip install langchain-community langchain-openai faiss-cpu pypdf beautifulsoup4 requests


Collecting langchain-openai
  Downloading langchain_openai-0.3.3-py3-none-any.whl.metadata (2.7 kB)
Collecting pypdf
  Downloading pypdf-5.2.0-py3-none-any.whl.metadata (7.2 kB)
Collecting langchain-core<0.4.0,>=0.3.32 (from langchain-community)
  Downloading langchain_core-0.3.33-py3-none-any.whl.metadata (6.3 kB)
Downloading langchain_openai-0.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf-5.2.0-py3-none-any.whl (298 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.7/298.7 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.33-py3-none-any.whl (412 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m412.7/412.7 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf, langchain-core, langchain-openai
  Attempting uninstall: langchain-core
    Found existing install

In [7]:
import os
import requests
import PyPDF2
from bs4 import BeautifulSoup
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from google.colab import userdata

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
    return text

def download_and_extract_ipc():
    pdf_path = "/content/IPC_186045_removed_removed.pdf"

    # Direct PDF download without scraping
    if not os.path.exists(pdf_path):
        url = "https://www.indiacode.nic.in/repealedfileopen?rfilename=A1860-45.pdf"
        response = requests.get(url, stream=True)

        if response.status_code == 200:
            with open(pdf_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    file.write(chunk)
            print("IPC Document Downloaded Successfully!")
        else:
            print("Failed to download the IPC PDF.")
            return None

    text = extract_text_from_pdf(pdf_path)

    if text.strip():
        with open("Indian_Penal_Code.txt", "w", encoding="utf-8") as text_file:
            text_file.write(text)
        print("Text extracted and saved successfully!")
        return text
    else:
        print("No extractable text found in the PDF.")
        return None

def create_chatbot():
    openai_key = userdata.get("OPENAI_API_KEY")

    if not openai_key:
        raise ValueError("OpenAI API Key not found. Make sure to store it in Colab secrets.")

    os.environ["OPENAI_API_KEY"] = openai_key

    text = download_and_extract_ipc()

    if not text:
        print("No text available for chatbot training.")
        return

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_text(text)

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(texts, embeddings, normalize_L2=True)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    print("Chatbot is ready! Type 'exit' to stop.")
    while True:
        query = input("You: ")
        if query.lower() == "exit":
            break
        response = qa_chain.invoke(query)
        print("Bot:")
        print(response["result"].replace(". ", "."))

if __name__ == "__main__":
    create_chatbot()


Text extracted and saved successfully!
Chatbot is ready! Type 'exit' to stop.
You: whats the dco about
Bot:
The document is the Indian Penal Code, which is a comprehensive criminal code of India that covers all substantive aspects of criminal law.It includes definitions of various criminal acts, explanations of legal terms, general exceptions, and provisions related to punishments for different offenses.The document outlines the scope of the code, definitions of terms such as "person," "public servant," and "document." It also addresses counterfeit practices, fraud, possession of property, and definition of "dishonestly" and "fraudulently." The Code further explains the concept of "reason to believe," the term "movable property," and "wrongful gain" and "wrongful loss." It also includes definitions of "valuable security," "electronic record," and many other legal terms used in the context of criminal law in India.
You: list any 5 code with the law
Bot:
1.Section 17: Defines "Government

KeyboardInterrupt: Interrupted by user