In [None]:
!pip install requests beautifulsoup4



In [None]:
!pip install PyPDF2
!pip install langchain
!pip install openai
!pip install faiss-cpu
!pip install tiktoken
!pip install langchain-community

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m49.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Collecting tiktoken
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
!pip install langchain-community langchain-openai faiss-cpu pypdf beautifulsoup4 requests


Collecting langchain-openai
  Downloading langchain_openai-0.3.3-py3-none-any.whl.metadata (2.7 kB)
Collecting pypdf
  Downloading pypdf-5.2.0-py3-none-any.whl.metadata (7.2 kB)
Collecting langchain-core<0.4.0,>=0.3.32 (from langchain-community)
  Downloading langchain_core-0.3.33-py3-none-any.whl.metadata (6.3 kB)
Downloading langchain_openai-0.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf-5.2.0-py3-none-any.whl (298 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.7/298.7 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.33-py3-none-any.whl (412 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m412.7/412.7 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf, langchain-core, langchain-openai
  Attempting uninstall: langchain-core
    Found existing install

In [8]:
import os
import requests
import PyPDF2
from bs4 import BeautifulSoup
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from google.colab import userdata

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined by single newlines.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        str: The extracted text of each page that yields any text, separated
        by a newline character. Pages whose extraction result is empty or
        None are skipped, so the result is an empty string when no page has
        extractable text.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        page_texts = []
        for page in reader.pages:
            # Extract once per page and reuse the result; text extraction is
            # the expensive step, so it must not run for both filter and value.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    return "\n".join(page_texts)

def download_and_extract_ipc(
    pdf_path="/content/IPC_186045_removed_removed.pdf",
    url="https://www.indiacode.nic.in/repealedfileopen?rfilename=A1860-45.pdf",
    timeout=30,
):
    """Ensure the IPC PDF exists locally, extract its text and save it.

    If ``pdf_path`` does not exist, the PDF is downloaded from ``url``. The
    extracted text is also written to ``Indian_Penal_Code.txt`` in the current
    working directory.

    Args:
        pdf_path: Where the PDF is (or will be) stored. Defaults to the
            original Colab location.
        url: Direct download URL of the PDF.
        timeout: Seconds passed to ``requests.get`` as its timeout so a stalled
            server cannot hang the notebook indefinitely.

    Returns:
        str | None: The extracted text, or None when the server does not answer
        with HTTP 200 or the PDF contains no extractable text.

    Raises:
        Exception: Network and file errors raised during the download
        propagate to the caller; any partially written file is removed first.
    """
    # Direct PDF download without scraping
    if not os.path.exists(pdf_path):
        # Stream into a side file and rename only on success, so an interrupted
        # download can never be mistaken for a complete PDF on the next run.
        partial_path = pdf_path + ".part"
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print("Failed to download the IPC PDF.")
                    return None
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            file.write(chunk)
            os.replace(partial_path, pdf_path)
        finally:
            # Only present here if the download failed midway.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print("IPC Document Downloaded Successfully!")

    text = extract_text_from_pdf(pdf_path)

    if text.strip():
        with open("Indian_Penal_Code.txt", "w", encoding="utf-8") as text_file:
            text_file.write(text)
        print("Text extracted and saved successfully!")
        return text
    else:
        print("No extractable text found in the PDF.")
        return None

def create_chatbot():
    """Build a retrieval-QA chain over the IPC text and run a console chat loop.

    Reads the OpenAI key from Colab secrets, exports it as the
    ``OPENAI_API_KEY`` environment variable, indexes the IPC text in a FAISS
    vector store and then answers questions typed at the prompt until the user
    enters ``exit`` (or interrupts / closes the input).

    Returns:
        None. When no IPC text is available a message is printed and the
        function returns without starting the loop.

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` secret is empty or missing.
    """
    openai_key = userdata.get("OPENAI_API_KEY")

    if not openai_key:
        raise ValueError("OpenAI API Key not found. Make sure to store it in Colab secrets.")

    os.environ["OPENAI_API_KEY"] = openai_key

    text = download_and_extract_ipc()

    if not text:
        print("No text available for chatbot training.")
        return

    # Page texts are joined with single newlines and extracted PDF text may
    # contain no blank lines at all, so split on "\n" instead of the splitter's
    # default "\n\n"; otherwise chunks can far exceed chunk_size.
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_text(text)

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(texts, embeddings, normalize_L2=True)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    print("Chatbot is ready! Type 'exit' to stop.")
    while True:
        try:
            query = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input ends the chat like "exit" does.
            break
        if not query:
            # Do not spend an API call on an empty question.
            continue
        if query.lower() == "exit":
            break
        response = qa_chain.invoke(query)
        print("Bot:")
        # Print the answer verbatim; rewriting ". " would glue sentences and
        # numbered list items together.
        print(response["result"])

# Entry point: start the interactive console chatbot when this code is executed
# directly. Running the cell satisfies the guard, so the chat loop starts
# (and blocks on input) as soon as the cell is run.
if __name__ == "__main__":
    create_chatbot()


Text extracted and saved successfully!
Chatbot is ready! Type 'exit' to stop.
You: top 5 law
Bot:
It seems like you're asking for a list of the top 5 laws or legal regulations mentioned in the Indian Penal Code context provided earlier.Here are the top 5 laws based on the content:

1.**Punishment for Murder** - Defined under Chapters XII, XIII of the Indian Penal Code.
2.**Punishment for Theft** - Also addressed in multiple sections under Chapter VII of the Penal Code.
3.**Offences Relating to Marriage** - Contains various laws about marriage-related crimes per Chapter XX of the Penal Code.
4.**Criminal Intimidation, Insult, and Annoyance** - Under Chapter XXII, this covers laws regarding intimidation, insult, and nuisance.
5.**Criminal Breach of Contracts of Service** - Found in Chapter XIX and deals with breaches of contractual agreements regarding service.

These laws and regulations are significant within the Indian legal framework and are essential for maintaining societal order a

In [9]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.42.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.0-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m54.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m79.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[

In [15]:
%%writefile app.py

import os
import requests
import PyPDF2
import streamlit as st
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined by single newlines.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        str: The extracted text of each page that yields any text, separated
        by a newline character. Pages whose extraction result is empty or
        None are skipped, so the result is an empty string when no page has
        extractable text.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        page_texts = []
        for page in reader.pages:
            # Extract once per page and reuse the result; text extraction is
            # the expensive step, so it must not run for both filter and value.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    return "\n".join(page_texts)

def download_and_extract_ipc(
    pdf_path="/content/IPC_186045_removed_removed.pdf",
    url="https://www.indiacode.nic.in/repealedfileopen?rfilename=A1860-45.pdf",
    timeout=30,
):
    """Ensure the IPC PDF exists locally, extract its text and save it.

    If ``pdf_path`` does not exist, the PDF is downloaded from ``url``. The
    extracted text is also written to ``Indian_Penal_Code.txt`` in the current
    working directory. Progress and failures are reported through Streamlit
    status messages.

    Args:
        pdf_path: Where the PDF is (or will be) stored. Defaults to the
            original Colab location.
        url: Direct download URL of the PDF.
        timeout: Seconds passed to ``requests.get`` as its timeout so a stalled
            server cannot hang the app indefinitely.

    Returns:
        str | None: The extracted text, or None when the server does not answer
        with HTTP 200 or the PDF contains no extractable text.

    Raises:
        Exception: Network and file errors raised during the download
        propagate to the caller; any partially written file is removed first.
    """
    # Direct PDF download
    if not os.path.exists(pdf_path):
        # Stream into a side file and rename only on success, so an interrupted
        # download can never be mistaken for a complete PDF on the next run.
        partial_path = pdf_path + ".part"
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    st.error("Failed to download the IPC PDF.")
                    return None
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            file.write(chunk)
            os.replace(partial_path, pdf_path)
        finally:
            # Only present here if the download failed midway.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        st.success("IPC Document Downloaded Successfully!")

    text = extract_text_from_pdf(pdf_path)

    if text.strip():
        with open("Indian_Penal_Code.txt", "w", encoding="utf-8") as text_file:
            text_file.write(text)
        st.success("Text extracted and saved successfully!")
        return text
    else:
        st.error("No extractable text found in the PDF.")
        return None

def create_chatbot():
    """Build the retrieval-QA chain used by the Streamlit app.

    Requires the ``OPENAI_API_KEY`` environment variable. The IPC text is
    split into chunks, embedded with OpenAI embeddings, indexed in a FAISS
    vector store and wrapped in a ``RetrievalQA`` chain that retrieves the
    three most similar chunks per question.

    Returns:
        The ``RetrievalQA`` chain, or None (after showing a Streamlit error)
        when the API key is missing or no IPC text is available.
    """
    openai_key = os.getenv("OPENAI_API_KEY")

    if not openai_key:
        st.error("OpenAI API Key not found. Please set it in environment variables.")
        return None

    text = download_and_extract_ipc()

    if not text:
        st.error("No text available for chatbot training.")
        return None

    # Page texts are joined with single newlines and extracted PDF text may
    # contain no blank lines at all, so split on "\n" instead of the splitter's
    # default "\n\n"; otherwise chunks can far exceed chunk_size.
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_text(text)

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(texts, embeddings, normalize_L2=True)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    return qa_chain

# Streamlit UI: page chrome, one QA chain per browser session, and a single
# question box whose answer is rendered below the "Ask" button.
st.set_page_config(page_title="IPC Chatbot", page_icon="⚖️", layout="wide")

st.title("⚖️ Indian Penal Code (IPC) Chatbot")
st.write("Ask legal questions related to the Indian Penal Code (IPC).")

# Build the chain once per session, but keep retrying while it is still None so
# a failed first attempt (e.g. missing API key) is not cached for the whole
# session and its error message stays visible on every rerun.
if st.session_state.get("qa_chain") is None:
    st.session_state.qa_chain = create_chatbot()

user_input = st.text_input("Enter your question:", placeholder="e.g., What is Section 302 in IPC?")
if st.button("Ask"):
    question = (user_input or "").strip()
    if st.session_state.qa_chain is None:
        st.error("Chatbot is not ready. Please check API key or reload the app.")
    elif not question:
        # An empty question is a user-input problem, not a chatbot failure.
        st.warning("Please enter a question.")
    else:
        response = st.session_state.qa_chain.invoke(question)
        st.subheader("Response:")
        st.write(response["result"])

st.markdown("---")
st.caption("Powered by OpenAI and LangChain")


Overwriting app.py


In [16]:
!npm install -g localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K
changed 22 packages in 958ms
[1G[0K⠧[1G[0K
[1G[0K⠧[1G[0K3 packages are looking for funding
[1G[0K⠧[1G[0K  run `npm fund` for details
[1G[0K⠧[1G[0K

In [17]:
!streamlit run /content/app.py &>/content/logs.txt &

In [18]:
!wget -q -O - https://loca.lt/mytunnelpassword

34.23.216.100

In [None]:
!npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://shaggy-moose-stop.loca.lt
