# AI Lawyer: Legal Document Q&A System

In [9]:
pip install openai faiss-cpu flask sentence-transformers numpy requests tqdm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [15]:
import os
from getpass import getpass

import faiss
import fitz
import numpy as np
import openai
from flask import Flask, request, jsonify
from sentence_transformers import SentenceTransformer

### Set up OpenAI keys

In [13]:
# Never store the API key in the notebook: it ends up in version control and in
# every shared copy. Any key that was previously pasted into this cell should be
# treated as compromised and revoked in the OpenAI dashboard.
#
# The key is taken from the OPENAI_API_KEY environment variable when present;
# otherwise it is requested interactively (input is not echoed or saved).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

#### RAG Implementation

**Extract Text From Apple Terms & Conditions**

In [14]:
pip install pymupdf

Collecting pymupdf
  Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: pymupdf
[0mSuccessfully installed pymupdf-1.25.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [17]:
def extract_test_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        str: The text of all pages joined together with no extra separator.
    """
    # The context manager closes the document (and its underlying file handle)
    # even if extracting a page raises; the original left it open.
    with fitz.open(pdf_path) as doc:
        # join() avoids building the result through repeated string concatenation.
        return "".join(page.get_text() for page in doc)


# Correctly spelled alias. The original name (with the "test" typo) is kept so
# existing callers keep working.
extract_text_from_pdf = extract_test_from_pdf

# Load the full document text once and show the first 500 characters as a sanity check.
PDF_PATH = "AppleTerms.pdf"
pdf_text = extract_test_from_pdf(PDF_PATH)
print("Extracted Text: ", pdf_text[:500])

Extracted Text:  ENGLISH
IMPORTANT: BY USING YOUR iPHONE OR iPAD (“DEVICE”), YOU ARE AGREEING TO BE BOUND 
BY THE FOLLOWING TERMS:
A.     APPLE iOS AND iPadOS SOFTWARE LICENSE AGREEMENT
B.     APPLE PAY & WALLET TERMS AND CONDITIONS 
C.     NOTICES FROM APPLE
APPLE INC
iOS AND iPadOS SOFTWARE LICENSE AGREEMENT
Single Use License
PLEASE READ THIS SOFTWARE LICENSE AGREEMENT (“LICENSE”) CAREFULLY BEFORE 
USING YOUR DEVICE OR DOWNLOADING THE SOFTWARE UPDATE ACCOMPANYING THIS 
LICENSE. BY USING YOUR DEVICE OR DOWNLOA


**Chunk the Text**

In [18]:
def chunk_text(text, chunk_size=500, overlap=0):
    """Split text into chunks of at most ``chunk_size`` whitespace-separated words.

    Args:
        text: The text to split. Words are obtained with ``str.split()``, so runs
            of whitespace (including newlines) collapse to single spaces.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words each chunk shares with the previous one.
            Defaults to 0 (no overlap). Must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        list[str]: The chunks in document order; an empty list for empty text.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    # A zero step would make range() raise an opaque error, and a negative one
    # would silently yield no chunks, so both are rejected up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Stop once a chunk reaches the end of the text; otherwise an overlapping
        # window would emit a trailing chunk fully contained in the previous one.
        if start + chunk_size >= len(words):
            break
    return chunks

**Generate and Store Embeddings**

In [19]:
# Shared sentence-embedding model, used both when indexing chunks and when embedding queries.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

def store_in_vector_db(text_chunks):
    """Embed the chunks and load them into a new FAISS L2 index.

    Args:
        text_chunks: Non-empty sequence of strings to embed with the
            module-level ``embedder``.

    Returns:
        tuple: ``(index, text_chunks)`` where ``index`` is a ``faiss.IndexFlatL2``
        holding one vector per chunk (in the same order) and ``text_chunks`` is
        the input sequence, returned unchanged.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # With no chunks there is no embedding dimension to size the index with;
    # fail with a clear message instead of an IndexError on `.shape[1]`.
    if len(text_chunks) == 0:
        raise ValueError("text_chunks is empty; there is nothing to index")

    embeddings = embedder.encode(text_chunks, convert_to_numpy=True)
    # FAISS expects a C-contiguous float32 matrix; this is a no-op when the
    # encoder already returns one.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, text_chunks

**Perform Semantic Search**

In [20]:
def retrieve_relevant_text(query, index, text_chunks, top_k=1):
    """Return the stored chunk(s) closest to the query in embedding space.

    Args:
        query: The user's question; embedded with the module-level ``embedder``.
        index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, ids)``, e.g. the FAISS index built for ``text_chunks``.
        text_chunks: The chunks that were indexed, in index order.
        top_k: How many nearest chunks to return. Defaults to 1, which returns
            a single chunk exactly as before.

    Returns:
        str: The matching chunk, or the ``top_k`` best chunks joined by a blank
        line (best match first). Empty string when nothing can be retrieved.

    Raises:
        ValueError: If ``top_k`` is less than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(text_chunks))
    if k == 0:
        return ""

    query_embedding = embedder.encode([query], convert_to_numpy=True)
    _, neighbor_ids = index.search(query_embedding, k)

    # FAISS fills missing neighbours with the label -1. Indexing a list with -1
    # would silently return the *last* chunk, so out-of-range labels are dropped.
    hits = [text_chunks[i] for i in neighbor_ids[0] if 0 <= i < len(text_chunks)]
    return "\n\n".join(hits)

#### Integrate with OpenAI GPT-3.5 Turbo

In [21]:
def generate_legal_response(query, context, model="gpt-3.5-turbo"):
    """Ask the chat model to answer a question using the retrieved context.

    Args:
        query: The user's question.
        context: Document text retrieved for the question; sent to the model
            ahead of the question in the user message.
        model: Chat model name. Defaults to ``"gpt-3.5-turbo"``.

    Returns:
        str: The content of the first choice in the model's reply.
    """
    # `openai.ChatCompletion.create` was removed in openai>=1.0 (which an
    # unpinned `pip install openai` now installs). The 1.x module-level client
    # exposes the same call as `openai.chat.completions.create` and still
    # honours `openai.api_key`.
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a legal assistant who provides clear and accurate legal explanations."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}
        ]
    )
    # 1.x responses are typed objects, not dicts, so use attribute access.
    return response.choices[0].message.content

In [22]:
# Build the retrieval index once, before the server starts handling requests.
# The /ask handler reads `index` and `text_chunks` as globals, and no other
# visible cell defines them, so on a fresh kernel every request would otherwise
# fail with NameError.
text_chunks = chunk_text(pdf_text)
index, text_chunks = store_in_vector_db(text_chunks)

app = Flask(__name__)

@app.route("/ask", methods=["POST"])
def ask():
    """Answer a question about the indexed document.

    Expects a JSON body of the form ``{"query": "<question>"}``.

    Returns:
        A JSON response ``{"response": "<answer>"}`` on success, or
        ``{"error": "<message>"}`` with HTTP status 400 when the body is not a
        JSON object containing a non-empty string ``query``.
    """
    # silent=True yields None instead of raising when the body is missing or is
    # not valid JSON, so all malformed requests get the same 400 reply below.
    payload = request.get_json(silent=True)
    user_query = payload.get("query") if isinstance(payload, dict) else None
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty string field 'query'."}), 400

    context = retrieve_relevant_text(user_query, index, text_chunks)
    response = generate_legal_response(user_query, context)
    return jsonify({"response": response})

if __name__ == "__main__":
    # Host and port can be overridden with FLASK_HOST / FLASK_PORT; the defaults
    # are unchanged. "0.0.0.0" listens on every network interface, so anyone who
    # can reach this machine can call /ask (and spend OpenAI credits). Set
    # FLASK_HOST=127.0.0.1 to restrict the server to local access.
    # app.run() blocks this cell until the server is interrupted.
    app.run(
        host=os.environ.get("FLASK_HOST", "0.0.0.0"),
        port=int(os.environ.get("FLASK_PORT", "5000")),
    )

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://10.0.4.144:5000
Press CTRL+C to quit
