# Sample Notebook
This is a sample Jupyter Notebook uploaded to GitHub.

In [23]:
# 1. GPU check
!nvidia-smi

# 2. Install dependencies
!pip -q install -U transformers datasets sentencepiece accelerate huggingface_hub python-dotenv feedparser beautifulsoup4 sacrebleu rouge-score gradio



# 3. Clone or pull repo
from getpass import getpass
token = getpass("Enter your GitHub PAT: ")
USERNAME = "sukritichawla"
REPO = "legaldocs_ST"
REPO_URL = f"https://{token}@github.com/{USERNAME}/{REPO}.git"
!git clone $REPO_URL


# 4. Go inside repo
%cd /content/legaldocs_ST

# 5. Hugging Face login
from huggingface_hub import login
login()

Sat Sep 13 06:57:54 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   46C    P8             12W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [24]:
from transformers import pipeline

# Choose a simplification fine-tuned model (change to any HF model above)
model_name = "agentlans/flan-t5-small-simplifier"  # or "mrm8488/t5-small-finetuned-text-simplification"

simplifier = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device=-1)  # device=0 for GPU

legal_text = """The service provider reserves the unilateral right to amend, revise, or update these terms and policies annually on 01/01 or at any other reasonable time with thirty (30) days’ notice, and continued use of the service shall constitute acceptance of such amendments."""
# Many T5-style models expect an instruction prefix; try both:
inputs = "simplify: " + legal_text
out = simplifier(inputs, max_length=512, do_sample=False)   # deterministic output

print(out[0]["generated_text"])


Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The service provider reserves the right to amend, revise, or update these terms and policies annually on 01/01 or at any other reasonable time with thirty (30) days’ notice, and continued use of the service shall constitute acceptance of such amendments.


In [38]:
from transformers import pipeline

# ---------------- 1️⃣ Instruction-following model ----------------
simplifier = pipeline("text2text-generation", model="google/flan-t5-base")

# ---------------- 2️⃣ General cleanup dictionary ----------------
general_cleanup_dict = {
    # Rights and obligations
    "reserves the unilateral right to": "may",
    "reserves the right to": "may",
    "shall be entitled to": "can",
    "is entitled to": "can",
    "shall have the right to": "can",
    "may at its sole discretion": "can",
    "is authorized to": "can",
    "has the right to": "can",

    # Actions
    "amend, revise, or update": "change",
    "amend or update": "change",
    "terminated earlier": "ended earlier",
    "shall be terminated": "can be ended",
    "may at any time": "anytime",
    "take any action": "act",
    "perform any obligations": "do its duties",
    "exercise any rights": "use its rights",

    # Agreement/contract terms
    "shall become effective": "starts",
    "shall remain in full force and effect": "will continue",
    "hereinafter referred to as": "called",
    "notwithstanding the foregoing": "despite this",
    "pursuant to": "under",
    "subject to": "under",
    "prior written notice": "written notice",
    "herein": "in this document",
    "hereof": "of this document",
    "hereunder": "under this",
    "for the term of": "for",
    "during the term": "while active",
    "from time to time": "occasionally",

    # Acceptance/consent
    "shall constitute acceptance": "means you accept",
    "constitutes acceptance": "means you accept",
    "continued use of the service": "using the service",
    "by continuing to use": "by using",
    "shall be deemed to": "means",
    "is deemed to": "means",
    "consent to": "agree to",

    # Misc legalese
    "breach of terms": "breaking the rules",
    "explicitly terminated by either party": "ended by either party",
    "unless otherwise agreed": "unless agreed otherwise",
    "at any other reasonable time": "anytime",
    "in accordance with": "under",
    "as soon as practicable": "quickly",
    "without prejudice to": "without affecting",
    "to the extent permitted by law": "if allowed by law",
    "for the avoidance of doubt": "to be clear",
    "including but not limited to": "including",
    "any and all": "all",
    "each and every": "every",
    "without limitation": "including",
    "in the event of": "if",
    "as applicable": "if relevant",
    "as required by law": "if law requires",
    "without notice": "without informing",
    "at its discretion": "as it decides",
    "in perpetuity": "forever",
    "for all purposes": "for all uses",
    "to the fullest extent permitted by law": "as much as allowed by law",
    "not limited to": "including",
}

# ---------------- 3️⃣ Simplification function ----------------
def simplify_legal_text(text):
    """
    Simplify any legal text into short, plain English.
    Keeps key details like dates, obligations, notice periods.
    """
    # Step 1: Instruction prompt
    prompt = (
        "Rewrite this legal text in short, plain English, "
        "keeping all important details such as dates, obligations, and notice periods. "
        "Avoid legal jargon and make it concise:\n\n" + text
    )

    # Step 2: Model generates simplified text
    result = simplifier(
        prompt,
        max_new_tokens=80,
        do_sample=False
    )[0]["generated_text"]

    # Step 3: Apply general cleanup dictionary
    for k, v in general_cleanup_dict.items():
        result = result.replace(k, v)

    # Step 4: Clean extra spaces
    result = " ".join(result.split())
    return result.strip()

# ---------------- 4️⃣ Example usage ----------------
legal_texts = [
    """Users are solely responsible for ensuring that their use of the service, including all uploaded, shared, or transmitted content, complies with applicable local, state, and federal laws, and the company reserves the right to cooperate with legal authorities in any investigation.""",

    """All intellectual property rights, including but not limited to trademarks, logos, copyrights, patents, and proprietary software, shall remain the exclusive property of the company or its licensors, and users shall not claim any ownership or usage rights beyond what is expressly granted.""",

    """No waiver of any provision of this agreement shall constitute a continuing waiver, and any failure by the company to enforce a provision shall not prevent future enforcement of the same or other provisions.""",

    """In accordance with the applicable law, all parties shall comply with their obligations and any breach of these terms may result in remedies including but not limited to damages or termination of this agreement.""",

    """The service provider reserves the unilateral right to amend, revise, or update these terms and policies annually on 01/01 or at any other reasonable time with thirty (30) days’ notice, and continued use of the service shall constitute acceptance of such amendments.""",

    """This agreement shall become effective on 01/09/2025 and shall remain in full force and effect until explicitly terminated by either party with sixty (60) days’ prior written notice, unless terminated earlier pursuant to breach of terms.""",

    """Users must immediately notify the service provider of any unauthorized access or security breach relating to their account, and failure to do so within forty-eight (48) hours may limit the provider’s liability.""",

    """Invoices issued by the service provider shall be payable within fifteen (15) business days from the date of issuance; any failure to remit payment within this period shall accrue interest at a rate of 1.5% per month until paid in full.""",

    """The company shall not be liable for any indirect, incidental, special, or consequential damages arising from use or inability to use the service, even if the company has been advised of the possibility of such damages.""",

    """All confidential information shared between the parties must be safeguarded for a minimum period of seven (7) years from the date of disclosure, and may only be used for purposes explicitly authorized under this agreement, unless otherwise required by law."""
]


for t in legal_texts:
    simplified = simplify_legal_text(t)
    print("\n--- Simplified Text ---\n")
    print(simplified)


Device set to use cuda:0



--- Simplified Text ---

Users are solely responsible for ensuring that their use of the Service, including all uploaded, shared, or transmitted content, complies with applicable local, state, and federal laws, and the company may cooperate with legal authorities in any investigation.

--- Simplified Text ---

All intellectual property rights, including trademarks, logos, copyrights, patents, and proprietary software, shall remain the exclusive property of the company or its licensors, and users shall not claim any ownership or usage rights beyond what is expressly granted.

--- Simplified Text ---

No waiver of any provision of this Agreement shall constitute a continuing waiver, and any failure by the Company to enforce a provision shall not prevent future enforcement of the same or other provisions.

--- Simplified Text ---

In accordance with the applicable law, all parties shall comply with their obligations and any breach of these terms may result in remedies including damages o