# Sample Notebook
This is a sample Jupyter Notebook uploaded to GitHub.

In [23]:
# 1. GPU check
!nvidia-smi

# 2. Install dependencies
!pip -q install -U transformers datasets sentencepiece accelerate huggingface_hub python-dotenv feedparser beautifulsoup4 sacrebleu rouge-score gradio



# 3. Clone or pull repo
from getpass import getpass
token = getpass("Enter your GitHub PAT: ")
USERNAME = "sukritichawla"
REPO = "legaldocs_ST"
REPO_URL = f"https://{token}@github.com/{USERNAME}/{REPO}.git"
!git clone $REPO_URL


# 4. Go inside repo
%cd /content/legaldocs_ST

# 5. Hugging Face login
from huggingface_hub import login
login()

Sat Sep 13 06:57:54 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   46C    P8             12W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [24]:
from transformers import pipeline

# Choose a simplification fine-tuned model (change to any HF model above)
model_name = "agentlans/flan-t5-small-simplifier"  # or "mrm8488/t5-small-finetuned-text-simplification"

simplifier = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device=-1)  # device=0 for GPU

legal_text = """The service provider reserves the unilateral right to amend, revise, or update these terms and policies annually on 01/01 or at any other reasonable time with thirty (30) days’ notice, and continued use of the service shall constitute acceptance of such amendments."""
# Many T5-style models expect an instruction prefix; try both:
inputs = "simplify: " + legal_text
out = simplifier(inputs, max_length=512, do_sample=False)   # deterministic output

print(out[0]["generated_text"])


Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


The service provider reserves the right to amend, revise, or update these terms and policies annually on 01/01 or at any other reasonable time with thirty (30) days’ notice, and continued use of the service shall constitute acceptance of such amendments.


In [32]:
from transformers import pipeline

# Load summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def simplify_text(text):
    """
    Summarize and simplify into short, plain English.
    Keeps important details (dates, notice periods, obligations).
    """
    # Step 1: Summarize to cut length but not truncate
    summary = summarizer(
        text,
        max_length=100,   # allow slightly longer to avoid cutoff
        min_length=15,
        do_sample=False
    )[0]['summary_text']

    # Step 2: Cleanup dictionary for plain English
    replacements = {
        "reserves the unilateral right to": "may",
        "reserves the right to": "may",
        "amend, revise, or update": "change",
        "amend or update": "change",
        "these terms and policies": "terms",
        "these terms": "terms",
        "continued use of the service shall constitute acceptance": "using the service means you accept",
        "shall constitute acceptance": "means you accept",
        "constitutes acceptance": "means you accept",
    }

    for k, v in replacements.items():
        summary = summary.replace(k, v)

    return summary.strip()


# ---------------- Example ----------------
legal_text = """The service provider reserves the unilateral right to amend,
revise, or update these terms and policies annually on 01/01 or at any
other reasonable time with thirty (30) days’ notice, and continued use
of the service shall constitute acceptance of such amendments."""

simplified = simplify_text(legal_text)

print("\n--- Simplified Text ---\n")
print(simplified)


Device set to use cuda:0
Your max_length is set to 100, but your input_length is only 61. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=30)



--- Simplified Text ---

The service provider may change terms annually on 01/01 or at any other reasonable time with thirty (30) days’ notice. Continued use of the service means you accept of such amendments.
