In [7]:
import argparse
import pandas as pd
import requests

OLLAMA_URL = "http://localhost:11434/api/generate"
MODEL = "llama3.2"  # Change to your locally pulled model if needed

def redact_pii(text):
    """Send text to Ollama model to redact PII."""
    if not isinstance(text, str) or text.strip() == "":
        return text

    prompt = f"""
    You are an Agent to identify and replace PII. You can only replace and should not rewrite or rephrase the sentence.

    Please replace any name with [MASKED_NAME], address with [MASKED_ADDRESS], phone number with [MASKED_PHONE_NUMBER], company name with [MASKED_COMPANY_NAME], and any other personally identifiable information accordingly.

    Please replace name, address, phone number, bank account number, PAN, Aadhar, age, any identification number, EPF, Bill number, Company name, Company address, date, etc.

    Text:
    {text}

    Return only the redacted version of the text.
    """

    try:
        response = requests.post(OLLAMA_URL, json={
            "model": MODEL,
            "prompt": prompt,
            "stream": False
        })

        if response.status_code == 200:
            return response.json().get("response", "").strip()
        else:
            print(f"Error: {response.status_code} - {response.text}")
            return text
    except Exception as e:
        print(f"Error contacting Ollama: {e}")
        return text

def redact_excel_file(input_path, output_path, pii_columns):
    """Read Excel file, redact PII from selected columns, and save to new file."""
    try:
        df = pd.read_excel(input_path)
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return

    for col in pii_columns:
        if col not in df.columns:
            print(f"Column '{col}' not found in Excel. Skipping.")
            continue

        print(f"Redacting PII in column: {col}")
        df[col] = df[col].apply(redact_pii)

    try:
        df.to_excel(output_path, index=False)
        print(f"✅ Anonymized Excel saved to: {output_path}")
    except Exception as e:
        print(f"Error saving redacted Excel file: {e}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Redact PII from an Excel file using an LLM")
    parser.add_argument("--input", required=True, help="Path to the input Excel file")
    parser.add_argument("--output", required=True, help="Path to save the redacted Excel file")
    parser.add_argument("--columns", required=True, help="Comma-separated column names to redact")

    args = parser.parse_args()

    input_file = args.input
    output_file = args.output
    columns_with_pii = [col.strip() for col in args.columns.split(",")]

    redact_excel_file(input_file, output_file, columns_with_pii)

usage: ipykernel_launcher.py [-h] --input INPUT --output OUTPUT --columns
                             COLUMNS
ipykernel_launcher.py: error: the following arguments are required: --input, --output, --columns


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [6]:
!pip install ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m74.9 kB/s[0m eta [36m0:00:00[0ma [36m0:00:02[0mm
[?25hInstalling collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
Successfully installed ipywidgets-8.1.7 jupyterlab_widgets-3.0.15 widgetsnbextension-4.0.14

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip i