# Reversible data anonymization with Microsoft Presidio

[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/data_anonymization.ipynb)


## Use case



## Overview



## Quickstart



In [1]:
# Install necessary packages
# ! pip install langchain langchain-experimental openai
# ! python -m spacy download en_core_web_lg

In [2]:
from langchain_experimental.data_anonymizer import PresidioAnonymizer

anonymizer = PresidioAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"],
    faker_seed=0,
)

anonymizer.anonymize(
    "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"
)

'My name is Sydney Davis, call me at 515-978-1565 or email me at tammy76@example.com'

In [3]:
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD"],
    faker_seed=0,
)

anonymizer.anonymize(
    "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"
)

'My name is Sydney Davis, call me at 515-978-1565 or email me at tammy76@example.com'

In [4]:
anonymizer.deanonymizer_mapping

{'PERSON': {'Sydney Davis': 'Slim Shady'},
 'PHONE_NUMBER': {'515-978-1565': '313-666-7440'},
 'EMAIL_ADDRESS': {'tammy76@example.com': 'real.slim.shady@gmail.com'}}

In [5]:
anonymizer.anonymize(
    "Do you have his VISA card number? Yep, it's 4001 9192 5753 7193. I'm John Doe by the way."
)

"Do you have his VISA card number? Yep, it's 180014841858395. I'm Lisa Clayton by the way."

In [6]:
anonymizer.deanonymizer_mapping

{'PERSON': {'Sydney Davis': 'Slim Shady', 'Lisa Clayton': 'John Doe'},
 'PHONE_NUMBER': {'515-978-1565': '313-666-7440'},
 'EMAIL_ADDRESS': {'tammy76@example.com': 'real.slim.shady@gmail.com'},
 'CREDIT_CARD': {'180014841858395': '4001 9192 5753 7193'}}

In [7]:
fake_name = "Sydney Davis"
fake_phone = "515-978-1565"
fake_email = "tammy76@example.com"
fake_credit_card = "180014841858395"

In [8]:
anonymized_text = f"""{fake_name} recently lost his wallet. 
Inside is some cash and his credit card with the number {fake_credit_card}. 
If you would find it, please call him at {fake_phone} or email him: {fake_email}."""

print(anonymized_text)

Sydney Davis recently lost his wallet. 
Inside is some cash and his credit card with the number 180014841858395. 
If you would find it, please call him at 515-978-1565 or email him: tammy76@example.com.


In [9]:
print(anonymizer.deanonymize(anonymized_text))

Slim Shady recently lost his wallet. 
Inside is some cash and his credit card with the number 4001 9192 5753 7193. 
If you would find it, please call him at 313-666-7440 or email him: real.slim.shady@gmail.com.


In [10]:
anonymized_text = f"{anonymized_text}\n{fake_name} will be very grateful!"
print(anonymizer.deanonymize(anonymized_text))

Slim Shady recently lost his wallet. 
Inside is some cash and his credit card with the number 4001 9192 5753 7193. 
If you would find it, please call him at 313-666-7440 or email him: real.slim.shady@gmail.com.
Slim Shady will be very grateful!


In [11]:
anonymizer.save_deanonymizer_mapping("deanonymizer_mapping.json")
# anonymizer.save_deanonymizer_mapping("deanonymizer_mapping.yaml")

In [12]:
anonymizer = PresidioReversibleAnonymizer()

anonymizer.deanonymizer_mapping

{}

In [13]:
anonymizer.load_deanonymizer_mapping("deanonymizer_mapping.json")

anonymizer.deanonymizer_mapping

{'PERSON': {'Sydney Davis': 'Slim Shady', 'Lisa Clayton': 'John Doe'},
 'PHONE_NUMBER': {'515-978-1565': '313-666-7440'},
 'EMAIL_ADDRESS': {'tammy76@example.com': 'real.slim.shady@gmail.com'},
 'CREDIT_CARD': {'180014841858395': '4001 9192 5753 7193'}}

In [14]:
from langchain.chains.transform import TransformChain

anonymizer = PresidioReversibleAnonymizer()


def anonymize_func(inputs: dict) -> dict:
    text = inputs["text"]
    return {"anonymized_text": anonymizer.anonymize(text)}


anonymize_chain = TransformChain(
    input_variables=["text"],
    output_variables=["anonymized_text"],
    transform=anonymize_func,
)

anonymize_chain("You can find our super secret data at https://supersecretdata.com")

{'text': 'You can find our super secret data at https://supersecretdata.com',
 'anonymized_text': 'You can find our super secret data at http://www.anderson.biz/'}

In [15]:
from operator import itemgetter
from langchain.prompts.prompt import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.llms.openai import OpenAI

template = """According to this text, where can you find our super secret data?

{anonymized_text}

Answer:"""
prompt = PromptTemplate(input_variables=["anonymized_text"], template=template)
llm_chain = LLMChain(llm=OpenAI(), prompt=prompt)


chain = (
    anonymize_chain
    | {"anonymized_text": itemgetter("anonymized_text")}
    | prompt
    | llm_chain
)
chain.invoke("You can find our super secret data at https://supersecretdata.com")

{'anonymized_text': StringPromptValue(text='According to this text, where can you find our super secret data?\n\nYou can find our super secret data at https://harding.com/\n\nAnswer:'),
 'text': ' https://harding.com/'}

In [16]:
def deanonymize_func(inputs: dict) -> dict:
    text = inputs["llm_response"]
    return {"deaonymized_text": anonymizer.deanonymize(text)}


deanonymize_chain = TransformChain(
    input_variables=["llm_response"],
    output_variables=["deaonymized_text"],
    transform=deanonymize_func,
)

In [17]:
chain = (
    anonymize_chain
    | {"anonymized_text": itemgetter("anonymized_text")}
    | prompt
    | llm_chain
    | {
        "anonymized_text": itemgetter("anonymized_text"),
        "llm_response": itemgetter("text"),
    }
    | deanonymize_chain
)
chain.invoke("You can find our super secret data at https://supersecretdata.com")

{'anonymized_text': StringPromptValue(text='According to this text, where can you find our super secret data?\n\nYou can find our super secret data at https://zimmerman.info/\n\nAnswer:'),
 'llm_response': ' https://zimmerman.info/',
 'deaonymized_text': ' https://supersecretdata.com'}