# Usage

### Anonymizing class `Anonymize` defined in `anonymize.py`

In [1]:
import spacy
from pprint import pprint
import tabulate

from anonymize import Anonymize

### Set tags to be considered 

In [2]:
tags_to_anon = ['PERSON', 'ORG', 'GPE', 'NORP', 'FAC']
#tags_to_anon = ['PERSON', 'ORG']

anon = Anonymize(tags_to_anon)

##### Kindly refer to https://spacy.io/api/annotation#named-entities for tag definitions

### Call `get_anon_string` for all the strings

In [3]:
orig_strings = ["Anand P Gupta was a student in my university, the XYZ Institute of Technology located at Pilani, Rajasthan, India. He performed well.",
           
           "Gabriella Marquex worked with me for a long long time in the Yugoslavic state. I was her direct superviser at Global link Technology."
          ]

In [4]:
anon_strings = [anon.get_anon_string(orig_string) for orig_string in orig_strings]

for idx in range(len(orig_strings)):
    print("Example #{0}".format(idx + 1))
    to_print = tabulate.tabulate(zip(orig_strings[idx].split(), anon_strings[idx].split()), \
                             headers = ["Original String", "Anonymized String"])
    print(to_print)
    print("*"*50)

Example #1
Original String    Anonymized String
-----------------  -------------------
Anand              PERSON-B-0
P                  PERSON-I-1
Gupta              PERSON-I-2
was                was
a                  a
student            student
in                 in
my                 my
university,        university
the                ,
XYZ                ORG-B-3
Institute          ORG-I-4
of                 ORG-I-5
Technology         ORG-I-6
located            ORG-I-7
at                 located
Pilani,            at
Rajasthan,         GPE-B-8
India.             ,
He                 GPE-B-9
performed          ,
well.              GPE-B-10
**************************************************
Example #2
Original String    Anonymized String
-----------------  -------------------
Gabriella          PERSON-B-11
Marquex            PERSON-I-12
worked             worked
with               with
me                 me
for                for
a                  a
long               long
long     

### To print mapping  -

In [5]:
pprint(anon.mapping)

{'Anand': 'PERSON-B-0',
 'Gabriella': 'PERSON-B-11',
 'Global': 'ORG-B-14',
 'Gupta': 'PERSON-I-2',
 'India': 'GPE-B-10',
 'Institute': 'ORG-I-5',
 'Marquex': 'PERSON-I-12',
 'P': 'PERSON-I-1',
 'Pilani': 'GPE-B-8',
 'Rajasthan': 'GPE-B-9',
 'Technology': 'ORG-I-7',
 'XYZ': 'ORG-I-4',
 'Yugoslavic': 'NORP-B-13',
 'link': 'ORG-I-15',
 'of': 'ORG-I-6',
 'the': 'ORG-B-3'}


##### Note that even though words like `the` and `of` are in the mapping, not all of them are replaced in the text. Only those that are part of Named Entities are replaced. 

### To save mapping - 

In [6]:
filepath = 'anon_mapping.pkl'
anon.save_mapping(filepath)

### To load mapping - 

In [7]:
mapping = anon.load_mapping(filepath)
pprint(mapping)

{'Anand': 'PERSON-B-0',
 'Gabriella': 'PERSON-B-11',
 'Global': 'ORG-B-14',
 'Gupta': 'PERSON-I-2',
 'India': 'GPE-B-10',
 'Institute': 'ORG-I-5',
 'Marquex': 'PERSON-I-12',
 'P': 'PERSON-I-1',
 'Pilani': 'GPE-B-8',
 'Rajasthan': 'GPE-B-9',
 'Technology': 'ORG-I-7',
 'XYZ': 'ORG-I-4',
 'Yugoslavic': 'NORP-B-13',
 'link': 'ORG-I-15',
 'of': 'ORG-I-6',
 'the': 'ORG-B-3'}
