# Usage

### The anonymizer class `Anonymize` is defined in `anonymize.py`

In [1]:
import spacy
from pprint import pprint

from anonymize import Anonymize

### Set tags to be considered

In [2]:
# Named-entity labels (spaCy label scheme) whose tokens get replaced.
tags_to_anon = [
    "PERSON",
    "ORG",
    "GPE",
    "NORP",
    "FAC",
]

# One anonymizer instance is shared by every cell below.
anon = Anonymize(tags_to_anon)

##### Kindly refer to https://spacy.io/api/annotation#named-entities for tag definitions

### Call `get_anon_string` for all the strings

In [3]:
# Sample inputs. Adjacent string literals are concatenated by the parser, so
# each parenthesised group below is a single string. The spelling "superviser"
# is kept verbatim because the recorded output reflects it.
strings = [
    (
        "Anand P Gupta was a student in my university,\n"
        "the XYZ Institute of Technology located "
        "at Pilani, Rajasthan, India. He performed well."
    ),
    (
        "Gabriella Marquex worked with me for a long long time "
        "in the Yugoslavic state. I was her direct "
        "superviser at Global link Technology."
    ),
]

In [4]:
# Run every input through the same anonymizer instance, preserving input order.
anon_strings = list(map(anon.get_anon_string, strings))

pprint(anon_strings)

['PERSON-B-0 PERSON-I-1 PERSON-I-2 was a student in my university , \n'
 ' ORG-B-3 ORG-I-4 ORG-I-5 ORG-I-6 ORG-I-7 located at GPE-B-8 , GPE-B-9 , '
 'GPE-B-10 . He performed well .',
 'PERSON-B-11 PERSON-I-12 worked with me for a long long time in the NORP-B-13 '
 'state . I was her direct superviser at ORG-B-14 ORG-I-15 ORG-I-7 .']


### To print mapping -

In [5]:
# Display the original-token -> replacement-tag dictionary held on `anon`.
pprint(anon.mapping)

{'Anand': 'PERSON-B-0',
 'Gabriella': 'PERSON-B-11',
 'Global': 'ORG-B-14',
 'Gupta': 'PERSON-I-2',
 'India': 'GPE-B-10',
 'Institute': 'ORG-I-5',
 'Marquex': 'PERSON-I-12',
 'P': 'PERSON-I-1',
 'Pilani': 'GPE-B-8',
 'Rajasthan': 'GPE-B-9',
 'Technology': 'ORG-I-7',
 'XYZ': 'ORG-I-4',
 'Yugoslavic': 'NORP-B-13',
 'link': 'ORG-I-15',
 'of': 'ORG-I-6',
 'the': 'ORG-B-3'}


##### Note that even though words like `the` and `of` are in the mapping, not every occurrence of them is replaced in the text. Only occurrences that are part of a named entity are replaced (e.g. `the` in "the Yugoslavic state" above is left as-is).

### To save mapping - 

In [6]:
# Path the mapping is saved to; the load cell reuses this same variable.
# NOTE(review): the .pkl extension suggests a pickle file — confirm in anonymize.py.
filepath = 'anon_mapping.pkl'
anon.save_mapping(filepath)

### To load mapping - 

In [7]:
# Read the mapping back from the file written by the save step and display it.
# NOTE(review): if load_mapping unpickles the file (the .pkl extension suggests
# so), only load mapping files from a trusted source — unpickling can execute
# arbitrary code.
mapping = anon.load_mapping(filepath)
pprint(mapping)

{'Anand': 'PERSON-B-0',
 'Gabriella': 'PERSON-B-11',
 'Global': 'ORG-B-14',
 'Gupta': 'PERSON-I-2',
 'India': 'GPE-B-10',
 'Institute': 'ORG-I-5',
 'Marquex': 'PERSON-I-12',
 'P': 'PERSON-I-1',
 'Pilani': 'GPE-B-8',
 'Rajasthan': 'GPE-B-9',
 'Technology': 'ORG-I-7',
 'XYZ': 'ORG-I-4',
 'Yugoslavic': 'NORP-B-13',
 'link': 'ORG-I-15',
 'of': 'ORG-I-6',
 'the': 'ORG-B-3'}
