In [1]:
import pandas as pd
from pyaaas.aaas import AaaS
from pyaaas.models.privacy_models import KAnonymity

In [2]:
# URL handed to the AaaS client; this notebook targets localhost, port 8080.
AAAS_URL = "http://localhost:8080"
aaas = AaaS(AAAS_URL)

In [3]:
# Eleven synthetic records keyed by row index 0-10, in the column -> {row: value}
# layout accepted by pd.DataFrame.
# Column names and string values deliberately carry no leading whitespace, so
# they match the "gender" / "zipcode" field names used by the later
# set_attribute_type and set_hierarchy calls.
test_data = {
    "age": {row: 34 + row for row in range(11)},
    "gender": {row: "male" if row % 2 == 0 else "female" for row in range(11)},
    "zipcode": {row: 81667 + row for row in range(11)},
}

In [4]:
# Build the input frame: outer keys of test_data become columns, inner keys the row index.
df = pd.DataFrame.from_dict(test_data, orient="columns")

In [5]:
# Display the input frame built from test_data.
df


Unnamed: 0,age,gender,zipcode
0,34,male,81667
1,35,female,81668
2,36,male,81669
3,37,female,81670
4,38,male,81671
5,39,female,81672
6,40,male,81673
7,41,female,81674
8,42,male,81675
9,43,female,81676


### Set data to be anonymized

In [6]:
# Register the input DataFrame on the client as the data to be anonymized.
aaas.set_data(df)

### Set attribute types for fields

In [7]:
# Attribute type per field, keyed by field name.
# NOTE(review): "zipcode" is marked INSENSITIVE although a generalization
# hierarchy is registered for it further down; confirm whether it (and
# possibly "gender") should be quasi-identifying for k-anonymity to act on it.
attribute_types = {
    "age": "IDENTIFYING",
    "gender": "INSENSITIVE",
    "zipcode": "INSENSITIVE",
}
aaas.set_attribute_type(attribute_types)

### Set hierarchy for field

In [8]:
def _generalization_row(zipcode, levels=5):
    """Return the zipcode followed by copies with the last 1..levels characters masked by '*'."""
    return [zipcode[: len(zipcode) - masked] + "*" * masked for masked in range(levels + 1)]


# One row per zipcode 81667..81677: the exact value first, then progressively
# coarser generalizations ending in the fully masked "*****".
zip_code_hierarchy = [_generalization_row(str(code)) for code in range(81667, 81678)]

In [9]:
# Attach the generalization hierarchy defined above to the "zipcode" field.
aaas.set_hierarchy("zipcode", zip_code_hierarchy)


### Create and set Privacy model

In [10]:
# Privacy model instance: KAnonymity configured with k=4.
kanon = KAnonymity(k=4)

In [11]:
# Register the k-anonymity model on the client.
aaas.set_model(kanon)

## Print current state 

In [12]:
from pyaaas.state_printer import jupyter_print_mapping

In [13]:
# Show the client's current configuration (privacy models, attribute types,
# hierarchies) using the Jupyter printer imported in the previous cell.
aaas.describe(jupyter_print_mapping)

Privacy Models


Unnamed: 0_level_0,Unnamed: 1_level_0,value
privacy_model,parameter,Unnamed: 2_level_1
KANONYMITY,k,4


----------------------------------------
Attribute Types


Unnamed: 0_level_0,type
field,Unnamed: 1_level_1
age,IDENTIFYING
gender,INSENSITIVE
zipcode,INSENSITIVE


----------------------------------------
Transform Models


Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,8,9,10
zipcode,level_1,8166*,8166*,8166*,8167*,8167*,8167*,8167*,8167*,8167*,8167*,8167*
zipcode,level_2,816**,816**,816**,816**,816**,816**,816**,816**,816**,816**,816**
zipcode,level_3,81***,81***,81***,81***,81***,81***,81***,81***,81***,81***,81***
zipcode,level_4,8****,8****,8****,8****,8****,8****,8****,8****,8****,8****,8****
zipcode,level_5,*****,*****,*****,*****,*****,*****,*****,*****,*****,*****,*****


----------------------------------------


In [14]:

# Same state summary as the previous describe call, this time passing the
# printer by keyword. jupyter_print_mapping is already imported in an earlier
# cell, so it is not imported again here.
aaas.describe(printer=jupyter_print_mapping)

Privacy Models


Unnamed: 0_level_0,Unnamed: 1_level_0,value
privacy_model,parameter,Unnamed: 2_level_1
KANONYMITY,k,4


----------------------------------------
Attribute Types


Unnamed: 0_level_0,type
field,Unnamed: 1_level_1
age,IDENTIFYING
gender,INSENSITIVE
zipcode,INSENSITIVE


----------------------------------------
Transform Models


Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,8,9,10
zipcode,level_1,8166*,8166*,8166*,8167*,8167*,8167*,8167*,8167*,8167*,8167*,8167*
zipcode,level_2,816**,816**,816**,816**,816**,816**,816**,816**,816**,816**,816**
zipcode,level_3,81***,81***,81***,81***,81***,81***,81***,81***,81***,81***,81***
zipcode,level_4,8****,8****,8****,8****,8****,8****,8****,8****,8****,8****,8****
zipcode,level_5,*****,*****,*****,*****,*****,*****,*****,*****,*****,*****,*****


----------------------------------------


In [15]:
# Inspect the attribute types currently stored on the client's payload.
aaas.payload.attribute_types

{'age': 'IDENTIFYING', 'gender': 'INSENSITIVE', 'zipcode': 'INSENSITIVE'}

### Run anonymization

In [16]:
# Run the anonymization with the data, attribute types, hierarchy and privacy
# model configured above, and keep the returned result object.
result = aaas.anonymize()


In [17]:
# Display the result object (shows its default repr).
result

<pyaaas.models.anonymize_result.AnonymizeResult at 0x7fbe1c2f1668>

### Get DataFrame from result

In [18]:
# Fetch the anonymized data as a DataFrame.
# NOTE(review): this rebinds `df`, which until now held the input data passed
# to aaas.set_data; re-running that earlier cell after this one would submit
# the anonymized frame instead. Consider a distinct name.
df = result.get_result_dataframe()

In [19]:
# Display the anonymized frame (df was rebound to the result in the previous cell).
df

Unnamed: 0,age,gender,zipcode
0,*,male,816**
1,*,female,816**
2,*,male,816**
3,*,female,816**
4,*,male,816**
5,*,female,816**
6,*,male,816**
7,*,female,816**
8,*,male,816**
9,*,female,816**


### Get risk metrics before and after anonymization

In [20]:
# Metrics reported by the result for the data before anonymization.
bmetrics = result.get_metrics_before()

In [21]:
# Display the before-anonymization metrics.
bmetrics

Unnamed: 0_level_0,value
metric,Unnamed: 1_level_1
measure_value,[%]
record_affected_by_highest_risk,100.0
sample_uniques,0.0
estimated_prosecutor_risk,9.090909090909092
population_model,DANKAR
records_affected_by_lowest_risk,100.0
estimated_marketer_risk,9.090909090909092
highest_prosecutor_risk,9.090909090909092
estimated_journalist_risk,9.090909090909092
lowest_risk,9.090909090909092


In [22]:
# Metrics reported by the result for the data after anonymization.
ametrics = result.get_metrics_after()

In [23]:
# Display the after-anonymization metrics.
# NOTE(review): the values shown for this cell are identical to the
# before-anonymization metrics; confirm this is expected for this configuration.
ametrics

Unnamed: 0_level_0,value
metric,Unnamed: 1_level_1
measure_value,[%]
record_affected_by_highest_risk,100.0
sample_uniques,0.0
estimated_prosecutor_risk,9.090909090909092
population_model,DANKAR
records_affected_by_lowest_risk,100.0
estimated_marketer_risk,9.090909090909092
highest_prosecutor_risk,9.090909090909092
estimated_journalist_risk,9.090909090909092
lowest_risk,9.090909090909092
