# Quantitative Information Flow (QIF) Analyses (AOL experimental dataset)

In [1]:
import pandas
from bvmlib.bvm import BVM

## AOL experimental dataset

In [2]:
%%time
# Loads AOL-experimental dataset into aol_experimental_browsing_history DataFrame.
aol_experimental_browsing_history = pandas.read_csv('AOL-experimental.csv', low_memory=False, on_bad_lines='warn', index_col=0)

CPU times: user 6.97 s, sys: 1.24 s, total: 8.21 s
Wall time: 8.15 s


In [3]:
# Preview the loaded frame; the rendering is truncated to the first and last rows.
# NOTE(review): the saved output exposes per-user browsing history — consider
# clearing this cell's output before sharing the notebook.
display(aol_experimental_browsing_history)

Unnamed: 0,RandID,BrowsingHistory
0,0,"[('edzapp.com', Timestamp('2006-05-13 00:12:16..."
1,1,"[('kidshealth.org', Timestamp('2006-03-01 11:5..."
2,3,"[('lawyers.com', Timestamp('2006-03-02 17:38:4..."
3,4,"[('foxnews.com', Timestamp('2006-03-12 20:33:2..."
4,5,"[('tvsquad.com', Timestamp('2006-03-06 23:15:2..."
...,...,...
436000,521687,"[('christianspeakers.com', Timestamp('2006-03-..."
436001,521688,"[('winonamanufacturing.com', Timestamp('2006-0..."
436002,521689,"[('dilithiumnetworks.com', Timestamp('2006-03-..."
436003,521690,"[('dfas.mil', Timestamp('2006-03-01 22:06:56')..."


## Experiment 1: Third-party cookies on AOL experimental dataset

### Privacy

In [4]:
# Privacy view: keep only the identifier. RandID is duplicated as UID so the same
# value can act as quasi-identifier (UID) and sensitive attribute (RandID) in the
# assessment that follows.
# Selecting ['RandID'] first avoids deep-copying the large BrowsingHistory column
# only to drop it again; assign() returns a new frame, so the source DataFrame is
# left untouched.
temp = aol_experimental_browsing_history[['RandID']].assign(
    UID=aol_experimental_browsing_history['RandID']
)
display(temp, temp.nunique())

Unnamed: 0,RandID,UID
0,0,0
1,1,1
2,3,3
3,4,4
4,5,5
...,...,...
436000,521687,521687
436001,521688,521688
436002,521689,521689
436003,521690,521690


RandID    436005
UID       436005
dtype: int64

In [5]:
%%time
# Experiment 1 / privacy: run the bvmlib assessment on the (RandID, UID) frame,
# with UID passed as the quasi-identifier list and RandID as the sensitive list.
# The object returned by assess() is indexed by 're_id' and 'att_inf' in the
# cells below.
E1P = BVM(temp)
E1P.qids(['UID'])
E1P.sensitive(['RandID'])
results = E1P.assess()

CPU times: user 3.64 s, sys: 19.6 ms, total: 3.66 s
Wall time: 3.65 s


In [6]:
# Render both result tables, 're_id' first and 'att_inf' second.
display(*(results[attack] for attack in ('re_id', 'att_inf')))

Unnamed: 0,QID,dCR,pCR,Prior,Posterior,Histogram
0,['UID'],1.0,436005,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


Unnamed: 0,QID,Sensitive,dCA,pCA,Prior,Posterior,Histogram
0,['UID'],RandID,0.0,436005.0,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


In [7]:
# Prior Bayes vulnerability, read from row 0 of each result table
# ('re_id' first, then 'att_inf').
display(*(results[attack]['Prior'][0] for attack in ('re_id', 'att_inf')))

2.2935516794532174e-06

2.2935516794532174e-06

In [8]:
# Posterior Bayes vulnerability, read from row 0 of each result table
# ('re_id' first, then 'att_inf').
display(*(results[attack]['Posterior'][0] for attack in ('re_id', 'att_inf')))

1.0

1.0

In [9]:
# Bayes leakage: posterior vulnerability divided by prior vulnerability,
# computed per result table ('re_id' first, then 'att_inf').
display(*(
    results[attack]['Posterior'][0] / results[attack]['Prior'][0]
    for attack in ('re_id', 'att_inf')
))

436005.0

436005.0

### Utility

In [10]:
# Utility view: expose RandID under the name UID and keep BrowsingHistory.
# rename() already returns a new DataFrame, so the explicit copy() beforehand was
# a redundant second copy of the whole frame; the source DataFrame stays untouched.
temp = aol_experimental_browsing_history.rename(columns={'RandID': 'UID'})
display(temp, temp.nunique())

Unnamed: 0,UID,BrowsingHistory
0,0,"[('edzapp.com', Timestamp('2006-05-13 00:12:16..."
1,1,"[('kidshealth.org', Timestamp('2006-03-01 11:5..."
2,3,"[('lawyers.com', Timestamp('2006-03-02 17:38:4..."
3,4,"[('foxnews.com', Timestamp('2006-03-12 20:33:2..."
4,5,"[('tvsquad.com', Timestamp('2006-03-06 23:15:2..."
...,...,...
436000,521687,"[('christianspeakers.com', Timestamp('2006-03-..."
436001,521688,"[('winonamanufacturing.com', Timestamp('2006-0..."
436002,521689,"[('dilithiumnetworks.com', Timestamp('2006-03-..."
436003,521690,"[('dfas.mil', Timestamp('2006-03-01 22:06:56')..."


UID                436005
BrowsingHistory    436005
dtype: int64

In [11]:
%%time
# Experiment 1 / utility: run the bvmlib assessment on the (UID, BrowsingHistory)
# frame, with UID passed as the quasi-identifier list and BrowsingHistory as the
# sensitive list. Note that this rebinds `results`, replacing the privacy results.
E1U = BVM(temp)
E1U.qids(['UID'])
E1U.sensitive(['BrowsingHistory'])
results = E1U.assess()

CPU times: user 8.41 s, sys: 37.9 ms, total: 8.44 s
Wall time: 8.42 s


In [12]:
# Render both result tables, 're_id' first and 'att_inf' second.
display(*(results[attack] for attack in ('re_id', 'att_inf')))

Unnamed: 0,QID,dCR,pCR,Prior,Posterior,Histogram
0,['UID'],1.0,436005,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


Unnamed: 0,QID,Sensitive,dCA,pCA,Prior,Posterior,Histogram
0,['UID'],BrowsingHistory,0.0,436005.0,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


In [13]:
# Prior Bayes vulnerability, read from row 0 of each result table
# ('re_id' first, then 'att_inf').
display(*(results[attack]['Prior'][0] for attack in ('re_id', 'att_inf')))

2.2935516794532174e-06

2.2935516794532174e-06

In [14]:
# Posterior Bayes vulnerability, read from row 0 of each result table
# ('re_id' first, then 'att_inf').
display(*(results[attack]['Posterior'][0] for attack in ('re_id', 'att_inf')))

1.0

1.0

In [15]:
# Bayes leakage: posterior vulnerability divided by prior vulnerability,
# computed per result table ('re_id' first, then 'att_inf').
display(*(
    results[attack]['Posterior'][0] / results[attack]['Prior'][0]
    for attack in ('re_id', 'att_inf')
))

436005.0

436005.0