# Quantitative Information Flow (QIF) Analyses (AOL experimental dataset)

In [1]:
import pandas
from bvmlib.bvm import BVM

## AOL experimental dataset

In [2]:
%%time
aol_experimental_browsing_history = pandas.read_csv('AOL-experimental.csv', low_memory=False, on_bad_lines='warn', index_col=0)

CPU times: user 13.5 s, sys: 1.34 s, total: 14.8 s
Wall time: 14.8 s


In [3]:
# Preview the loaded frame (columns RandID and BrowsingHistory).
display(aol_experimental_browsing_history)

Unnamed: 0,RandID,BrowsingHistory
0,0,"[('about.com', Timestamp('2006-03-06 13:59:48'..."
1,2,"[('jr.com', Timestamp('2006-03-01 21:59:34')),..."
2,3,"[('webdate.com', Timestamp('2006-05-19 00:39:4..."
3,4,"[('microsoft.com', Timestamp('2006-03-29 22:32..."
4,5,"[('about.com', Timestamp('2006-05-30 21:58:47'..."
...,...,...
451195,521687,"[('1001freefonts.com', Timestamp('2006-04-08 1..."
451196,521688,"[('pluggedincleveland.com', Timestamp('2006-04..."
451197,521689,"[('arab2.com', Timestamp('2006-03-08 03:17:38'..."
451198,521690,"[('washingtonpost.com', Timestamp('2006-05-11 ..."


## Experiment 1: Third-party cookies on AOL experimental dataset

### Privacy

In [4]:
# Privacy view: discard the browsing histories and mirror RandID into a new UID
# column, leaving a two-column frame (RandID, UID). The loaded dataset itself
# is not modified.
temp = (
    aol_experimental_browsing_history
    .drop(columns=['BrowsingHistory'])
    .assign(UID=lambda frame: frame['RandID'])
)
# Show the frame together with the number of distinct values per column.
display(temp, temp.nunique())

Unnamed: 0,RandID,UID
0,0,0
1,2,2
2,3,3
3,4,4
4,5,5
...,...,...
451195,521687,521687
451196,521688,521688
451197,521689,521689
451198,521690,521690


RandID    451200
UID       451200
dtype: int64

In [5]:
%%time
# Experiment 1 / privacy: build a BVM model over the (RandID, UID) frame.
E1P = BVM(temp)
# UID is declared as the quasi-identifier ...
E1P.qids(['UID'])
# ... and RandID as the sensitive attribute.
E1P.sensitive(['RandID'])
# assess() yields a mapping; later cells read its 're_id' and 'att_inf' entries
# (presumably re-identification and attribute-inference results; confirm in bvmlib docs).
results = E1P.assess()

CPU times: user 6.95 s, sys: 30.1 ms, total: 6.98 s
Wall time: 6.99 s


In [6]:
# Show both result tables, 're_id' first and then 'att_inf'.
display(*(results[table] for table in ('re_id', 'att_inf')))

Unnamed: 0,QID,dCR,pCR,Prior,Posterior,Histogram
0,['UID'],1.0,451200,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


Unnamed: 0,QID,Sensitive,dCA,pCA,Prior,Posterior,Histogram
0,['UID'],RandID,0.0,451200.0,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


In [7]:
# Full-precision Prior value from row 0 of each result table.
display(*(results[table]['Prior'][0] for table in ('re_id', 'att_inf')))

2.2163120567375886e-06

2.2163120567375886e-06

In [8]:
# Full-precision Posterior value from row 0 of each result table.
display(*(results[table]['Posterior'][0] for table in ('re_id', 'att_inf')))

1.0

1.0

In [9]:
# Posterior-to-prior ratio from row 0 of each result table ('re_id', then 'att_inf').
display(*(
    results[table]['Posterior'][0] / results[table]['Prior'][0]
    for table in ('re_id', 'att_inf')
))

451200.0

451200.0

### Utility

In [10]:
# Utility view: the same data with RandID relabelled as UID. rename() returns a
# new frame, so the loaded dataset keeps its original column names.
temp = aol_experimental_browsing_history.rename(columns={'RandID':'UID'})
# Show the frame together with the number of distinct values per column.
display(temp, temp.nunique())

Unnamed: 0,UID,BrowsingHistory
0,0,"[('about.com', Timestamp('2006-03-06 13:59:48'..."
1,2,"[('jr.com', Timestamp('2006-03-01 21:59:34')),..."
2,3,"[('webdate.com', Timestamp('2006-05-19 00:39:4..."
3,4,"[('microsoft.com', Timestamp('2006-03-29 22:32..."
4,5,"[('about.com', Timestamp('2006-05-30 21:58:47'..."
...,...,...
451195,521687,"[('1001freefonts.com', Timestamp('2006-04-08 1..."
451196,521688,"[('pluggedincleveland.com', Timestamp('2006-04..."
451197,521689,"[('arab2.com', Timestamp('2006-03-08 03:17:38'..."
451198,521690,"[('washingtonpost.com', Timestamp('2006-05-11 ..."


UID                451200
BrowsingHistory    451200
dtype: int64

In [11]:
%%time
# Experiment 1 / utility: build a BVM model over the (UID, BrowsingHistory) frame.
E1U = BVM(temp)
# UID is declared as the quasi-identifier ...
E1U.qids(['UID'])
# ... and BrowsingHistory as the sensitive attribute.
E1U.sensitive(['BrowsingHistory'])
# Overwrites `results` from the privacy run; later cells read its 're_id' and
# 'att_inf' entries.
results = E1U.assess()

CPU times: user 12.8 s, sys: 28.3 ms, total: 12.8 s
Wall time: 12.8 s


In [12]:
# Show both result tables, 're_id' first and then 'att_inf'.
display(*(results[table] for table in ('re_id', 'att_inf')))

Unnamed: 0,QID,dCR,pCR,Prior,Posterior,Histogram
0,['UID'],1.0,451200,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


Unnamed: 0,QID,Sensitive,dCA,pCA,Prior,Posterior,Histogram
0,['UID'],BrowsingHistory,0.0,451200.0,2e-06,1.0,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


In [13]:
# Full-precision Prior value from row 0 of each result table.
display(*(results[table]['Prior'][0] for table in ('re_id', 'att_inf')))

2.2163120567375886e-06

2.2163120567375886e-06

In [14]:
# Full-precision Posterior value from row 0 of each result table.
display(*(results[table]['Posterior'][0] for table in ('re_id', 'att_inf')))

1.0

1.0

In [15]:
# Posterior-to-prior ratio from row 0 of each result table ('re_id', then 'att_inf').
display(*(
    results[table]['Posterior'][0] / results[table]['Prior'][0]
    for table in ('re_id', 'att_inf')
))

451200.0

451200.0