## Generating binary categories for training
**Steps**

- Setup categoricals for each segment based on author consensus
- Assign a binary value to each category of segment, in corresponding category columns (one-hot encoding)

In [1]:
import pandas as pd
import numpy as np
import nltk

In [2]:
# from sqlalchemy import create_engine
# from sqlalchemy_utils import database_exists, create_database
import psycopg2

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
#Connect to db
dbname = 'beforeiagree_db'
username = 'peterostendorp'

#Create engine
con = psycopg2.connect(database = dbname, user = username)

In [5]:
#Get annotations df
sql = """
SELECT * FROM annotations
WHERE "Policy UID" IN
(SELECT "Policy UID" FROM sites
WHERE "In 115 Set?" = TRUE);
"""
annotations = pd.read_sql_query(sql,con)

In [6]:
#Get sites df
sql = """
SELECT * FROM sites
WHERE "In 115 Set?" = TRUE;
"""
sites = pd.read_sql_query(sql,con)

In [7]:
#Get segments df
sql = """
SELECT * FROM segments
WHERE "Policy UID" IN
(SELECT "Policy UID" FROM sites
WHERE "In 115 Set?" = TRUE)
"""

segments = pd.read_sql_query(sql,con)

In [159]:
#df.groupby("date").agg({"duration": np.sum, "user_id": lambda x: x.nunique()})
#            duration  user_id
print(annotations['Policy UID'].nunique())
annotated_segments = pd.DataFrame(annotations.groupby('Policy UID').agg(lambda x: x.nunique())['segment_id'])
print(annotated_segments)
annotations.head()

115
            segment_id
Policy UID            
20                  36
21                  36
26                  68
32                  29
33                  65
58                  47
59                  13
70                  71
82                  46
93                  40
98                  41
105                 37
133                 43
135                 39
144                 31
164                 17
175                 34
186                 47
200                 43
202                 26
207                 34
228                 25
303                 54
320                 44
325                 20
331                 17
348                 62
359                 41
394                 42
414                 23
...                ...
1206                17
1221                 6
1224                19
1252                25
1259                56
1261                13
1264                14
1300                82
1306                26
1360                19
1361   

Unnamed: 0,Policy UID,annotation_id,batch_id,annotator_id,segment_id,category_name,attributes_value_pairs,date,policy_url
0,1017,20137,test_category_labeling_highlight_fordham_aaaaa,121,0,Other,"{""Other Type"": {""selectedText"": ""Sci-News.com ...",NaT,http://www.sci-news.com/privacy-policy.html
1,1017,20324,test_category_labeling_highlight_fordham_aaaaa,121,1,First Party Collection/Use,"{""Collection Mode"": {""selectedText"": ""nformati...",NaT,http://www.sci-news.com/privacy-policy.html
2,1017,20325,test_category_labeling_highlight_fordham_aaaaa,121,1,First Party Collection/Use,"{""Collection Mode"": {""selectedText"": ""nformati...",NaT,http://www.sci-news.com/privacy-policy.html
3,1017,20326,test_category_labeling_highlight_fordham_aaaaa,121,2,Data Retention,"{""Personal Information Type"": {""selectedText"":...",NaT,http://www.sci-news.com/privacy-policy.html
4,1017,20327,test_category_labeling_highlight_fordham_aaaaa,121,3,First Party Collection/Use,"{""Collection Mode"": {""selectedText"": ""Not sele...",NaT,http://www.sci-news.com/privacy-policy.html


In [201]:
print(segments['Policy UID'].nunique())
n_segments = pd.DataFrame(segments.groupby('Policy UID').count()['segment_id'])
print(n_segments['segment_id'].sum())
print(n_segments)
segments.head()

115
6469
            segment_id
Policy UID            
20                  36
21                  36
26                  68
32                 348
33                 585
58                  47
59                  13
70                 142
82                  92
93                  40
98                  41
105                 37
133                 43
135                 39
144                186
164                 17
175                 34
186                376
200                 43
202                 26
207                 68
228                150
303                 54
320                 44
325                 20
331                 17
348                 62
359                 41
394                 42
414                 23
...                ...
1206                17
1221                 6
1224                19
1252                25
1259                56
1261                13
1264                14
1300               164
1306                26
1360                19
13

Unnamed: 0,Policy UID,segment_id,segments
0,20,0,<strong> Privacy Policy </strong> <br> <br> <s...
1,20,1,This privacy policy does not apply to Sites ma...
2,20,2,"By visiting our Sites, you are accepting the p..."
3,20,3,<strong> What Information Is Collected? </stro...
4,20,4,<strong> Personally Identifiable Information <...


In [134]:
print(sites['Policy UID'].nunique())
print(sites['Site UID'].nunique())
sites.head()

115
115


Unnamed: 0,Policy UID,Site UID,Site URL,Site Human-Readable Name,Site Check Date,In 115 Set?,Comments,Sector,Policy URL,Policy collection date,Policy last updated date,policy_text
0,20,1,theatlantic.com,The Atlantic,2016-02-08,True,"Alexa Rank: 975 (Global), 289 (US)",Arts,theatlantic.com/privacy-policy/,2015-07-02,2015-01-01,<strong> Privacy Policy </strong> <br> <br> <s...
1,21,2,imdb.com,IMDb,2016-02-08,True,"Alexa Rank: 49 (Global), 27 (US)",Arts,imdb.com/privacy,2015-07-02,2014-12-05,"IMDb Privacy Notice <br> <br>|||Last Updated, ..."
2,26,3,nytimes.com,New York Times,2016-02-08,True,"Alexa Rank: 101 (Global), 22 (US)",Arts,nytimes.com/privacy,2015-07-08,2015-06-10,<strong> Privacy Policy </strong> <br> <br> La...
3,32,4,theverge.com,The Verge,2016-02-08,True,"Alexa Rank: 525 (Global), 230 (US)",Home,voxmedia.com/privacy-policy,2015-07-02,2014-05-01,Vox Media Privacy Policy <br> <br>|||<strong> ...
4,33,5,nbc.com,NBC Universal,2016-02-08,True,"Alexa Rank: 1548 (Global), 426 (US)",Arts,nbcuniversal.com/privacy/full-privacy-policy,2015-07-02,2015-01-14,Full Privacy Policy <br> <br> Last updated: 14...


There are many cases where the number of segments in a doc far exceeds the number of annotated segments.

In [166]:
#How do annotated segments compare to total number of segments?
for i in range(0,115):
    if annotated_segments.iloc[i]['segment_id'] == n_segments.iloc[i]['segment_id']:
        print('Yes')
    else:
        print('No')

Yes
Yes
Yes
No
No
Yes
Yes
No
No
Yes
Yes
Yes
Yes
Yes
No
Yes
Yes
No
Yes
Yes
No
No
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
No
Yes
Yes
Yes
No
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
No
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
No
Yes
No
No
Yes
No
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
No
Yes
Yes
No
Yes
Yes
Yes
Yes
Yes
Yes
Yes
No
Yes
Yes
Yes
Yes
Yes
Yes
Yes
Yes
No
No
Yes


## This is the tricky bit where we join segments and annotations

In [8]:
#Indexed version of segments
seg_ind = segments.set_index(['Policy UID','segment_id'])
seg_ind['key'] = 1 #TFH
seg_ind.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,segments,key
Policy UID,segment_id,Unnamed: 2_level_1,Unnamed: 3_level_1
20,0,<strong> Privacy Policy </strong> <br> <br> <s...,1
20,1,This privacy policy does not apply to Sites ma...,1
20,2,"By visiting our Sites, you are accepting the p...",1
20,3,<strong> What Information Is Collected? </stro...,1
20,4,<strong> Personally Identifiable Information <...,1


In [9]:
#Indexed version of annotations
ann_ind = annotations.set_index(['Policy UID','segment_id'])
ann_ind['key'] = 1 #TFH
ann_ind.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,annotation_id,batch_id,annotator_id,category_name,attributes_value_pairs,date,policy_url,key
Policy UID,segment_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1017,0,20137,test_category_labeling_highlight_fordham_aaaaa,121,Other,"{""Other Type"": {""selectedText"": ""Sci-News.com ...",NaT,http://www.sci-news.com/privacy-policy.html,1
1017,1,20324,test_category_labeling_highlight_fordham_aaaaa,121,First Party Collection/Use,"{""Collection Mode"": {""selectedText"": ""nformati...",NaT,http://www.sci-news.com/privacy-policy.html,1
1017,1,20325,test_category_labeling_highlight_fordham_aaaaa,121,First Party Collection/Use,"{""Collection Mode"": {""selectedText"": ""nformati...",NaT,http://www.sci-news.com/privacy-policy.html,1
1017,2,20326,test_category_labeling_highlight_fordham_aaaaa,121,Data Retention,"{""Personal Information Type"": {""selectedText"":...",NaT,http://www.sci-news.com/privacy-policy.html,1
1017,3,20327,test_category_labeling_highlight_fordham_aaaaa,121,First Party Collection/Use,"{""Collection Mode"": {""selectedText"": ""Not sele...",NaT,http://www.sci-news.com/privacy-policy.html,1


In [None]:
#Join the annotations with the segments using an outer join
#joined = pd.merge(annotations,segments,on=['Policy UID','segment_id'],how='outer')
joined = seg_ind.merge(ann_ind)
print(joined.shape)
joined.head()

There are usually instances where a single segment gets multiple categories.

In [173]:
#Are there often cases where there are multiple category values for a single segment?
print(joined.groupby(['Policy UID','segment_id']).agg(lambda x: x.nunique())['category_name'])

Policy UID  segment_id
20          0             1
            1             1
            2             2
            3             1
            4             2
            5             1
            6             2
            7             1
            8             1
            9             2
            10            1
            11            2
            12            1
            13            1
            14            2
            15            1
            16            2
            17            2
            18            1
            19            1
            20            1
            21            2
            22            2
            23            3
            24            1
            25            1
            26            2
            27            2
            28            1
            29            1
                         ..
1713        59            2
            60            3
            61            1
            62           

In [14]:
pids = list(segments['Policy UID'].unique())
categories = pd.DataFrame(columns=['Policy UID','segment_id','category_name'])
for pid in pids:
    df = segments[segments['Policy UID']==pid]
    segs = list(df['segment_id'].unique())
    for seg in segs:
        ann = annotations[(annotations['Policy UID']==pid) & annotations['segment_id']==seg]['category_name']
        cat = ann if ann.shape[0] != 0 else 'None'
        print('Policy: ' + str(pid) + ', Segment: ' + str(seg) + ', Category: ' + str(cat))
        tmp = pd.DataFrame({'Policy UID':[pid], 'segment_id':[seg], 'category_name':[cat]})
        categories = pd.concat([categories,tmp],ignore_index=True)

Policy: 20, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control
14             Third Party Sharing/Collection
15                                      Other
16                        User Choice/Control
17                        User Choice/Control
18             User Access, Edit and Deletion
19                                      Other
20                                      Other


Policy: 26, Segment: 38, Category: None
Policy: 26, Segment: 39, Category: None
Policy: 26, Segment: 40, Category: None
Policy: 26, Segment: 41, Category: None
Policy: 26, Segment: 42, Category: None
Policy: 26, Segment: 43, Category: None
Policy: 26, Segment: 44, Category: None
Policy: 26, Segment: 45, Category: None
Policy: 26, Segment: 46, Category: None
Policy: 26, Segment: 47, Category: None
Policy: 26, Segment: 48, Category: None
Policy: 26, Segment: 49, Category: None
Policy: 26, Segment: 50, Category: None
Policy: 26, Segment: 51, Category: None
Policy: 26, Segment: 52, Category: None
Policy: 26, Segment: 53, Category: None
Policy: 26, Segment: 54, Category: None
Policy: 26, Segment: 55, Category: None
Policy: 26, Segment: 56, Category: None
Policy: 26, Segment: 57, Category: None
Policy: 26, Segment: 60, Category: None
Policy: 26, Segment: 61, Category: None
Policy: 26, Segment: 62, Category: None
Policy: 26, Segment: 63, Category: None
Policy: 26, Segment: 64, Category: None


Policy: 33, Segment: 40, Category: None
Policy: 33, Segment: 41, Category: None
Policy: 33, Segment: 42, Category: None
Policy: 33, Segment: 43, Category: None
Policy: 33, Segment: 44, Category: None
Policy: 33, Segment: 46, Category: None
Policy: 33, Segment: 48, Category: None
Policy: 33, Segment: 49, Category: None
Policy: 33, Segment: 50, Category: None
Policy: 33, Segment: 51, Category: None
Policy: 33, Segment: 52, Category: None
Policy: 33, Segment: 53, Category: None
Policy: 33, Segment: 54, Category: None
Policy: 33, Segment: 55, Category: None
Policy: 33, Segment: 57, Category: None
Policy: 33, Segment: 58, Category: None
Policy: 33, Segment: 59, Category: None
Policy: 33, Segment: 60, Category: None
Policy: 33, Segment: 61, Category: None
Policy: 33, Segment: 62, Category: None
Policy: 21, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                           

Policy: 32, Segment: 9, Category: None
Policy: 32, Segment: 10, Category: None
Policy: 32, Segment: 11, Category: None
Policy: 32, Segment: 12, Category: None
Policy: 32, Segment: 13, Category: None
Policy: 32, Segment: 14, Category: None
Policy: 32, Segment: 15, Category: None
Policy: 32, Segment: 16, Category: None
Policy: 32, Segment: 17, Category: None
Policy: 32, Segment: 19, Category: None
Policy: 32, Segment: 20, Category: None
Policy: 32, Segment: 21, Category: None
Policy: 32, Segment: 22, Category: None
Policy: 32, Segment: 23, Category: None
Policy: 32, Segment: 24, Category: None
Policy: 32, Segment: 25, Category: None
Policy: 32, Segment: 26, Category: None
Policy: 32, Segment: 27, Category: None
Policy: 32, Segment: 28, Category: None
Policy: 70, Segment: 23, Category: None
Policy: 70, Segment: 37, Category: None
Policy: 70, Segment: 22, Category: None
Policy: 70, Segment: 19, Category: None
Policy: 70, Segment: 20, Category: None
Policy: 70, Segment: 21, Category: None
P

Policy: 70, Segment: 53, Category: None
Policy: 70, Segment: 54, Category: None
Policy: 70, Segment: 55, Category: None
Policy: 70, Segment: 56, Category: None
Policy: 70, Segment: 57, Category: None
Policy: 70, Segment: 58, Category: None
Policy: 70, Segment: 59, Category: None
Policy: 70, Segment: 60, Category: None
Policy: 70, Segment: 61, Category: None
Policy: 70, Segment: 62, Category: None
Policy: 70, Segment: 63, Category: None
Policy: 70, Segment: 64, Category: None
Policy: 70, Segment: 65, Category: None
Policy: 70, Segment: 66, Category: None
Policy: 70, Segment: 67, Category: None
Policy: 70, Segment: 68, Category: None
Policy: 70, Segment: 69, Category: None
Policy: 70, Segment: 70, Category: None
Policy: 59, Segment: 6, Category: None
Policy: 59, Segment: 5, Category: None
Policy: 59, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                             

Policy: 58, Segment: 30, Category: None
Policy: 58, Segment: 31, Category: None
Policy: 58, Segment: 32, Category: None
Policy: 58, Segment: 33, Category: None
Policy: 58, Segment: 34, Category: None
Policy: 58, Segment: 35, Category: None
Policy: 58, Segment: 36, Category: None
Policy: 58, Segment: 37, Category: None
Policy: 58, Segment: 38, Category: None
Policy: 58, Segment: 39, Category: None
Policy: 58, Segment: 42, Category: None
Policy: 58, Segment: 43, Category: None
Policy: 58, Segment: 44, Category: None
Policy: 58, Segment: 45, Category: None
Policy: 58, Segment: 46, Category: None
Policy: 82, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Us

Policy: 676, Segment: 2, Category: None
Policy: 676, Segment: 3, Category: None
Policy: 676, Segment: 4, Category: None
Policy: 686, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control
14             Third Party Sharing/Collection
15                                      Other
16                        User Choice/Control
17                        User Choice/Control
18             Us

Policy: 93, Segment: 13, Category: None
Policy: 93, Segment: 14, Category: None
Policy: 93, Segment: 15, Category: None
Policy: 93, Segment: 16, Category: None
Policy: 93, Segment: 17, Category: None
Policy: 93, Segment: 18, Category: None
Policy: 93, Segment: 19, Category: None
Policy: 93, Segment: 20, Category: None
Policy: 93, Segment: 21, Category: None
Policy: 93, Segment: 22, Category: None
Policy: 93, Segment: 23, Category: None
Policy: 93, Segment: 24, Category: None
Policy: 93, Segment: 25, Category: None
Policy: 93, Segment: 26, Category: None
Policy: 93, Segment: 27, Category: None
Policy: 93, Segment: 28, Category: None
Policy: 93, Segment: 29, Category: None
Policy: 93, Segment: 30, Category: None
Policy: 93, Segment: 31, Category: None
Policy: 93, Segment: 32, Category: None
Policy: 93, Segment: 33, Category: None
Policy: 93, Segment: 34, Category: None
Policy: 93, Segment: 35, Category: None
Policy: 93, Segment: 36, Category: None
Policy: 93, Segment: 37, Category: None


Policy: 98, Segment: 39, Category: None
Policy: 98, Segment: 3, Category: None
Policy: 98, Segment: 4, Category: None
Policy: 98, Segment: 5, Category: None
Policy: 98, Segment: 6, Category: None
Policy: 98, Segment: 7, Category: None
Policy: 98, Segment: 8, Category: None
Policy: 98, Segment: 9, Category: None
Policy: 98, Segment: 10, Category: None
Policy: 98, Segment: 11, Category: None
Policy: 98, Segment: 12, Category: None
Policy: 98, Segment: 13, Category: None
Policy: 98, Segment: 14, Category: None
Policy: 98, Segment: 15, Category: None
Policy: 98, Segment: 16, Category: None
Policy: 98, Segment: 17, Category: None
Policy: 98, Segment: 18, Category: None
Policy: 98, Segment: 19, Category: None
Policy: 98, Segment: 20, Category: None
Policy: 98, Segment: 21, Category: None
Policy: 98, Segment: 22, Category: None
Policy: 98, Segment: 23, Category: None
Policy: 98, Segment: 24, Category: None
Policy: 98, Segment: 25, Category: None
Policy: 98, Segment: 26, Category: None
Policy:

Policy: 105, Segment: 9, Category: None
Policy: 105, Segment: 10, Category: None
Policy: 105, Segment: 11, Category: None
Policy: 105, Segment: 12, Category: None
Policy: 105, Segment: 13, Category: None
Policy: 105, Segment: 30, Category: None
Policy: 105, Segment: 14, Category: None
Policy: 105, Segment: 15, Category: None
Policy: 105, Segment: 16, Category: None
Policy: 105, Segment: 17, Category: None
Policy: 105, Segment: 18, Category: None
Policy: 105, Segment: 19, Category: None
Policy: 105, Segment: 20, Category: None
Policy: 105, Segment: 21, Category: None
Policy: 105, Segment: 22, Category: None
Policy: 105, Segment: 23, Category: None
Policy: 105, Segment: 24, Category: None
Policy: 105, Segment: 25, Category: None
Policy: 105, Segment: 26, Category: None
Policy: 105, Segment: 27, Category: None
Policy: 105, Segment: 28, Category: None
Policy: 105, Segment: 29, Category: None
Policy: 105, Segment: 31, Category: None
Policy: 105, Segment: 32, Category: None
Policy: 105, Segm

Policy: 135, Segment: 6, Category: None
Policy: 135, Segment: 7, Category: None
Policy: 135, Segment: 8, Category: None
Policy: 135, Segment: 37, Category: None
Policy: 135, Segment: 9, Category: None
Policy: 135, Segment: 10, Category: None
Policy: 135, Segment: 11, Category: None
Policy: 135, Segment: 12, Category: None
Policy: 135, Segment: 13, Category: None
Policy: 135, Segment: 14, Category: None
Policy: 135, Segment: 15, Category: None
Policy: 135, Segment: 16, Category: None
Policy: 135, Segment: 17, Category: None
Policy: 135, Segment: 18, Category: None
Policy: 135, Segment: 19, Category: None
Policy: 135, Segment: 20, Category: None
Policy: 135, Segment: 23, Category: None
Policy: 135, Segment: 24, Category: None
Policy: 135, Segment: 25, Category: None
Policy: 135, Segment: 26, Category: None
Policy: 135, Segment: 27, Category: None
Policy: 135, Segment: 28, Category: None
Policy: 135, Segment: 29, Category: None
Policy: 135, Segment: 30, Category: None
Policy: 135, Segment

Policy: 164, Segment: 12, Category: None
Policy: 164, Segment: 13, Category: None
Policy: 164, Segment: 14, Category: None
Policy: 164, Segment: 15, Category: None
Policy: 164, Segment: 16, Category: None
Policy: 186, Segment: 6, Category: None
Policy: 186, Segment: 7, Category: None
Policy: 186, Segment: 8, Category: None
Policy: 186, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Cont

Policy: 175, Segment: 18, Category: None
Policy: 175, Segment: 19, Category: None
Policy: 175, Segment: 20, Category: None
Policy: 175, Segment: 21, Category: None
Policy: 175, Segment: 22, Category: None
Policy: 175, Segment: 23, Category: None
Policy: 175, Segment: 24, Category: None
Policy: 175, Segment: 25, Category: None
Policy: 175, Segment: 26, Category: None
Policy: 175, Segment: 27, Category: None
Policy: 175, Segment: 28, Category: None
Policy: 175, Segment: 29, Category: None
Policy: 175, Segment: 30, Category: None
Policy: 175, Segment: 31, Category: None
Policy: 175, Segment: 32, Category: None
Policy: 175, Segment: 33, Category: None
Policy: 1713, Segment: 24, Category: None
Policy: 1713, Segment: 25, Category: None
Policy: 1713, Segment: 26, Category: None
Policy: 1713, Segment: 27, Category: None
Policy: 1713, Segment: 62, Category: None
Policy: 1713, Segment: 63, Category: None
Policy: 1713, Segment: 64, Category: None
Policy: 1713, Segment: 65, Category: None
Policy: 

Policy: 1713, Segment: 40, Category: None
Policy: 1713, Segment: 41, Category: None
Policy: 1713, Segment: 42, Category: None
Policy: 1713, Segment: 43, Category: None
Policy: 1713, Segment: 44, Category: None
Policy: 1713, Segment: 45, Category: None
Policy: 1713, Segment: 46, Category: None
Policy: 1713, Segment: 47, Category: None
Policy: 1713, Segment: 48, Category: None
Policy: 1713, Segment: 49, Category: None
Policy: 1713, Segment: 50, Category: None
Policy: 1713, Segment: 68, Category: None
Policy: 1713, Segment: 69, Category: None
Policy: 1713, Segment: 51, Category: None
Policy: 1713, Segment: 52, Category: None
Policy: 1713, Segment: 53, Category: None
Policy: 1713, Segment: 54, Category: None
Policy: 1713, Segment: 55, Category: None
Policy: 1713, Segment: 56, Category: None
Policy: 1713, Segment: 57, Category: None
Policy: 1713, Segment: 58, Category: None
Policy: 1713, Segment: 59, Category: None
Policy: 1713, Segment: 60, Category: None
Policy: 1713, Segment: 61, Categor

Policy: 348, Segment: 36, Category: None
Policy: 348, Segment: 37, Category: None
Policy: 348, Segment: 38, Category: None
Policy: 348, Segment: 39, Category: None
Policy: 348, Segment: 40, Category: None
Policy: 348, Segment: 41, Category: None
Policy: 348, Segment: 42, Category: None
Policy: 348, Segment: 43, Category: None
Policy: 348, Segment: 44, Category: None
Policy: 348, Segment: 45, Category: None
Policy: 348, Segment: 46, Category: None
Policy: 348, Segment: 47, Category: None
Policy: 348, Segment: 48, Category: None
Policy: 348, Segment: 49, Category: None
Policy: 348, Segment: 50, Category: None
Policy: 348, Segment: 51, Category: None
Policy: 348, Segment: 52, Category: None
Policy: 348, Segment: 53, Category: None
Policy: 348, Segment: 54, Category: None
Policy: 348, Segment: 55, Category: None
Policy: 348, Segment: 56, Category: None
Policy: 348, Segment: 58, Category: None
Policy: 348, Segment: 59, Category: None
Policy: 348, Segment: 60, Category: None
Policy: 348, Seg

Policy: 517, Segment: 37, Category: None
Policy: 517, Segment: 38, Category: None
Policy: 517, Segment: 39, Category: None
Policy: 517, Segment: 40, Category: None
Policy: 517, Segment: 41, Category: None
Policy: 517, Segment: 42, Category: None
Policy: 517, Segment: 43, Category: None
Policy: 517, Segment: 44, Category: None
Policy: 517, Segment: 45, Category: None
Policy: 517, Segment: 46, Category: None
Policy: 517, Segment: 47, Category: None
Policy: 517, Segment: 48, Category: None
Policy: 517, Segment: 49, Category: None
Policy: 517, Segment: 50, Category: None
Policy: 517, Segment: 51, Category: None
Policy: 517, Segment: 52, Category: None
Policy: 517, Segment: 53, Category: None
Policy: 517, Segment: 54, Category: None
Policy: 517, Segment: 55, Category: None
Policy: 517, Segment: 56, Category: None
Policy: 517, Segment: 57, Category: None
Policy: 517, Segment: 58, Category: None
Policy: 517, Segment: 59, Category: None
Policy: 517, Segment: 60, Category: None
Policy: 517, Seg

Name: category_name, Length: 23157, dtype: object
Policy: 627, Segment: 1, Category: 17164                                   Other
17536                                   Other
17547                                   Other
17551                                   Other
17557          Third Party Sharing/Collection
17561          Third Party Sharing/Collection
17564          Third Party Sharing/Collection
17566          Third Party Sharing/Collection
17567                                   Other
17569          Third Party Sharing/Collection
17573    International and Specific Audiences
17574                            Do Not Track
17575              First Party Collection/Use
17577                                   Other
17581          Third Party Sharing/Collection
17583          Third Party Sharing/Collection
17585              First Party Collection/Use
17587          Third Party Sharing/Collection
17589          Third Party Sharing/Collection
17591    International and Specific Audie

Policy: 635, Segment: 2, Category: None
Policy: 635, Segment: 3, Category: None
Policy: 635, Segment: 4, Category: None
Policy: 635, Segment: 5, Category: None
Policy: 635, Segment: 6, Category: None
Policy: 635, Segment: 7, Category: None
Policy: 635, Segment: 8, Category: None
Policy: 635, Segment: 9, Category: None
Policy: 635, Segment: 10, Category: None
Policy: 635, Segment: 11, Category: None
Policy: 635, Segment: 12, Category: None
Policy: 635, Segment: 13, Category: None
Policy: 635, Segment: 14, Category: None
Policy: 635, Segment: 15, Category: None
Policy: 635, Segment: 16, Category: None
Policy: 635, Segment: 17, Category: None
Policy: 635, Segment: 18, Category: None
Policy: 635, Segment: 19, Category: None
Policy: 635, Segment: 20, Category: None
Policy: 635, Segment: 21, Category: None
Policy: 635, Segment: 22, Category: None
Policy: 635, Segment: 23, Category: None
Policy: 635, Segment: 24, Category: None
Policy: 635, Segment: 26, Category: None
Policy: 635, Segment: 27

Policy: 928, Segment: 6, Category: None
Policy: 928, Segment: 7, Category: None
Policy: 928, Segment: 8, Category: None
Policy: 928, Segment: 9, Category: None
Policy: 928, Segment: 10, Category: None
Policy: 928, Segment: 11, Category: None
Policy: 928, Segment: 12, Category: None
Policy: 928, Segment: 13, Category: None
Policy: 928, Segment: 14, Category: None
Policy: 928, Segment: 15, Category: None
Policy: 928, Segment: 16, Category: None
Policy: 928, Segment: 17, Category: None
Policy: 928, Segment: 18, Category: None
Policy: 928, Segment: 19, Category: None
Policy: 928, Segment: 20, Category: None
Policy: 928, Segment: 21, Category: None
Policy: 928, Segment: 22, Category: None
Policy: 928, Segment: 23, Category: None
Policy: 928, Segment: 24, Category: None
Policy: 928, Segment: 25, Category: None
Policy: 928, Segment: 26, Category: None
Policy: 928, Segment: 27, Category: None
Policy: 928, Segment: 28, Category: None
Policy: 928, Segment: 29, Category: None
Policy: 928, Segment

Policy: 940, Segment: 44, Category: None
Policy: 940, Segment: 45, Category: None
Policy: 940, Segment: 46, Category: None
Policy: 940, Segment: 47, Category: None
Policy: 940, Segment: 48, Category: None
Policy: 940, Segment: 49, Category: None
Policy: 940, Segment: 50, Category: None
Policy: 940, Segment: 51, Category: None
Policy: 940, Segment: 52, Category: None
Policy: 940, Segment: 53, Category: None
Policy: 940, Segment: 54, Category: None
Policy: 940, Segment: 55, Category: None
Policy: 940, Segment: 56, Category: None
Policy: 940, Segment: 61, Category: None
Policy: 940, Segment: 62, Category: None
Policy: 940, Segment: 63, Category: None
Policy: 940, Segment: 64, Category: None
Policy: 940, Segment: 65, Category: None
Policy: 940, Segment: 66, Category: None
Policy: 940, Segment: 67, Category: None
Policy: 940, Segment: 68, Category: None
Policy: 940, Segment: 69, Category: None
Policy: 940, Segment: 70, Category: None
Policy: 940, Segment: 71, Category: None
Policy: 940, Seg

Policy: 200, Segment: 18, Category: None
Policy: 200, Segment: 19, Category: None
Policy: 200, Segment: 20, Category: None
Policy: 200, Segment: 21, Category: None
Policy: 200, Segment: 22, Category: None
Policy: 200, Segment: 23, Category: None
Policy: 200, Segment: 24, Category: None
Policy: 200, Segment: 25, Category: None
Policy: 200, Segment: 26, Category: None
Policy: 200, Segment: 27, Category: None
Policy: 200, Segment: 28, Category: None
Policy: 200, Segment: 29, Category: None
Policy: 200, Segment: 30, Category: None
Policy: 200, Segment: 31, Category: None
Policy: 200, Segment: 32, Category: None
Policy: 200, Segment: 33, Category: None
Policy: 200, Segment: 34, Category: None
Policy: 200, Segment: 35, Category: None
Policy: 200, Segment: 36, Category: None
Policy: 200, Segment: 37, Category: None
Policy: 200, Segment: 38, Category: None
Policy: 200, Segment: 39, Category: None
Policy: 200, Segment: 40, Category: None
Policy: 200, Segment: 41, Category: None
Policy: 200, Seg

Policy: 202, Segment: 8, Category: None
Policy: 202, Segment: 9, Category: None
Policy: 202, Segment: 10, Category: None
Policy: 202, Segment: 11, Category: None
Policy: 202, Segment: 12, Category: None
Policy: 202, Segment: 13, Category: None
Policy: 202, Segment: 14, Category: None
Policy: 202, Segment: 15, Category: None
Policy: 202, Segment: 16, Category: None
Policy: 202, Segment: 17, Category: None
Policy: 202, Segment: 18, Category: None
Policy: 202, Segment: 19, Category: None
Policy: 202, Segment: 20, Category: None
Policy: 202, Segment: 21, Category: None
Policy: 202, Segment: 22, Category: None
Policy: 202, Segment: 23, Category: None
Policy: 202, Segment: 24, Category: None
Policy: 202, Segment: 25, Category: None
Policy: 207, Segment: 18, Category: None
Policy: 207, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4   

Policy: 414, Segment: 18, Category: None
Policy: 414, Segment: 19, Category: None
Policy: 414, Segment: 20, Category: None
Policy: 414, Segment: 21, Category: None
Policy: 414, Segment: 22, Category: None
Policy: 228, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control
14             Third Party Sharing/Collection
15                                      Other
16                      

Policy: 1636, Segment: 2, Category: None
Policy: 1636, Segment: 3, Category: None
Policy: 1636, Segment: 4, Category: None
Policy: 1636, Segment: 5, Category: None
Policy: 1636, Segment: 6, Category: None
Policy: 1636, Segment: 7, Category: None
Policy: 1636, Segment: 8, Category: None
Policy: 1636, Segment: 9, Category: None
Policy: 1636, Segment: 10, Category: None
Policy: 1636, Segment: 30, Category: None
Policy: 1636, Segment: 11, Category: None
Policy: 1636, Segment: 12, Category: None
Policy: 1636, Segment: 13, Category: None
Policy: 1636, Segment: 14, Category: None
Policy: 1636, Segment: 15, Category: None
Policy: 1636, Segment: 16, Category: None
Policy: 1636, Segment: 17, Category: None
Policy: 1636, Segment: 18, Category: None
Policy: 1636, Segment: 19, Category: None
Policy: 1636, Segment: 20, Category: None
Policy: 1636, Segment: 21, Category: None
Policy: 1636, Segment: 22, Category: None
Policy: 1636, Segment: 23, Category: None
Policy: 1636, Segment: 24, Category: None


Policy: 303, Segment: 24, Category: None
Policy: 303, Segment: 25, Category: None
Policy: 303, Segment: 26, Category: None
Policy: 303, Segment: 27, Category: None
Policy: 303, Segment: 28, Category: None
Policy: 303, Segment: 29, Category: None
Policy: 303, Segment: 30, Category: None
Policy: 303, Segment: 53, Category: None
Policy: 303, Segment: 31, Category: None
Policy: 303, Segment: 32, Category: None
Policy: 303, Segment: 33, Category: None
Policy: 303, Segment: 34, Category: None
Policy: 303, Segment: 35, Category: None
Policy: 303, Segment: 36, Category: None
Policy: 303, Segment: 37, Category: None
Policy: 303, Segment: 38, Category: None
Policy: 303, Segment: 39, Category: None
Policy: 303, Segment: 40, Category: None
Policy: 303, Segment: 41, Category: None
Policy: 303, Segment: 42, Category: None
Policy: 303, Segment: 43, Category: None
Policy: 303, Segment: 44, Category: None
Policy: 303, Segment: 45, Category: None
Policy: 303, Segment: 46, Category: None
Policy: 303, Seg

Policy: 331, Segment: 2, Category: None
Policy: 331, Segment: 3, Category: None
Policy: 331, Segment: 4, Category: None
Policy: 331, Segment: 5, Category: None
Policy: 331, Segment: 6, Category: None
Policy: 331, Segment: 7, Category: None
Policy: 331, Segment: 8, Category: None
Policy: 331, Segment: 9, Category: None
Policy: 331, Segment: 10, Category: None
Policy: 331, Segment: 12, Category: None
Policy: 331, Segment: 13, Category: None
Policy: 331, Segment: 14, Category: None
Policy: 331, Segment: 15, Category: None
Policy: 331, Segment: 16, Category: None
Policy: 325, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Spec

Policy: 359, Segment: 1, Category: 1813                                    Other
14027                                   Other
14030                                   Other
14034              First Party Collection/Use
14035                                   Other
14037                     User Choice/Control
14038              First Party Collection/Use
14047              First Party Collection/Use
14048              First Party Collection/Use
14049              First Party Collection/Use
14050              First Party Collection/Use
14051              First Party Collection/Use
14055              First Party Collection/Use
14056              First Party Collection/Use
14059              First Party Collection/Use
14060              First Party Collection/Use
14063                     User Choice/Control
14064                     User Choice/Control
14065                     User Choice/Control
14071                     User Choice/Control
14072              First Party Collection/Use

Policy: 453, Segment: 10, Category: None
Policy: 453, Segment: 25, Category: None
Policy: 453, Segment: 26, Category: None
Policy: 453, Segment: 11, Category: None
Policy: 453, Segment: 12, Category: None
Policy: 453, Segment: 13, Category: None
Policy: 453, Segment: 14, Category: None
Policy: 453, Segment: 15, Category: None
Policy: 453, Segment: 16, Category: None
Policy: 453, Segment: 17, Category: None
Policy: 453, Segment: 18, Category: None
Policy: 453, Segment: 19, Category: None
Policy: 453, Segment: 20, Category: None
Policy: 453, Segment: 21, Category: None
Policy: 453, Segment: 22, Category: None
Policy: 453, Segment: 23, Category: None
Policy: 453, Segment: 24, Category: None
Policy: 453, Segment: 27, Category: None
Policy: 453, Segment: 29, Category: None
Policy: 453, Segment: 30, Category: None
Policy: 453, Segment: 31, Category: None
Policy: 453, Segment: 32, Category: None
Policy: 453, Segment: 33, Category: None
Policy: 453, Segment: 34, Category: None
Policy: 453, Seg

Policy: 481, Segment: 7, Category: None
Policy: 481, Segment: 8, Category: None
Policy: 481, Segment: 9, Category: None
Policy: 481, Segment: 10, Category: None
Policy: 481, Segment: 11, Category: None
Policy: 481, Segment: 12, Category: None
Policy: 481, Segment: 13, Category: None
Policy: 481, Segment: 14, Category: None
Policy: 481, Segment: 15, Category: None
Policy: 481, Segment: 16, Category: None
Policy: 481, Segment: 17, Category: None
Policy: 481, Segment: 18, Category: None
Policy: 456, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10              

Policy: 503, Segment: 15, Category: None
Policy: 503, Segment: 16, Category: None
Policy: 503, Segment: 17, Category: None
Policy: 503, Segment: 18, Category: None
Policy: 503, Segment: 19, Category: None
Policy: 962, Segment: 13, Category: None
Policy: 962, Segment: 34, Category: None
Policy: 962, Segment: 35, Category: None
Policy: 962, Segment: 36, Category: None
Policy: 962, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Othe

Policy: 962, Segment: 61, Category: None
Policy: 962, Segment: 62, Category: None
Policy: 962, Segment: 63, Category: None
Policy: 962, Segment: 64, Category: None
Policy: 962, Segment: 65, Category: None
Policy: 962, Segment: 66, Category: None
Policy: 962, Segment: 67, Category: None
Policy: 962, Segment: 68, Category: None
Policy: 962, Segment: 69, Category: None
Policy: 962, Segment: 70, Category: None
Policy: 962, Segment: 71, Category: None
Policy: 962, Segment: 72, Category: None
Policy: 962, Segment: 73, Category: None
Policy: 531, Segment: 36, Category: None
Policy: 531, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International 

Policy: 1070, Segment: 1, Category: 360                            Policy Change
374                            Policy Change
874                      User Choice/Control
987                            Policy Change
993                            Policy Change
1311                     User Choice/Control
1324                                   Other
1325              First Party Collection/Use
1326              First Party Collection/Use
1327              First Party Collection/Use
1328              First Party Collection/Use
1329              First Party Collection/Use
1330              First Party Collection/Use
1331              First Party Collection/Use
1332                                   Other
1333                                   Other
1335                     User Choice/Control
1338              First Party Collection/Use
1339              First Party Collection/Use
1340              First Party Collection/Use
1341              First Party Collection/Use
1343              F

Policy: 571, Segment: 4, Category: None
Policy: 571, Segment: 5, Category: None
Policy: 571, Segment: 6, Category: None
Policy: 571, Segment: 7, Category: None
Policy: 571, Segment: 8, Category: None
Policy: 571, Segment: 9, Category: None
Policy: 571, Segment: 10, Category: None
Policy: 571, Segment: 11, Category: None
Policy: 541, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control

Policy: 559, Segment: 13, Category: None
Policy: 559, Segment: 14, Category: None
Policy: 559, Segment: 15, Category: None
Policy: 559, Segment: 16, Category: None
Policy: 559, Segment: 17, Category: None
Policy: 559, Segment: 18, Category: None
Policy: 559, Segment: 19, Category: None
Policy: 559, Segment: 20, Category: None
Policy: 559, Segment: 21, Category: None
Policy: 559, Segment: 22, Category: None
Policy: 559, Segment: 23, Category: None
Policy: 581, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11      

Policy: 586, Segment: 2, Category: None
Policy: 586, Segment: 3, Category: None
Policy: 586, Segment: 4, Category: None
Policy: 586, Segment: 5, Category: None
Policy: 586, Segment: 6, Category: None
Policy: 586, Segment: 7, Category: None
Policy: 586, Segment: 8, Category: None
Policy: 586, Segment: 9, Category: None
Policy: 586, Segment: 10, Category: None
Policy: 586, Segment: 11, Category: None
Policy: 586, Segment: 12, Category: None
Policy: 586, Segment: 13, Category: None
Policy: 586, Segment: 14, Category: None
Policy: 586, Segment: 15, Category: None
Policy: 586, Segment: 16, Category: None
Policy: 586, Segment: 17, Category: None
Policy: 586, Segment: 18, Category: None
Policy: 586, Segment: 19, Category: None
Policy: 586, Segment: 20, Category: None
Policy: 586, Segment: 21, Category: None
Policy: 586, Segment: 22, Category: None
Policy: 586, Segment: 23, Category: None
Policy: 586, Segment: 24, Category: None
Policy: 586, Segment: 25, Category: None
Policy: 586, Segment: 26

Policy: 1300, Segment: 36, Category: None
Policy: 1300, Segment: 37, Category: None
Policy: 1300, Segment: 38, Category: None
Policy: 1300, Segment: 39, Category: None
Policy: 1300, Segment: 40, Category: None
Policy: 1300, Segment: 41, Category: None
Policy: 1300, Segment: 42, Category: None
Policy: 1300, Segment: 43, Category: None
Policy: 1300, Segment: 44, Category: None
Policy: 1300, Segment: 45, Category: None
Policy: 1300, Segment: 46, Category: None
Policy: 1300, Segment: 47, Category: None
Policy: 1300, Segment: 48, Category: None
Policy: 1300, Segment: 49, Category: None
Policy: 1300, Segment: 74, Category: None
Policy: 1300, Segment: 50, Category: None
Policy: 1300, Segment: 51, Category: None
Policy: 1300, Segment: 52, Category: None
Policy: 1300, Segment: 53, Category: None
Policy: 1300, Segment: 54, Category: None
Policy: 1300, Segment: 55, Category: None
Policy: 1300, Segment: 56, Category: None
Policy: 1300, Segment: 57, Category: None
Policy: 1300, Segment: 58, Categor

Policy: 640, Segment: 18, Category: None
Policy: 640, Segment: 19, Category: None
Policy: 640, Segment: 20, Category: None
Policy: 640, Segment: 21, Category: None
Policy: 640, Segment: 22, Category: None
Policy: 640, Segment: 23, Category: None
Policy: 640, Segment: 24, Category: None
Policy: 640, Segment: 25, Category: None
Policy: 640, Segment: 26, Category: None
Policy: 640, Segment: 27, Category: None
Policy: 640, Segment: 28, Category: None
Policy: 640, Segment: 29, Category: None
Policy: 640, Segment: 30, Category: None
Policy: 640, Segment: 31, Category: None
Policy: 640, Segment: 32, Category: None
Policy: 640, Segment: 33, Category: None
Policy: 640, Segment: 34, Category: None
Policy: 640, Segment: 35, Category: None
Policy: 640, Segment: 36, Category: None
Policy: 640, Segment: 37, Category: None
Policy: 640, Segment: 38, Category: None
Policy: 640, Segment: 39, Category: None
Policy: 640, Segment: 40, Category: None
Policy: 640, Segment: 41, Category: None
Policy: 641, Seg

Policy: 642, Segment: 1, Category: 17979        First Party Collection/Use
18003                     Policy Change
18023                     Policy Change
18053                     Policy Change
18202                             Other
18203                             Other
18204                             Other
18226        First Party Collection/Use
18227        First Party Collection/Use
18228        First Party Collection/Use
18229        First Party Collection/Use
18230        First Party Collection/Use
18231        First Party Collection/Use
18232        First Party Collection/Use
18233        First Party Collection/Use
18234        First Party Collection/Use
18242        First Party Collection/Use
18243        First Party Collection/Use
18244        First Party Collection/Use
18245        First Party Collection/Use
18246        First Party Collection/Use
18248        First Party Collection/Use
18249        First Party Collection/Use
18250        First Party Collection/Use
18251

Policy: 652, Segment: 12, Category: None
Policy: 652, Segment: 13, Category: None
Policy: 652, Segment: 14, Category: None
Policy: 652, Segment: 15, Category: None
Policy: 652, Segment: 16, Category: None
Policy: 652, Segment: 17, Category: None
Policy: 652, Segment: 18, Category: None
Policy: 652, Segment: 19, Category: None
Policy: 652, Segment: 20, Category: None
Policy: 652, Segment: 21, Category: None
Policy: 652, Segment: 22, Category: None
Policy: 652, Segment: 23, Category: None
Policy: 652, Segment: 24, Category: None
Policy: 652, Segment: 25, Category: None
Policy: 652, Segment: 26, Category: None
Policy: 652, Segment: 27, Category: None
Policy: 652, Segment: 28, Category: None
Policy: 652, Segment: 29, Category: None
Policy: 652, Segment: 30, Category: None
Policy: 652, Segment: 31, Category: None
Policy: 652, Segment: 32, Category: None
Policy: 652, Segment: 33, Category: None
Policy: 652, Segment: 34, Category: None
Policy: 652, Segment: 35, Category: None
Policy: 652, Seg

Policy: 746, Segment: 10, Category: None
Policy: 746, Segment: 11, Category: None
Policy: 746, Segment: 12, Category: None
Policy: 746, Segment: 13, Category: None
Policy: 746, Segment: 14, Category: None
Policy: 746, Segment: 15, Category: None
Policy: 746, Segment: 16, Category: None
Policy: 746, Segment: 17, Category: None
Policy: 746, Segment: 18, Category: None
Policy: 746, Segment: 19, Category: None
Policy: 746, Segment: 20, Category: None
Policy: 746, Segment: 21, Category: None
Policy: 746, Segment: 22, Category: None
Policy: 746, Segment: 23, Category: None
Policy: 746, Segment: 24, Category: None
Policy: 746, Segment: 25, Category: None
Policy: 746, Segment: 26, Category: None
Policy: 746, Segment: 27, Category: None
Policy: 746, Segment: 28, Category: None
Policy: 746, Segment: 29, Category: None
Policy: 746, Segment: 30, Category: None
Policy: 746, Segment: 31, Category: None
Policy: 760, Segment: 14, Category: None
Policy: 760, Segment: 0, Category: 0                     

Policy: 807, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control
14             Third Party Sharing/Collection
15                                      Other
16                        User Choice/Control
17                        User Choice/Control
18             User Access, Edit and Deletion
19                                      Other
20                                      Other

Policy: 817, Segment: 4, Category: None
Policy: 817, Segment: 5, Category: None
Policy: 817, Segment: 6, Category: None
Policy: 856, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control
14             Third Party Sharing/Collection
15                                      Other
16                        User Choice/Control
17                        User Choice/Control
18             Us

Policy: 891, Segment: 2, Category: None
Policy: 891, Segment: 3, Category: None
Policy: 891, Segment: 4, Category: None
Policy: 891, Segment: 5, Category: None
Policy: 891, Segment: 6, Category: None
Policy: 891, Segment: 7, Category: None
Policy: 891, Segment: 8, Category: None
Policy: 891, Segment: 9, Category: None
Policy: 891, Segment: 10, Category: None
Policy: 891, Segment: 11, Category: None
Policy: 891, Segment: 12, Category: None
Policy: 891, Segment: 13, Category: None
Policy: 891, Segment: 14, Category: None
Policy: 891, Segment: 15, Category: None
Policy: 891, Segment: 16, Category: None
Policy: 891, Segment: 17, Category: None
Policy: 891, Segment: 18, Category: None
Policy: 891, Segment: 19, Category: None
Policy: 891, Segment: 20, Category: None
Policy: 891, Segment: 21, Category: None
Policy: 891, Segment: 22, Category: None
Policy: 891, Segment: 23, Category: None
Policy: 891, Segment: 24, Category: None
Policy: 891, Segment: 25, Category: None
Policy: 891, Segment: 26

Policy: 1306, Segment: 15, Category: None
Policy: 1306, Segment: 16, Category: None
Policy: 1306, Segment: 17, Category: None
Policy: 1306, Segment: 18, Category: None
Policy: 1306, Segment: 19, Category: None
Policy: 1306, Segment: 20, Category: None
Policy: 1306, Segment: 21, Category: None
Policy: 1306, Segment: 22, Category: None
Policy: 1306, Segment: 23, Category: None
Policy: 1306, Segment: 24, Category: None
Policy: 1017, Segment: 0, Category: 0                                       Other
3                              Data Retention
8        International and Specific Audiences
9                                       Other
11                                      Other
13                        User Choice/Control
15                                      Other
16                        User Choice/Control
17                        User Choice/Control
20                                      Other
21                                      Other
23                             Data Re

Name: category_name, Length: 23003, dtype: object
Policy: 1050, Segment: 1, Category: 579                                    Other
580                                    Other
586                      User Choice/Control
587               First Party Collection/Use
588                                    Other
593               First Party Collection/Use
594               First Party Collection/Use
595               First Party Collection/Use
596               First Party Collection/Use
598               First Party Collection/Use
608               First Party Collection/Use
609               First Party Collection/Use
614               First Party Collection/Use
615               First Party Collection/Use
616                                    Other
618           Third Party Sharing/Collection
619                      User Choice/Control
628               First Party Collection/Use
629               First Party Collection/Use
630           Third Party Sharing/Collection
631           

Policy: 1083, Segment: 14, Category: None
Policy: 1083, Segment: 15, Category: None
Policy: 1083, Segment: 16, Category: None
Policy: 1083, Segment: 17, Category: None
Policy: 1083, Segment: 18, Category: None
Policy: 1083, Segment: 19, Category: None
Policy: 1083, Segment: 20, Category: None
Policy: 1083, Segment: 21, Category: None
Policy: 1083, Segment: 22, Category: None
Policy: 1083, Segment: 25, Category: None
Policy: 1083, Segment: 26, Category: None
Policy: 1083, Segment: 27, Category: None
Policy: 1083, Segment: 28, Category: None
Policy: 1083, Segment: 29, Category: None
Policy: 1083, Segment: 30, Category: None
Policy: 1083, Segment: 31, Category: None
Policy: 1164, Segment: 1, Category: 844                                    Other
1826                                   Other
2168    International and Specific Audiences
2526                                   Other
2527                           Policy Change
2550    International and Specific Audiences
2551              Firs

Policy: 1089, Segment: 17, Category: None
Policy: 1089, Segment: 19, Category: None
Policy: 1089, Segment: 20, Category: None
Policy: 1089, Segment: 21, Category: None
Policy: 1089, Segment: 22, Category: None
Policy: 1089, Segment: 23, Category: None
Policy: 1089, Segment: 24, Category: None
Policy: 1089, Segment: 25, Category: None
Policy: 1089, Segment: 26, Category: None
Policy: 1089, Segment: 27, Category: None
Policy: 1089, Segment: 28, Category: None
Policy: 1089, Segment: 29, Category: None
Policy: 1089, Segment: 30, Category: None
Policy: 1089, Segment: 31, Category: None
Policy: 1089, Segment: 32, Category: None
Policy: 1099, Segment: 12, Category: None
Policy: 1099, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6               

Policy: 1224, Segment: 11, Category: None
Policy: 1224, Segment: 12, Category: None
Policy: 1224, Segment: 13, Category: None
Policy: 1224, Segment: 14, Category: None
Policy: 1224, Segment: 15, Category: None
Policy: 1224, Segment: 16, Category: None
Policy: 1224, Segment: 17, Category: None
Policy: 1224, Segment: 18, Category: None
Policy: 1106, Segment: 2, Category: None
Policy: 1106, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                 

Policy: 1259, Segment: 19, Category: None
Policy: 1259, Segment: 20, Category: None
Policy: 1259, Segment: 21, Category: None
Policy: 1259, Segment: 22, Category: None
Policy: 1259, Segment: 23, Category: None
Policy: 1259, Segment: 24, Category: None
Policy: 1259, Segment: 25, Category: None
Policy: 1259, Segment: 26, Category: None
Policy: 1259, Segment: 27, Category: None
Policy: 1259, Segment: 28, Category: None
Policy: 1259, Segment: 29, Category: None
Policy: 1259, Segment: 30, Category: None
Policy: 1259, Segment: 31, Category: None
Policy: 1259, Segment: 32, Category: None
Policy: 1259, Segment: 33, Category: None
Policy: 1259, Segment: 34, Category: None
Policy: 1259, Segment: 35, Category: None
Policy: 1259, Segment: 36, Category: None
Policy: 1259, Segment: 37, Category: None
Policy: 1259, Segment: 38, Category: None
Policy: 1259, Segment: 39, Category: None
Policy: 1259, Segment: 40, Category: None
Policy: 1259, Segment: 41, Category: None
Policy: 1259, Segment: 42, Categor

Name: category_name, Length: 23142, dtype: object
Policy: 1261, Segment: 1, Category: 3330                     User Choice/Control
3480                     User Choice/Control
3494                     User Choice/Control
3549                                   Other
3550              First Party Collection/Use
3551                     User Choice/Control
3552          Third Party Sharing/Collection
3553              First Party Collection/Use
3554          Third Party Sharing/Collection
3557                                   Other
3559              First Party Collection/Use
3560          Third Party Sharing/Collection
3561                     User Choice/Control
3562                     User Choice/Control
3563          Third Party Sharing/Collection
3573                     User Choice/Control
3574          Third Party Sharing/Collection
3575              First Party Collection/Use
3576                     User Choice/Control
3577              First Party Collection/Use
3578          

Policy: 1360, Segment: 2, Category: None
Policy: 1360, Segment: 3, Category: None
Policy: 1360, Segment: 4, Category: None
Policy: 1360, Segment: 5, Category: None
Policy: 1360, Segment: 6, Category: None
Policy: 1360, Segment: 7, Category: None
Policy: 1360, Segment: 8, Category: None
Policy: 1360, Segment: 9, Category: None
Policy: 1360, Segment: 10, Category: None
Policy: 1360, Segment: 11, Category: None
Policy: 1360, Segment: 12, Category: None
Policy: 1360, Segment: 13, Category: None
Policy: 1360, Segment: 16, Category: None
Policy: 1360, Segment: 17, Category: None
Policy: 1360, Segment: 18, Category: None
Policy: 1419, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  F

Policy: 1468, Segment: 20, Category: None
Policy: 1468, Segment: 21, Category: None
Policy: 1468, Segment: 22, Category: None
Policy: 1468, Segment: 23, Category: None
Policy: 1468, Segment: 24, Category: None
Policy: 1468, Segment: 25, Category: None
Policy: 1468, Segment: 26, Category: None
Policy: 1468, Segment: 27, Category: None
Policy: 1468, Segment: 28, Category: None
Policy: 1468, Segment: 29, Category: None
Policy: 1468, Segment: 30, Category: None
Policy: 1468, Segment: 31, Category: None
Policy: 1468, Segment: 32, Category: None
Policy: 1468, Segment: 33, Category: None
Policy: 1468, Segment: 34, Category: None
Policy: 1468, Segment: 35, Category: None
Policy: 1468, Segment: 36, Category: None
Policy: 1468, Segment: 38, Category: None
Policy: 1468, Segment: 39, Category: None
Policy: 1468, Segment: 40, Category: None
Policy: 1468, Segment: 41, Category: None
Policy: 1468, Segment: 42, Category: None
Policy: 1468, Segment: 43, Category: None
Policy: 1468, Segment: 44, Categor

Policy: 1498, Segment: 36, Category: None
Policy: 1498, Segment: 37, Category: None
Policy: 1498, Segment: 38, Category: None
Policy: 1498, Segment: 39, Category: None
Policy: 1470, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Other
12                                      Other
13                        User Choice/Control
14             Third Party Sharing/Collection
15                                      Other
16                        User Choice/Control
17            

Policy: 1510, Segment: 8, Category: None
Policy: 1510, Segment: 9, Category: None
Policy: 1510, Segment: 10, Category: None
Policy: 1510, Segment: 11, Category: None
Policy: 1510, Segment: 12, Category: None
Policy: 1510, Segment: 13, Category: None
Policy: 1510, Segment: 14, Category: None
Policy: 1510, Segment: 15, Category: None
Policy: 1510, Segment: 16, Category: None
Policy: 1510, Segment: 17, Category: None
Policy: 1510, Segment: 18, Category: None
Policy: 1510, Segment: 19, Category: None
Policy: 1510, Segment: 20, Category: None
Policy: 1510, Segment: 21, Category: None
Policy: 1510, Segment: 22, Category: None
Policy: 1510, Segment: 23, Category: None
Policy: 1510, Segment: 24, Category: None
Policy: 1510, Segment: 25, Category: None
Policy: 1510, Segment: 26, Category: None
Policy: 1510, Segment: 27, Category: None
Policy: 1510, Segment: 28, Category: None
Policy: 1510, Segment: 29, Category: None
Policy: 1510, Segment: 30, Category: None
Policy: 1510, Segment: 31, Category:

Policy: 1539, Segment: 2, Category: None
Policy: 1539, Segment: 3, Category: None
Policy: 1539, Segment: 4, Category: None
Policy: 1539, Segment: 5, Category: None
Policy: 1539, Segment: 6, Category: None
Policy: 1539, Segment: 7, Category: None
Policy: 1539, Segment: 8, Category: None
Policy: 1539, Segment: 9, Category: None
Policy: 1539, Segment: 10, Category: None
Policy: 1539, Segment: 11, Category: None
Policy: 1539, Segment: 12, Category: None
Policy: 1539, Segment: 14, Category: None
Policy: 1539, Segment: 15, Category: None
Policy: 1539, Segment: 16, Category: None
Policy: 1539, Segment: 17, Category: None
Policy: 1539, Segment: 18, Category: None
Policy: 1539, Segment: 19, Category: None
Policy: 1539, Segment: 20, Category: None
Policy: 1539, Segment: 21, Category: None
Policy: 1539, Segment: 22, Category: None
Policy: 1539, Segment: 23, Category: None
Policy: 1539, Segment: 24, Category: None
Policy: 1539, Segment: 25, Category: None
Policy: 1539, Segment: 26, Category: None


Policy: 1545, Segment: 17, Category: None
Policy: 1545, Segment: 18, Category: None
Policy: 1545, Segment: 19, Category: None
Policy: 1545, Segment: 20, Category: None
Policy: 1545, Segment: 21, Category: None
Policy: 1545, Segment: 22, Category: None
Policy: 1545, Segment: 23, Category: None
Policy: 1545, Segment: 24, Category: None
Policy: 1545, Segment: 25, Category: None
Policy: 1545, Segment: 26, Category: None
Policy: 1545, Segment: 27, Category: None
Policy: 1618, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/

Policy: 1673, Segment: 17, Category: None
Policy: 1673, Segment: 18, Category: None
Policy: 1673, Segment: 19, Category: None
Policy: 1673, Segment: 20, Category: None
Policy: 1673, Segment: 21, Category: None
Policy: 1673, Segment: 22, Category: None
Policy: 1673, Segment: 23, Category: None
Policy: 1673, Segment: 24, Category: None
Policy: 1673, Segment: 25, Category: None
Policy: 1673, Segment: 26, Category: None
Policy: 1673, Segment: 27, Category: None
Policy: 1673, Segment: 28, Category: None
Policy: 1673, Segment: 29, Category: None
Policy: 1673, Segment: 30, Category: None
Policy: 1673, Segment: 31, Category: None
Policy: 1673, Segment: 32, Category: None
Policy: 1673, Segment: 34, Category: None
Policy: 1673, Segment: 35, Category: None
Policy: 1673, Segment: 36, Category: None
Policy: 1673, Segment: 37, Category: None
Policy: 1673, Segment: 38, Category: None
Policy: 1673, Segment: 39, Category: None
Policy: 1673, Segment: 40, Category: None
Policy: 1673, Segment: 41, Categor

Policy: 1637, Segment: 5, Category: None
Policy: 1637, Segment: 6, Category: None
Policy: 1637, Segment: 7, Category: None
Policy: 1637, Segment: 8, Category: None
Policy: 1637, Segment: 9, Category: None
Policy: 1637, Segment: 10, Category: None
Policy: 1637, Segment: 11, Category: None
Policy: 1637, Segment: 12, Category: None
Policy: 1637, Segment: 13, Category: None
Policy: 1637, Segment: 14, Category: None
Policy: 1666, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  First Party Collection/Use
6                  First Party Collection/Use
7                  First Party Collection/Use
8        International and Specific Audiences
9                                       Other
10                 First Party Collection/Use
11                                      Oth

Policy: 1683, Segment: 16, Category: None
Policy: 1683, Segment: 17, Category: None
Policy: 1683, Segment: 18, Category: None
Policy: 1683, Segment: 19, Category: None
Policy: 1683, Segment: 20, Category: None
Policy: 1683, Segment: 21, Category: None
Policy: 1683, Segment: 22, Category: None
Policy: 1683, Segment: 23, Category: None
Policy: 1683, Segment: 24, Category: None
Policy: 1683, Segment: 25, Category: None
Policy: 1683, Segment: 26, Category: None
Policy: 1683, Segment: 27, Category: None
Policy: 1683, Segment: 28, Category: None
Policy: 1683, Segment: 29, Category: None
Policy: 1683, Segment: 30, Category: None
Policy: 1683, Segment: 31, Category: None
Policy: 1683, Segment: 32, Category: None
Policy: 1703, Segment: 0, Category: 0                                       Other
1                  First Party Collection/Use
2                  First Party Collection/Use
3                              Data Retention
4                  First Party Collection/Use
5                  F

This mess above yields the same answer as when using the join and agg method below.

In [17]:
categories.head()

Unnamed: 0,Policy UID,category_name,segment_id
0,20,0 Other ...,0
1,20,11178 Data Retention ...,1
2,20,,2
3,20,,3
4,20,,4


In [106]:
#Set up table in which categories are assigned to each unique document segment. Those that aren't
#tagged are labeled as "None".
#Join category name to segments and determine the mode of each category, concat back onto original segments
tmp = pd.merge(segments,annotations,on=['Policy UID','segment_id'],how='outer')
tmp['category_name'].fillna(value='None')
tmp2 = pd.DataFrame(tmp.groupby(by=['Policy UID','segment_id']).count())
print(tmp2.shape)
tmp2.head(100)#['category_name'].agg(lambda x: x.value_counts().index[0]))

(3792, 8)


Unnamed: 0_level_0,Unnamed: 1_level_0,segments,annotation_id,batch_id,annotator_id,category_name,attributes_value_pairs,date,policy_url
Policy UID,segment_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
20,0,3,3,3,3,3,3,3,3
20,1,3,3,3,3,3,3,3,3
20,2,7,7,7,7,7,7,7,7
20,3,13,13,13,13,13,13,13,13
20,4,5,5,5,5,5,5,5,5
20,5,16,16,16,16,16,16,16,16
20,6,10,10,10,10,10,10,10,10
20,7,7,7,7,7,7,7,7,7
20,8,4,4,4,4,4,4,4,4
20,9,7,7,7,7,7,7,7,7


In [85]:
tmp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40133 entries, 0 to 40132
Data columns (total 10 columns):
Policy UID                40133 non-null int64
segment_id                40133 non-null int64
segments                  40133 non-null object
annotation_id             40133 non-null int64
batch_id                  40133 non-null object
annotator_id              40133 non-null int64
category_name             40133 non-null object
attributes_value_pairs    40133 non-null object
date                      36399 non-null datetime64[ns]
policy_url                40133 non-null object
dtypes: datetime64[ns](1), int64(4), object(5)
memory usage: 3.4+ MB


In [60]:
categories = list(tmp2['category_name'].unique())
cols = {'Other': 'other',
        'Policy Change': 'policy_change',
        'First Party Collection/Use': 'first_party_collection_use',
        'Third Party Sharing/Collection': 'third_party_sharing_collection',
        'Do Not Track': 'do_not_track',
        'User Choice/Control': 'user_choice_control',
        'International and Specific Audiences': 'international_specific_audiences',
        'Data Security': 'data_security',
        'Data Retention': 'data_retention',
        'User Access, Edit and Deletion': 'user_access_edit_deletion'}

## Segment-level categorization
Loop through categories and generate one-hot encoding of each category and stick them into new columns with the corresponding names in cols

In [61]:
#Loop through the categories and generate a set of new columns with names in cols
binary_categories = pd.DataFrame()

for category in categories:
    one_hot = lambda s: 1 if s.startswith(category) else 0
    binary_categories[cols[category]] = tmp2['category_name'].apply(one_hot)

In [65]:
binary_categories.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,other,policy_change,first_party_collection_use,third_party_sharing_collection,do_not_track,user_choice_control,international_specific_audiences,data_security,data_retention,user_access_edit_deletion
Policy UID,segment_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
20,0,1,0,0,0,0,0,0,0,0,0
20,1,1,0,0,0,0,0,0,0,0,0
20,2,0,1,0,0,0,0,0,0,0,0
20,3,0,0,1,0,0,0,0,0,0,0
20,4,0,0,1,0,0,0,0,0,0,0


In [27]:
#Create engine for persisting
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database

In [28]:
engine = create_engine('postgres://%s@localhost/%s'%(username,dbname))
print(engine.url)

postgres://peterostendorp@localhost/beforeiagree_db


In [29]:
binary_categories.to_sql('binary_segment_categories',engine, if_exists='replace')

## Policy-level categorization

In [39]:
binary_categories_policy = binary_categories.reset_index(level=[0,1]).groupby('Policy UID').sum().drop('segment_id',axis=1)
binary_categories_policy = binary_categories_policy.applymap(lambda x: 1 if x > 0 else 0)
binary_categories_policy.head()

Unnamed: 0_level_0,other,policy_change,first_party_collection_use,third_party_sharing_collection,do_not_track,user_choice_control,international_specific_audiences,data_security,data_retention,user_access_edit_deletion
Policy UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
20,1,1,1,1,1,1,1,1,1,1
21,1,1,1,1,0,1,1,1,0,1
26,1,1,1,1,0,1,1,1,0,1
32,1,1,1,1,0,1,1,1,1,0
33,1,1,1,1,0,1,1,1,1,1


In [40]:
binary_categories_policy.sum()

other                               112
policy_change                        83
first_party_collection_use          112
third_party_sharing_collection      110
do_not_track                         22
user_choice_control                  98
international_specific_audiences     85
data_security                        94
data_retention                       25
user_access_edit_deletion            73
dtype: int64

Most policies contain info on:
- Other
- First and third party data collection
- user choice control
- data security

Less frequently mentioned:
- data retention
- user access edit deletion
- do not track

In [41]:
binary_categories_policy.to_sql('binary_policy_categories',engine)