In [1]:
import json
import os
import pandas as pd
import zipfile

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
input_folder = "privacy_scraper/blacklight_json"

In [4]:
rows = []

for filename in os.listdir(input_folder):
    if filename.endswith(".json"):
        file_path = os.path.join(input_folder, filename)
        
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
            
            domain_name = filename.replace(".json", "")
            cards = data.get("groups", [])[0].get("cards", [])
            
            ddg_join_ads = 0
            third_party_cookies = 0
            canvas_fingerprinting = 0
            session_recording = 0
            key_logging = 0
            fb_pixel = 0
            google_analytics = 0

            for card in cards:
                card_type = card.get("cardType", "")
                big_number = card.get("bigNumber", 0)

                if card_type == "ddg_join_ads":
                    ddg_join_ads = big_number
                elif card_type == "cookies":
                    third_party_cookies = big_number
                elif card_type == "canvas_fingerprinters":
                    canvas_fingerprinting = 1 if card.get("testEventsFound", False) else 0
                elif card_type == "session_recorders":
                    session_recording = 1 if card.get("testEventsFound", False) else 0
                elif card_type == "key_logging":
                    key_logging = 1 if card.get("testEventsFound", False) else 0
                elif card_type == "fb_pixel_events":
                    fb_pixel = 1 if card.get("testEventsFound", False) else 0
                elif card_type == "ga":
                    google_analytics = 1 if card.get("testEventsFound", False) else 0
            
            rows.append({
                "filename": domain_name,
                "ddg_join_ads": ddg_join_ads,
                "third_party_cookies": third_party_cookies,
                "canvas_fingerprinting": canvas_fingerprinting,
                "session_recording": session_recording,
                "key_logging": key_logging,
                "fb_pixel": fb_pixel,
                "google_analytics": google_analytics,
            })
        
        except Exception as e:
            print(f"Error processing {filename}: {e}")

In [5]:
def process_json_files(input_folder: str) -> list:
    """
    Process JSON files to extract tracking metrics from DuckDuckGo privacy data.
    
    Args:
        input_folder (str): Path to folder containing JSON files
        
    Returns:
        list: List of dictionaries containing extracted metrics
    """
    rows = []
    
    for filename in os.listdir(input_folder):
        if not filename.endswith(".json"):
            continue
            
        file_path = os.path.join(input_folder, filename)
        
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
            
            domain_name = filename.replace(".json", "")
            cards = data.get("groups", [])[0].get("cards", [])
            
            metrics = {
                "filename": domain_name,
                "ddg_join_ads": 0,
                "third_party_cookies": 0,
                "canvas_fingerprinting": 0,
                "session_recording": 0,
                "key_logging": 0,
                "fb_pixel": 0,
                "google_analytics": 0
            }
            
            for card in cards:
                card_type = card.get("cardType", "")
                if card_type == "ddg_join_ads":
                    metrics["ddg_join_ads"] = card.get("bigNumber", 0)
                elif card_type == "cookies":
                    metrics["third_party_cookies"] = card.get("bigNumber", 0)
                elif card_type in ["canvas_fingerprinters", "session_recorders", 
                                 "key_logging", "fb_pixel_events"]:
                    metric_key = {
                        "canvas_fingerprinters": "canvas_fingerprinting",
                        "session_recorders": "session_recording",
                        "key_logging": "key_logging",
                        "fb_pixel_events": "fb_pixel"
                    }[card_type]
                    metrics[metric_key] = 1 if card.get("testEventsFound", False) else 0
                elif card_type == "ga":
                    metrics["google_analytics"] = 1 if card.get("testEventsFound", False) else 0
            
            rows.append(metrics)
        
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            
    return rows

In [6]:
govdir_blacklight_df = pd.DataFrame(process_json_files("privacy_scraper/blacklight_json"))
govdir_blacklight_df.shape

(3877, 8)

In [7]:
govdir_blacklight_df.head()

Unnamed: 0,filename,ddg_join_ads,third_party_cookies,canvas_fingerprinting,session_recording,key_logging,fb_pixel,google_analytics
0,www.freienbach.ch_,0,0,0,0,1,0,0
1,drangsnes.is_,0,0,0,0,0,0,0
2,www.wald.zh.ch_,0,0,0,0,1,0,0
3,www.bournens.ch_,1,0,0,0,0,0,0
4,hagneck.ch_,3,0,0,0,0,0,0


In [8]:
govdir_blacklight_df.to_csv("../data/blacklight_domain.csv", index = False)

In [9]:
govdir_blacklight_df.describe()

Unnamed: 0,ddg_join_ads,third_party_cookies,canvas_fingerprinting,session_recording,key_logging,fb_pixel,google_analytics
count,3877.0,3877.0,3877.0,3877.0,3877.0,3877.0,3877.0
mean,0.388703,0.207893,0.004127,0.004643,0.084859,0.006706,0.000774
std,0.854362,0.989683,0.064117,0.067988,0.278708,0.081627,0.02781
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,8.0,15.0,1.0,1.0,1.0,1.0,1.0


In [10]:
govdir = pd.read_csv("../data/combined_tsv_data.tsv", sep = "\t")
govdir.head()

Unnamed: 0,Name,Govdirectory URL,Type,Website,source_file
0,Cherkasy Oblast,https://www.govdirectory.org/ukraine/Q161808/,oblast of Ukraine,https://www.oblradack.gov.ua/,Ukraine
1,Chernihiv Oblast,https://www.govdirectory.org/ukraine/Q167874/,oblast of Ukraine,https://cg.gov.ua/,Ukraine
2,Chernivtsi Oblast,https://www.govdirectory.org/ukraine/Q168856/,oblast of Ukraine,https://bukoda.gov.ua/,Ukraine
3,Dnipropetrovsk Oblast,https://www.govdirectory.org/ukraine/Q170672/,oblast of Ukraine,http://www.adm.dp.gov.ua/,Ukraine
4,Donetsk Oblast,https://www.govdirectory.org/ukraine/Q2012050/,oblast of Ukraine,https://dn.gov.ua/,Ukraine


In [11]:
# Keys
govdir_blacklight_df['filename'] = govdir_blacklight_df['filename'].str.rstrip('_')
govdir['Website'] = govdir['Website'].str.replace(r'^https?://', '', regex=True)
govdir['Website'] = govdir['Website'].str.rstrip('/')

In [12]:
fin_df = govdir.merge(govdir_blacklight_df, 
            left_on = "Website",
            right_on = "filename",
                     how = "left")

In [13]:
fin_df.shape

(12255, 13)

In [14]:
fin_df.head()

Unnamed: 0,Name,Govdirectory URL,Type,Website,source_file,filename,ddg_join_ads,third_party_cookies,canvas_fingerprinting,session_recording,key_logging,fb_pixel,google_analytics
0,Cherkasy Oblast,https://www.govdirectory.org/ukraine/Q161808/,oblast of Ukraine,www.oblradack.gov.ua,Ukraine,www.oblradack.gov.ua,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Chernihiv Oblast,https://www.govdirectory.org/ukraine/Q167874/,oblast of Ukraine,cg.gov.ua,Ukraine,cg.gov.ua,4.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Chernivtsi Oblast,https://www.govdirectory.org/ukraine/Q168856/,oblast of Ukraine,bukoda.gov.ua,Ukraine,bukoda.gov.ua,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Dnipropetrovsk Oblast,https://www.govdirectory.org/ukraine/Q170672/,oblast of Ukraine,www.adm.dp.gov.ua,Ukraine,,,,,,,,
4,Donetsk Oblast,https://www.govdirectory.org/ukraine/Q2012050/,oblast of Ukraine,dn.gov.ua,Ukraine,dn.gov.ua,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
fin_df.groupby('source_file').describe().reset_index().reset_index().dropna()

Unnamed: 0_level_0,index,source_file,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,session_recording,session_recording,session_recording,session_recording,session_recording,session_recording,session_recording,session_recording,key_logging,key_logging,key_logging,key_logging,key_logging,key_logging,key_logging,key_logging,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
0,0,Austria,389.0,0.154242,0.539056,0.0,0.0,0.0,0.0,4.0,389.0,0.169666,0.980219,0.0,0.0,0.0,0.0,14.0,389.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,389.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,389.0,0.005141,0.071611,0.0,0.0,0.0,0.0,1.0,389.0,0.005141,0.071611,0.0,0.0,0.0,0.0,1.0,389.0,0.002571,0.050702,0.0,0.0,0.0,0.0,1.0
1,1,Belgium,444.0,0.423423,0.931511,0.0,0.0,0.0,1.0,7.0,444.0,0.189189,1.085621,0.0,0.0,0.0,0.0,12.0,444.0,0.009009,0.094594,0.0,0.0,0.0,0.0,1.0,444.0,0.013514,0.11559,0.0,0.0,0.0,0.0,1.0,444.0,0.029279,0.168778,0.0,0.0,0.0,0.0,1.0,444.0,0.02027,0.141082,0.0,0.0,0.0,0.0,1.0,444.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,Bermuda,2.0,3.0,1.414214,2.0,2.5,3.0,3.5,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,Cameroon,14.0,0.214286,0.425815,0.0,0.0,0.0,0.0,1.0,14.0,0.285714,1.069045,0.0,0.0,0.0,0.0,4.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7,East Timor,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,10,Ghana,51.0,0.686275,1.378263,0.0,0.0,0.0,1.0,6.0,51.0,0.333333,1.608312,0.0,0.0,0.0,0.0,11.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,11,Iceland,59.0,1.016949,1.370773,0.0,0.0,1.0,1.0,5.0,59.0,0.40678,1.13135,0.0,0.0,0.0,0.0,5.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59.0,0.016949,0.130189,0.0,0.0,0.0,0.0,1.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59.0,0.084746,0.280894,0.0,0.0,0.0,0.0,1.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,15,Latvia,18.0,0.333333,0.685994,0.0,0.0,0.0,0.0,2.0,18.0,0.333333,0.485071,0.0,0.0,0.0,1.0,1.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.111111,0.323381,0.0,0.0,0.0,0.0,1.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,17,Malaysia,29.0,0.896552,1.113066,0.0,0.0,1.0,1.0,4.0,29.0,0.724138,1.810615,0.0,0.0,0.0,0.0,8.0,29.0,0.034483,0.185695,0.0,0.0,0.0,0.0,1.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.0,0.034483,0.185695,0.0,0.0,0.0,0.0,1.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,20,Nepal,88.0,1.670455,1.467828,0.0,0.0,1.0,3.0,4.0,88.0,0.545455,1.923294,0.0,0.0,0.0,0.0,8.0,88.0,0.011364,0.1066,0.0,0.0,0.0,0.0,1.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
fin_df.groupby(['source_file', 'Type']).describe().reset_index().reset_index().dropna()

Unnamed: 0_level_0,index,source_file,Type,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,ddg_join_ads,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,third_party_cookies,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,canvas_fingerprinting,session_recording,session_recording,session_recording,session_recording,session_recording,session_recording,session_recording,session_recording,key_logging,key_logging,key_logging,key_logging,key_logging,key_logging,key_logging,key_logging,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,fb_pixel,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics,google_analytics
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
0,0,Austria,Bundesministerium,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,Austria,district of Austria,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,Austria,federal state of Austria,7.0,0.428571,1.133893,0.0,0.0,0.0,0.0,3.0,7.0,2.428571,3.154739,0.0,0.0,0.0,5.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,Austria,municipality of Austria,372.0,0.153226,0.530133,0.0,0.0,0.0,0.0,4.0,372.0,0.13172,0.863369,0.0,0.0,0.0,0.0,14.0,372.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,372.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,372.0,0.005376,0.073225,0.0,0.0,0.0,0.0,1.0,372.0,0.005376,0.073225,0.0,0.0,0.0,0.0,1.0,372.0,0.002688,0.051848,0.0,0.0,0.0,0.0,1.0
4,4,Belgium,Belgian delegations,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,Belgium,Federal Public Service,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.75,0.5,0.0,0.75,1.0,1.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7,Belgium,Federal Scientific Institute,6.0,1.833333,2.041241,0.0,0.0,1.5,3.75,4.0,6.0,0.666667,1.632993,0.0,0.0,0.0,0.0,4.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.166667,0.408248,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.333333,0.516398,0.0,0.0,0.0,0.75,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8,Belgium,Public Institution of Social Security,5.0,0.2,0.447214,0.0,0.0,0.0,0.0,1.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,Belgium,Public Interest Organizations,7.0,1.571429,1.812654,0.0,0.5,1.0,2.0,5.0,7.0,2.285714,3.9036,0.0,0.0,0.0,4.0,8.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.142857,0.377964,0.0,0.0,0.0,0.0,1.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,10,Belgium,Public Planning Service,2.0,0.5,0.707107,0.0,0.25,0.5,0.75,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Let's do the same for US gov. list

In [22]:
usgov_blacklight_df = pd.DataFrame(process_json_files("privacy_scraper/us_blacklight_json"))
usgov_blacklight_df.shape

(1512, 8)

In [23]:
usgov_blacklight_df.head()

Unnamed: 0,filename,ddg_join_ads,third_party_cookies,canvas_fingerprinting,session_recording,key_logging,fb_pixel,google_analytics
0,bucklinmo.gov,0,5,0,0,0,0,0
1,bcutah.gov,1,0,0,0,0,0,0
2,abseconnj.gov,1,1,0,0,0,0,0
3,eaglevilletn.gov,0,0,0,0,0,0,0
4,monterey.gov,5,5,0,0,0,0,0


In [24]:
usgov_blacklight_df.describe()

Unnamed: 0,ddg_join_ads,third_party_cookies,canvas_fingerprinting,session_recording,key_logging,fb_pixel,google_analytics
count,1512.0,1512.0,1512.0,1512.0,1512.0,1512.0,1512.0
mean,1.57209,0.896825,0.00463,0.015212,0.011905,0.019841,0.006614
std,2.154235,3.308324,0.067906,0.122434,0.108493,0.139501,0.081082
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,2.0,0.0,0.0,0.0,0.0,0.0,0.0
max,33.0,63.0,1.0,1.0,1.0,1.0,1.0


### Let's do it for Indian Gov. List

In [None]:
ingov_blacklight_df = pd.DataFrame(process_json_files("privacy_scraper/in_blacklight_json"))
ingov_blacklight_df.shape