In [None]:
import json
from pathlib import Path
from typing import List, Dict, Set
from collections import defaultdict

def load_discoveries(file_path: str) -> dict:
    """Load a privacy map JSON file."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f).get("discoveries", [])
# extract the controls from the one file
def extract_controls(discovery: dict) -> dict:
    """Extract all controls from a discovery (page)."""
    controls = discovery.get("controls", [])    
    disc_controls = {
        "label": [],
        "type": [],
        "selector": []
    }
    for control in controls:
        print(control)
        if "link" in control["selector"].lower(): continue
        for key in control:
            disc_controls[key].append(control[key])
    return disc_controls# {key: [d[key] for d in controls] for key in controls[0]}
def process_labels(labels: List[str]) -> List[str]:
    processed_labels = []
    for label in labels:
        if label == label.lower(): break
        processed_labels.append(label)
    return processed_labels
def extract_file_controls(discoveries: str) -> dict:
    discoveries = load_discoveries(discoveries)
    file_controls = {
        "label": [],
        "type": [],
        "selector": []
    }
    for discovery in discoveries:
        controls = extract_controls(discovery)
        file_controls["label"] += controls["label"]
        file_controls["type"] += controls["type"]
        file_controls["selector"] += controls["selector"]
    return file_controls
def aggregate_pages(privacy_maps: List[Path]) -> dict:
    # Extract all controls from all privacy maps
    map_controls = {pmap: extract_file_controls(pmap) for pmap in privacy_maps}
    all_controls = {
        "label": [],
        "type": [],
        "selector": []
    }
    for pmap in map_controls:
        all_controls["label"] += map_controls[pmap]["label"]
        all_controls["type"] += map_controls[pmap]["type"]
        all_controls["selector"] += map_controls[pmap]["selector"]

    return all_controls

# Used to compare a user's preferences to one particular page's controls
def find_intersecting_controls(controls1: str, target: str) -> set[str]:
    # Find intersecting controls between two sets of controls.
    controls1 = extract_file_controls(controls1)
    target = extract_file_controls(target)
    intersecting_controls = {
        "label": [],
        "type": []
    }
    labels = set(controls1["label"]) & set(target["label"])
    for i in range(len(controls1["label"])):
        if controls1["label"][i] in labels:
            intersecting_controls["label"].append(controls1["label"][i])
            intersecting_controls["type"].append(controls1["type"][i])
    return intersecting_controls

def summarize_output(controls1: dict, target: dict, intersecting_controls: dict) -> dict:
    # Summarize the output of the comparison
    summary = {
        "common controls": len(intersecting_controls["label"]),
        "unique to user": len(controls1["label"]) - len(intersecting_controls["label"]),
        "unique to target": len(target["label"]) - len(intersecting_controls["label"])
    }
    return summary

In [None]:
maps_dir = Path("C:/Users/aryav/gemini-team/outputs/")
privacy_maps = [file for file in maps_dir.glob("privacy_map_*.json")] 
# all_controls = aggregate_pages(maps_dir)
controls1, controls2 = privacy_maps[0], privacy_maps[1]

overlap = find_intersecting_controls(controls1, controls2)
print(json.dumps(overlap, indent=4))

In [None]:
'''
This program needs the appropriate functions to:
- Ingest a set of files and extract relevant information (label, type, selector).
- Compare extracted features from two files and output the relevant matches.
- Summarize the output of the comparison.
'''

83

In [36]:
test = extract_file_controls(controls1)

{'label': 'Detailed crash dumps (Windows)', 'type': 'checkbox', 'selector': 'zoom-toggle__original'}
{'label': 'Accept All Cookies', 'type': 'button', 'selector': 'accept-recommended-btn-handler'}
{'label': 'Targeting Cookies', 'type': 'button', 'selector': 'ot-switch'}
{'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-handler'}
{'label': 'Functional Cookies', 'type': 'button', 'selector': 'ot-switch'}
{'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-handler'}
{'label': 'Performance Cookies', 'type': 'button', 'selector': 'ot-switch'}
{'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-handler'}
{'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-hand

In [22]:
len(extract_file_controls(controls1)["label"]) == 139

[{'label': 'Detailed crash dumps (Windows)', 'type': 'checkbox', 'selector': 'zoom-toggle__original'}, {'label': 'Accept All Cookies', 'type': 'button', 'selector': 'accept-recommended-btn-handler'}, {'label': 'Targeting Cookies', 'type': 'button', 'selector': 'ot-switch'}, {'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-handler'}, {'label': 'Functional Cookies', 'type': 'button', 'selector': 'ot-switch'}, {'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-handler'}, {'label': 'Performance Cookies', 'type': 'button', 'selector': 'ot-switch'}, {'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-list-handler'}, {'label': 'Cookies Details Cookie Details button opens Cookie List menu', 'type': 'button', 'selector': 'ot-link-btn category-host-

True

In [7]:
controls1.keys()

dict_keys(['label', 'type', 'selector'])

In [None]:
for key in intersecting_controls.keys():
    