# 05. Criterion C: Only 'necessary' cookies are set before consent
Selenium is used to retrieve the cookies that are set before any interaction with the web page. Each cookie is then checked against a reference list of cookie names and their purposes to determine whether it is 'necessary' or not.

In [None]:
import time
import csv
import sqlite3
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

In [None]:
# Use a CSV file to get an array of necessary cookies
def get_necessary_cookies(path):
    """Return the cookie names that the CSV file at *path* marks as 'necessary'.

    The file is read with a header row and must provide the columns
    ``Cookie`` and ``Purpose``. A row is selected when its purpose equals
    'necessary', ignoring case and surrounding whitespace.

    Args:
        path: Location of the CSV reference list.

    Returns:
        A list of cookie names, in file order, with surrounding whitespace
        removed. Rows without a cookie name are left out.

    Raises:
        KeyError: If a row is read from a file whose header lacks the
            ``Purpose`` column (or the ``Cookie`` column for a matching row).
    """
    necessary_cookies = []

    with open(path, mode='r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            # DictReader fills the missing fields of a short row with None,
            # so fall back to '' before calling string methods.
            purpose = (row['Purpose'] or '').strip().lower()
            if purpose != 'necessary':
                continue

            name = (row['Cookie'] or '').strip()
            # A blank name is useless as a reference entry: it is a substring
            # of every cookie name and would mark every cookie as necessary.
            if name:
                necessary_cookies.append(name)

    return necessary_cookies

In [None]:
def get_initial_cookies(driver, url, *, wait_seconds=5):
    """Load *url* and return the cookies present before any user interaction.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to open.
        wait_seconds: How long to pause after the page load so that scripts
            on the page get a chance to set their cookies. Defaults to 5,
            the delay that was previously hard-coded.

    Returns:
        The list returned by ``driver.get_cookies()``, or ``None`` when a
        ``WebDriverException`` was raised while loading or reading the page.
    """
    try:
        driver.get(url)
        time.sleep(wait_seconds)  # Wait for cookies to be set
        return driver.get_cookies()
    except WebDriverException as e:
        # Best effort: report the failure and let the caller decide what a
        # missing result means.
        print(f"An error occurred while fetching the page: {e}")
        return None

In [None]:
# Lower-cased reference names, loaded from the CSV on first use so the file is
# parsed once per session instead of once for every cookie that is checked.
# Re-run this cell to pick up changes made to the CSV file.
_NECESSARY_COOKIE_NAMES = None


def is_necessary_cookie(cookie_name):
    """Tell whether *cookie_name* matches a known 'necessary' cookie.

    A cookie matches when any reference name from ``data/known_cookies.csv``
    occurs as a case-insensitive substring of *cookie_name*.

    Args:
        cookie_name: Name of the cookie as reported by the browser.

    Returns:
        True if a reference name is contained in *cookie_name*, else False.
    """
    global _NECESSARY_COOKIE_NAMES
    if _NECESSARY_COOKIE_NAMES is None:
        _NECESSARY_COOKIE_NAMES = [
            name.lower()
            for name in get_necessary_cookies("data/known_cookies.csv")
            # A blank reference name is a substring of every cookie name and
            # would classify every cookie as necessary, so it is ignored.
            if name and name.strip()
        ]

    candidate = cookie_name.lower()
    return any(known in candidate for known in _NECESSARY_COOKIE_NAMES)

In [None]:
def identify_non_necessary_cookies(cookies):
    """Return the names of the cookies that are not classified as necessary.

    Args:
        cookies: Iterable of cookie dicts, each carrying a ``'name'`` key.

    Returns:
        A list with the name of every cookie for which
        ``is_necessary_cookie`` is false, in the order they were given.
    """
    return [
        entry['name']
        for entry in cookies
        if not is_necessary_cookie(entry['name'])
    ]

In [None]:
# Build the Chrome options; every flag below is passed to the browser as-is
options = Options()
for flag in (
    "--headless",  # Run in headless mode
    "--disable-gpu",
    "--no-sandbox",
    "--disable-dev-shm-usage",
):
    options.add_argument(flag)

# Start Chrome through the chromedriver binary at the fixed path below
service = ChromeService(executable_path='/usr/bin/chromedriver')
driver = webdriver.Chrome(service=service, options=options)

In [None]:
# Connect to the database
# The SQLite file at data/websites.db is opened relative to the current working
# directory. `conn` and `cursor` are shared by the cells below, which read from
# and write to the website_data table.
conn = sqlite3.connect("data/websites.db")
cursor = conn.cursor()

In [None]:
# Read the url column of every row in website_data and collect the values in a list
cursor.execute("SELECT url FROM website_data")
rows = cursor.fetchall()

# Each fetched row is a one-element tuple; keep only the URL itself
urls = [record[0] for record in rows]

In [None]:
# Loop through all URLs, check which cookies are set before consent, and save
# the outcome (necessary_cookies_only) to the database
for url in urls:
    # Step 1: Get initial cookies (None when the page could not be loaded)
    initial_cookies = get_initial_cookies(driver, url)
    print(initial_cookies)

    if initial_cookies is None:
        # The page could not be fetched, so the criterion is recorded as not met.
        # NOTE(review): this makes unreachable sites indistinguishable from
        # sites that really set non-necessary cookies — confirm this is intended.
        necessary_cookies_only = False
    else:
        # Step 2: Identify non-necessary cookies
        non_necessary_cookies = identify_non_necessary_cookies(initial_cookies)
        print(non_necessary_cookies)
        # The criterion is met only when no non-necessary cookie was found
        necessary_cookies_only = not non_necessary_cookies

    # Save to database (committed after every URL)
    cursor.execute("""
    UPDATE website_data
    SET necessary_cookies_only = ?
    WHERE url = ?
    """, (necessary_cookies_only, url))
    conn.commit()