# A Collection of Results from OpenWPM data

To use, set `datadir_path` below, then run all cells.

In [2]:
from pathlib import Path

# Root directory of the OpenWPM crawl output analysed by this notebook.
# Edit this value before running; the crawl-data.sqlite database is looked up inside it.
datadir_path: Path = Path(
    '/home/ndanner_plp/OpenWPM/crawl-data/datadir-0-10000'
)

In [3]:
import sys
import os
from sqlalchemy.engine import Engine
from sqlalchemy import create_engine, URL 
import plyvel
from typing import Any
%reload_ext sql

parent_path : Path = Path(sys.path[0]).parent.resolve()
if str(parent_path) not in sys.path:
    sys.path.append(str(parent_path))


database_url : URL = URL.create(drivername = "sqlite", database = str(datadir_path.joinpath("crawl-data.sqlite")) )
os.environ["DATABASE_URL"] = str(database_url)
engine : Engine = create_engine(database_url)
# if 'db' not in locals():
#     db : Any = plyvel.DB( str(datadir_path.joinpath("leveldb")) ) #type: ignore

### Prevalence of `CanvasRenderingContext2D` versus `WebGLRenderingContext`

In [None]:
%%sql
WITH a AS (
    -- Distinct visits that received at least one response whose status starts with 2.
    -- String literals are single-quoted throughout. SQLite accepts double-quoted
    -- strings only as a legacy fallback that can be disabled at build time.
    SELECT COUNT(DISTINCT visit_id) AS Total
    FROM http_responses
    WHERE response_status LIKE '2%'
), b AS (
    -- Distinct visits with at least one recorded CanvasRenderingContext2D symbol.
    SELECT COUNT(DISTINCT visit_id) AS Using_Canvas
    FROM javascript
    WHERE symbol LIKE 'CanvasRenderingContext2D%'
), c AS (
    -- Distinct visits with at least one recorded WebGLRenderingContext symbol.
    SELECT COUNT(DISTINCT visit_id) AS Using_WEBGL
    FROM javascript
    WHERE symbol LIKE 'WebGLRenderingContext%'
)
-- Every CTE yields exactly one row, so the cross join is a single summary row.
SELECT a.Total, b.Using_Canvas, c.Using_WEBGL
FROM a, b, c

### Unambiguous proof of Canvas fingerprinting among the group classified as Canvas fingerprinting dynamically but not lexically
(Note: this is during the first analysis, where no string unescaping is performed.)

TODO, prove how many of them are fingerprinting

In [18]:
%%sql
WITH domain AS (
    -- Scripts flagged by the dynamic basic-canvas classification but not the static one.
    SELECT visit_id, script_url
    FROM analysis_results
    WHERE Canvas_Basic_Dynamic = True AND Canvas_Basic_Static = False
),
total AS (
    SELECT COUNT(*) AS total
    FROM domain
),
b AS (
    -- Of those, scripts with a recorded fillText call whose arguments begin with one
    -- of the two probe strings below. The ESCAPE clause applies to the second LIKE
    -- only and makes the backslash-prefixed percent sign a literal character.
    -- String literals are single-quoted. SQLite accepts double-quoted strings only
    -- as a legacy fallback that can be disabled at build time.
    SELECT COUNT(*) AS using_fillText_questionably
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript AS js
        WHERE js.symbol = 'CanvasRenderingContext2D.fillText'
          AND (   js.arguments LIKE '["Cwm fjordbank glyphs vext quiz%'
               OR js.arguments LIKE '["Hel$&?6\%){mZ+#@%' ESCAPE '\' )
    )
)
-- Both counting CTEs yield one row each, so this returns a single summary row.
SELECT *
FROM total, b

 * sqlite:////home/ndanner_plp/OpenWPM/crawl-data/datadir-0-10000/crawl-data.sqlite
Done.


total,using_fillText_questionably
699,107


### Signs of Canvas fingerprinting among the group classified as Canvas font fingerprinting dynamically but not lexically

In [13]:
%%sql
WITH domain AS (
    -- Scripts flagged by the dynamic canvas-font classification but not the static one.
    SELECT visit_id, script_url
    FROM analysis_results
    WHERE Canvas_Font_1M_Dynamic = True AND Canvas_Font_1M_Static = False
),
total AS (
    SELECT COUNT(*) AS total
    FROM domain
),
b AS (
    -- Of those, scripts with a recorded fillText call whose arguments begin with one
    -- of the two probe strings below. The ESCAPE clause applies to the second LIKE
    -- only and makes the backslash-prefixed percent sign a literal character.
    -- String literals are single-quoted. SQLite accepts double-quoted strings only
    -- as a legacy fallback that can be disabled at build time.
    SELECT COUNT(*) AS using_fillText_questionably
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript AS js
        WHERE js.symbol = 'CanvasRenderingContext2D.fillText'
          AND (   js.arguments LIKE '["Cwm fjordbank glyphs vext quiz%'
               OR js.arguments LIKE '["Hel$&?6\%){mZ+#@%' ESCAPE '\' )
    )
)
-- Both counting CTEs yield one row each, so this returns a single summary row.
SELECT *
FROM total, b

 * sqlite:////home/ndanner_plp/OpenWPM/crawl-data/datadir-0-10000/crawl-data.sqlite
Done.


total,using_fillText_questionably
66,65


### Use of the Canvas API among the group classified as Canvas font fingerprinting lexically but not dynamically

In [14]:
%%sql
WITH domain AS (
    -- Scripts flagged by the static canvas-font classification but not the dynamic one.
    SELECT visit_id, script_url
    FROM analysis_results
    WHERE Canvas_Font_1M_Dynamic = False AND Canvas_Font_1M_Static = True
),
total AS (
    SELECT COUNT(*) AS total
    FROM domain
),
b AS (
    -- Scripts in the group with any recorded CanvasRenderingContext2D symbol.
    -- String literals are single-quoted. SQLite accepts double-quoted strings only
    -- as a legacy fallback that can be disabled at build time.
    SELECT COUNT(*) AS using_Canvas_API
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript
        WHERE symbol LIKE 'CanvasRenderingContext2D%'
    )
),
c AS (
    -- Scripts in the group with both a font record and a measureText record.
    SELECT COUNT(*) AS using_MeasureText_and_font
    FROM domain
    WHERE (visit_id, script_url) IN (SELECT visit_id, script_url FROM javascript WHERE symbol = 'CanvasRenderingContext2D.font')
    AND   (visit_id, script_url) IN (SELECT visit_id, script_url FROM javascript WHERE symbol = 'CanvasRenderingContext2D.measureText')
),
d AS (
    -- Same condition as c, restricted to script URLs under the Taboola cta-component prefix.
    SELECT COUNT(*) AS AD_Network_using_MeasureText_and_font
    FROM domain
    WHERE (visit_id, script_url) IN (SELECT visit_id, script_url FROM javascript WHERE symbol = 'CanvasRenderingContext2D.font')
    AND   (visit_id, script_url) IN (SELECT visit_id, script_url FROM javascript WHERE symbol = 'CanvasRenderingContext2D.measureText')
    AND   script_url LIKE 'https://cdn.taboola.com/libtrc/cta-component%'
)
-- Every counting CTE yields one row, so this returns a single summary row.
SELECT *
FROM total, b, c, d

 * sqlite:////home/ndanner_plp/OpenWPM/crawl-data/datadir-0-10000/crawl-data.sqlite
Done.


total,using_Canvas_API,using_MeasureText_and_font,AD_Network_using_MeasureText_and_font
1243,509,121,60


### Signs of Canvas fingerprinting among the group classified as WebGL parameter fingerprinting dynamically but not lexically

In [15]:
%%sql
WITH domain AS (
    -- Scripts flagged by the dynamic WebGL classification but not the static one.
    SELECT visit_id, script_url
    FROM analysis_results
    WHERE WebGL_Dynamic = True AND WebGL_Static = False
),
total AS (
    SELECT COUNT(*) AS total
    FROM domain
),
b AS (
    -- Of those, scripts with a recorded fillText call whose arguments begin with the
    -- probe string below.
    -- String literals are single-quoted. SQLite accepts double-quoted strings only
    -- as a legacy fallback that can be disabled at build time.
    SELECT COUNT(*) AS using_fillText_questionably
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript
        WHERE symbol = 'CanvasRenderingContext2D.fillText'
          AND arguments LIKE '["Cwm fjordbank glyphs vext quiz%'
    )
)
-- Both counting CTEs yield one row each, so this returns a single summary row.
SELECT *
FROM total, b

 * sqlite:////home/ndanner_plp/OpenWPM/crawl-data/datadir-0-10000/crawl-data.sqlite
Done.


total,using_fillText_questionably
40,37


### Signs of Canvas fingerprinting among the group classified as WebGL parameter fingerprinting lexically but not dynamically

In [16]:
%%sql
WITH domain AS (
    -- Scripts flagged by the static WebGL classification but not the dynamic one.
    SELECT visit_id, script_url
    FROM analysis_results
    WHERE WebGL_Dynamic = False AND WebGL_Static = True
),
total AS (
    SELECT COUNT(*) AS total
    FROM domain
),
b AS (
    -- Scripts in the group with any recorded CanvasRenderingContext2D symbol.
    -- String literals are single-quoted. SQLite accepts double-quoted strings only
    -- as a legacy fallback that can be disabled at build time.
    SELECT COUNT(*) AS using_Canvas_API
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript
        WHERE symbol LIKE 'CanvasRenderingContext2D%'
    )
),
c AS (
    -- Scripts in the group with any recorded WebGLRenderingContext symbol.
    SELECT COUNT(*) AS using_WebGL_API
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript
        WHERE symbol LIKE 'WebGLRenderingContext%'
    )
),
d AS (
    -- Scripts in the group with a recorded fillText call whose arguments begin with
    -- the probe string below.
    SELECT COUNT(*) AS using_fillText_questionably
    FROM domain
    WHERE (visit_id, script_url) IN (
        SELECT visit_id, script_url
        FROM javascript
        WHERE symbol = 'CanvasRenderingContext2D.fillText'
          AND arguments LIKE '["Cwm fjordbank glyphs vext quiz%'
    )
),
e AS (
    -- Scripts in the group that analysis_results also marks with Canvas_1M_Dynamic.
    SELECT COUNT(*) AS classified_As_Canvas_Fingerprinting_Dynamically
    FROM domain
    WHERE (visit_id, script_url) IN (SELECT visit_id, script_url FROM analysis_results WHERE Canvas_1M_Dynamic = True)
)
-- Every counting CTE yields one row, so this returns a single summary row.
SELECT *
FROM total, b, c, d, e

 * sqlite:////home/ndanner_plp/OpenWPM/crawl-data/datadir-0-10000/crawl-data.sqlite
Done.


total,using_Canvas_API,using_WebGL_API,using_fillText_questionably,classified_As_Canvas_Fingerprinting_Dynamically
2277,350,2,177,294
