In [7]:
import numpy as np
import pandas as pd
import altair as alt
from tqdm import tqdm
tqdm.pandas()

from clickhouse_driver import Client as Clickhouse
from uuid import uuid4
from pathlib import Path

# Let pandas render up to 500 columns and 500 rows when displaying a DataFrame.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
# Turn off Altair's limit on the number of data rows a chart may be given.
alt.data_transformers.disable_max_rows()

def click_query(q, params=None):
    """Run a query against the ClickHouse server on localhost.

    A new client is created for every call and is disconnected again once
    the query has finished (or failed), so repeated calls from the notebook
    do not accumulate open connections.

    Args:
        q: SQL text to execute.
        params: Optional query parameters, forwarded unchanged to
            ``query_dataframe``.

    Returns:
        The value returned by the client's ``query_dataframe`` call.

    Raises:
        Whatever the client raises while running the query; the connection
        is still released in that case.
    """
    click = Clickhouse("localhost")
    try:
        # Tag the query with a unique id and print it so this particular
        # run can be identified by that id afterwards.
        query_id = f"oonidata-{uuid4()}"
        print(f"Starting query with id: {query_id} :)")
        return click.query_dataframe(q, params=params, query_id=query_id)
    finally:
        # The client is local to this call; release its connection rather
        # than leaving it open until the kernel exits.
        click.disconnect()

In [18]:
# Measurement volume per probe_as_cc: counts distinct measurement_uid values in
# obs_web for measurements started after 2025-10-01 and keeps the 200 largest
# groups, biggest first.
# NOTE(review): this cell groups by probe_as_cc while other cells in this
# notebook group by probe_cc -- confirm which column is intended here.
# NOTE(review): a later cell assigns a different query result to the same
# name df_sample_obs_web, so this frame is lost once that cell runs.
df_sample_obs_web = click_query("""
SELECT
countDistinct(measurement_uid) as count, probe_as_cc
FROM obs_web
WHERE measurement_start_time > '2025-10-01'
GROUP BY probe_as_cc
ORDER BY count DESC
LIMIT 200
""")
df_sample_obs_web

Starting query with id: oonidata-c5f4a79d-0261-4c9a-a547-e192556c22b4 :)


Unnamed: 0,count,probe_as_cc
0,9581527,US
1,3654144,RU
2,3233444,VE
3,2694621,BR
4,2534960,DE
5,2014250,GB
6,2004762,FR
7,1390749,CN
8,1223873,CA
9,1193789,BE


In [None]:
# TLS outcome breakdown for a fixed list of hostnames, restricted to probes
# with probe_cc in KZ/UZ/KG/TJ/TM and to measurements started during 2023
# (start inclusive, end exclusive). Rows without a tls_server_name are skipped.
# For every (hostname, tls_failure, probe_cc) group the query returns the
# number of distinct measurements and the set of end-entity certificate
# fingerprints observed. The ssl_unknown_authority filter is commented out in
# the SQL, so every tls_failure value (including empty) is reported.
# At most 100 rows are returned, ordered by hostname and then by count.
# NOTE(review): this reuses the name df_sample_obs_web that an earlier cell
# assigns to a different result.
df_sample_obs_web = click_query("""
SELECT hostname,
                                tls_failure,
                                probe_cc,
                                countDistinct(measurement_uid) as count,
                                groupUniqArray(tls_end_entity_certificate_fingerprint) AS fingerprints
FROM obs_web
WHERE measurement_start_time >= '2023-01-01' AND measurement_start_time < '2024-01-01'
                                AND tls_server_name IS NOT NULL
                                -- AND tls_failure = 'ssl_unknown_authority'
                                AND probe_cc IN ('KZ', 'UZ', 'KG', 'TJ', 'TM')
                                AND hostname IN ('360tv.ru', 'astrakhan.sm.news', 'compromat.ru', 'cont.ws', 'knews.kg', 'kz.tsargrad.tv', 'regnum.ru', 'rutracker.org', 'sproot.it', 'stanradar.com', 'ukraina.ru', 'www.for.kg', 'www.pinterest.com', 'xakep.ru' )
GROUP BY hostname, tls_failure, probe_cc
ORDER BY hostname, count DESC
LIMIT 100
""")
df_sample_obs_web

Starting query with id: oonidata-effb3681-e8e6-4cd1-9e29-a17d37e1ce25 :)


Unnamed: 0,hostname,tls_failure,probe_cc,count,fingerprints
0,360tv.ru,ssl_unknown_authority,KZ,98,[08897f4b9ff1ef9d419f927b2d3668820bd92463c4c67...
1,360tv.ru,,KZ,32,[e0aae903bfd11a53bf047684f67b0a30380cd80e7e1cb...
2,360tv.ru,generic_timeout_error,KZ,1,[]
3,astrakhan.sm.news,ssl_unknown_authority,KZ,107,[08897f4b9ff1ef9d419f927b2d3668820bd92463c4c67...
4,astrakhan.sm.news,,KZ,27,[0ae8389d385363fa8258cd74b9ce9b634472a63c94007...
5,astrakhan.sm.news,generic_timeout_error,KZ,1,[]
6,compromat.ru,eof_error,KZ,14,[]
7,compromat.ru,ssl_unknown_authority,KZ,13,[1457a5d7b120bd55f2687f57cc02dea7686eeead186f6...
8,compromat.ru,ssl_invalid_certificate,KZ,11,[1457a5d7b120bd55f2687f57cc02dea7686eeead186f6...
9,compromat.ru,generic_timeout_error,KZ,2,[]


In [45]:
# Dump the frame as CSV text: the table display of this frame abbreviates the
# fingerprint lists with "...", whereas the CSV output shows them in full.
print(df_sample_obs_web.to_csv())

,hostname,tls_failure,probe_cc,count,fingerprints
0,360tv.ru,ssl_unknown_authority,KZ,98,"['08897f4b9ff1ef9d419f927b2d3668820bd92463c4c6794776c3dbbd5e3c05f4', '77f7aab9340f5c11b7f8905833bef1977e67c2036ce1aa226fb581c22611921b']"
1,360tv.ru,,KZ,32,['e0aae903bfd11a53bf047684f67b0a30380cd80e7e1cb775c0511b60936553fe']
2,360tv.ru,generic_timeout_error,KZ,1,[]
3,astrakhan.sm.news,ssl_unknown_authority,KZ,107,"['08897f4b9ff1ef9d419f927b2d3668820bd92463c4c6794776c3dbbd5e3c05f4', '77f7aab9340f5c11b7f8905833bef1977e67c2036ce1aa226fb581c22611921b']"
4,astrakhan.sm.news,,KZ,27,['0ae8389d385363fa8258cd74b9ce9b634472a63c940077695eee40aa427a38f7']
5,astrakhan.sm.news,generic_timeout_error,KZ,1,[]
6,compromat.ru,eof_error,KZ,14,[]
7,compromat.ru,ssl_unknown_authority,KZ,13,"['1457a5d7b120bd55f2687f57cc02dea7686eeead186f68a5d8b95864f093c899', '08897f4b9ff1ef9d419f927b2d3668820bd92463c4c6794776c3dbbd5e3c05f4', '77f7aab9340f5c11b7f8905833bef1977e67c2036ce1aa226fb581c22611921b']"
8,compromat.ru,ssl_invalid

In [22]:
# Ten probe_cc values with the most distinct measurements whose tls_failure is
# exactly 'ssl_unknown_authority', for measurements started on or after
# 2025-10-01, ordered from most to fewest.
df_problematic_countries = click_query("""
SELECT
    probe_cc,
    countDistinct(measurement_uid) as count
FROM obs_web
WHERE
        measurement_start_time >= '2025-10-01'
    AND tls_failure = 'ssl_unknown_authority'
GROUP BY probe_cc
ORDER BY count DESC
LIMIT 10
""")
df_problematic_countries

Starting query with id: oonidata-500f9816-68af-419a-92bf-935f4a009fb0 :)


Unnamed: 0,probe_cc,count
0,US,44436
1,BR,12769
2,RU,11658
3,DE,11506
4,AU,10809
5,FR,10634
6,VE,7116
7,NZ,6525
8,CA,6477
9,GB,6149


In [22]:
# Day-by-day view of the certificates seen for hostname 'knews.kg' in May 2024
# (start inclusive, end exclusive), skipping rows without a tls_server_name.
# One row per (day, hostname, end-entity fingerprint, certificate chain
# fingerprints) with the set of probe_as_cc values and the number of distinct
# measurements in that group.
# The aliases `day`, `fingerprint` and `chain` are declared inside the GROUP BY
# clause and referenced from SELECT and ORDER BY.
# NOTE(review): `hostname IS NOT NULL` is implied by `hostname = 'knews.kg'`.
# Results are limited to 100 rows, newest day first.
df_grouped = click_query("""
SELECT
    hostname,
    day,
    fingerprint,
                         chain,
    groupUniqArray(probe_as_cc) as ccs,
    -- groupUniqArray(tls_end_entity_certificate_fingerprint) AS fingerprints,
    -- countDistinct(tls_end_entity_certificate_fingerprint) as num_fingerprints,
    countDistinct(measurement_uid) as count
FROM obs_web
WHERE
                         measurement_start_time >= '2024-05-01' AND measurement_start_time < '2024-06-01'
                    AND tls_server_name IS NOT NULL
                    AND hostname IS NOT NULL
                         AND hostname = 'knews.kg'
                    -- AND probe_as_cc = 'IR'
GROUP BY toDate(measurement_start_time) AS day, hostname, tls_end_entity_certificate_fingerprint AS fingerprint, tls_certificate_chain_fingerprints as chain
-- HAVING uniq(probe_as_cc) > 1 AND uniq(tls_end_entity_certificate_fingerprint) > 1
ORDER BY day DESC, fingerprint
LIMIT 100
""")
df_grouped

Starting query with id: oonidata-e86328ba-788b-47f4-9ab0-5437fc6b5afe :)


Unnamed: 0,hostname,day,fingerprint,chain,ccs,count
0,knews.kg,2024-05-31,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e0...,[],[RU],1
1,knews.kg,2024-05-31,4baccb435915f9305f3970f82f45e5d0570ef73155009b...,[64e286b76063602a372efd60cde8db2656a49ee15e842...,"[KZ, CN, GB]",3
2,knews.kg,2024-05-31,,[],"[GB, RU]",19
3,knews.kg,2024-05-30,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e0...,[],[GB],1
4,knews.kg,2024-05-30,4baccb435915f9305f3970f82f45e5d0570ef73155009b...,[64e286b76063602a372efd60cde8db2656a49ee15e842...,"[KZ, CN, GB, RU]",14
5,knews.kg,2024-05-30,,[],"[SI, RU, GB]",21
6,knews.kg,2024-05-29,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e0...,[],"[GB, RU]",2
7,knews.kg,2024-05-29,4baccb435915f9305f3970f82f45e5d0570ef73155009b...,[64e286b76063602a372efd60cde8db2656a49ee15e842...,"[KZ, CN, GB, RU]",10
8,knews.kg,2024-05-29,,[],"[GB, RU]",17
9,knews.kg,2024-05-28,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e0...,[],[RU],4


In [17]:
# Dump df_grouped as CSV text so the full, unabbreviated fingerprints are visible.
# NOTE(review): the saved output of this cell has no `chain` column although
# the df_grouped query selects one, so the output appears to predate the
# current query -- re-run this cell to refresh it.
print(df_grouped.to_csv())

,hostname,day,fingerprint,ccs,count
0,knews.kg,2024-05-31,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e06970ba0598a1ee59ce,['RU'],1
1,knews.kg,2024-05-31,4baccb435915f9305f3970f82f45e5d0570ef73155009b1f0452fc957ac4fa3d,"['KZ', 'CN', 'GB']",3
2,knews.kg,2024-05-31,,"['GB', 'RU']",19
3,knews.kg,2024-05-30,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e06970ba0598a1ee59ce,['GB'],1
4,knews.kg,2024-05-30,4baccb435915f9305f3970f82f45e5d0570ef73155009b1f0452fc957ac4fa3d,"['KZ', 'CN', 'GB', 'RU']",14
5,knews.kg,2024-05-30,,"['SI', 'RU', 'GB']",21
6,knews.kg,2024-05-29,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e06970ba0598a1ee59ce,"['GB', 'RU']",2
7,knews.kg,2024-05-29,4baccb435915f9305f3970f82f45e5d0570ef73155009b1f0452fc957ac4fa3d,"['KZ', 'CN', 'GB', 'RU']",10
8,knews.kg,2024-05-29,,"['GB', 'RU']",17
9,knews.kg,2024-05-28,0e11b482391693319ca47ea75f15dcebc0c3e7bb1738e06970ba0598a1ee59ce,['RU'],4
10,knews.kg,2024-05-28,4baccb435915f9305f3970f82f45e5d0570ef73155009b1f0452fc957ac4fa3d,"['KZ', 'CN

In [27]:
# Aggregate for hostname 'knews.kg' over 2023-06-01 (inclusive) to 2024-06-01
# (exclusive): the set of distinct end-entity certificate fingerprints, the set
# of probe_as_cc values, and the number of distinct measurements.
# Only rows with a tls_server_name, a non-null input, a non-null end-entity
# fingerprint and a certificate chain length greater than 1 are considered.
# Because the WHERE clause pins hostname to a single value, GROUP BY hostname
# produces at most one row; the LIMIT 20 is therefore never reached.
df_group_by_fingerprint = click_query("""
SELECT
                                      hostname,
                                      groupUniqArray(tls_end_entity_certificate_fingerprint) as fingerprints,
                                -- tls_certificate_chain_fingerprints,
                                groupUniqArray(probe_as_cc) as ccs,
                                countDistinct(measurement_uid) as count
FROM obs_web
WHERE measurement_start_time >= '2023-06-01' AND measurement_start_time < '2024-06-01'
                                AND tls_server_name IS NOT NULL
                                AND hostname = 'knews.kg'
                                AND input IS NOT NULL
                                -- AND tls_failure IS NOT NULL
                                AND tls_end_entity_certificate_fingerprint IS NOT NULL
                                      AND tls_certificate_chain_length > 1
GROUP BY hostname
-- HAVING has(ccs, 'IR')
ORDER BY hostname
LIMIT 20
""")
df_group_by_fingerprint

Starting query with id: oonidata-b8edff43-25fe-4229-8fb7-3e4753870cb3 :)


Unnamed: 0,hostname,fingerprints,ccs,count
0,knews.kg,[f8b9aa65a7d012de1290e8ea644aa8f597c9d27ba459d...,"[CA, RU, PA, CN, GB, KZ, RO, PS, KG, US]",593


In [28]:
# Dump the frame as CSV text: the table display abbreviates the fingerprints
# list with "...", whereas the CSV output lists every fingerprint in full.
print(df_group_by_fingerprint.to_csv())

,hostname,fingerprints,ccs,count
0,knews.kg,"['f8b9aa65a7d012de1290e8ea644aa8f597c9d27ba459db5208a7ae1eb2f05b45', 'addb45adae54f48b8bae9debe67e165c19a16c9a0bb7f41f3dd2767a272ac20e', 'ce5eb5e2aa67579d10fac916ca14cd762f12fd0f516a7092d0d50536ec17117a', '1ff5ec38d211e97615de1ae675b475abaa819cb88c1967caaa0d13af9fecc4ba', '68cd34df313125fad08894658f79022e61b57de80d463a0795a7f0cb5f056a5a', '4baccb435915f9305f3970f82f45e5d0570ef73155009b1f0452fc957ac4fa3d', '982620169c62925a8e09b7d4fde691f74cbe726f62b3f984da7f1c5cceaf3b5f']","['CA', 'RU', 'PA', 'CN', 'GB', 'KZ', 'RO', 'PS', 'KG', 'US']",593

