In [1]:
import numpy as np
import pandas as pd
import altair as alt

from tqdm import tqdm

# Register tqdm's progress-bar helpers on pandas objects.
tqdm.pandas()
# Show every column and up to 200 rows whenever a DataFrame is displayed.
pd.options.display.max_columns = None
pd.options.display.max_rows = 200
# Lift Altair's default cap on the number of rows a chart is allowed to embed.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
def get_explorer_url(e):
    """
    Return the OONI Explorer URL of a measurement.

    `e` is any object that can be indexed with the key "measurement_uid"
    (for example a dict or a DataFrame row).
    """
    measurement_uid = e["measurement_uid"]
    return 'https://explorer.ooni.org/m/{}'.format(measurement_uid)

def print_explorer_url(e):
    """Print the OONI Explorer URL of measurement `e` (see get_explorer_url)."""
    explorer_url = get_explorer_url(e)
    print(explorer_url)

In [3]:
# Hex color values keyed by "<hue><shade>", where <hue> is one of gray, blue, indigo,
# violet, fuchsia, pink, red, orange, yellow, lime, green, teal, cyan and <shade> runs 0-9.
# Used below to pick the colors of the "ok" and failure series.
OONI_COLOR_SCHEME = {"gray0": "#f8f9fa", "gray1": "#f1f3f5", "gray2": "#e9ecef", "gray3": "#dee2e6", "gray4": "#ced4da", "gray5": "#adb5bd", "gray6": "#868e96", "gray7": "#495057", "gray8": "#343a40", "gray9": "#212529", "blue0": "#e7f5ff", "blue1": "#c9e8ff", "blue2": "#8dd5f8", "blue3": "#5db8fe", "blue4": "#37a6ed", "blue5": "#0588cb", "blue6": "#0f77b8", "blue7": "#056aa6", "blue8": "#005f9c", "blue9": "#005a99", "indigo0": "#edf2ff", "indigo1": "#dbe4ff", "indigo2": "#bac8ff", "indigo3": "#91a7ff", "indigo4": "#748ffc", "indigo5": "#5c7cfa", "indigo6": "#4c6ef5", "indigo7": "#4263eb", "indigo8": "#3b5bdb", "indigo9": "#364fc7", "violet0": "#f3f0ff", "violet1": "#e5dbff", "violet2": "#d0bfff", "violet3": "#b197fc", "violet4": "#9775fa", "violet5": "#845ef7", "violet6": "#7950f2", "violet7": "#7048e8", "violet8": "#6741d9", "violet9": "#5f3dc4", "fuchsia0": "#f8f0fc", "fuchsia1": "#f3d9fa", "fuchsia2": "#eebefa", "fuchsia3": "#e599f7", "fuchsia4": "#da77f2", "fuchsia5": "#cc5de8", "fuchsia6": "#be4bdb", "fuchsia7": "#ae3ec9", "fuchsia8": "#9c36b5", "fuchsia9": "#862e9c", "pink0": "#fff0f6", "pink1": "#ffdeeb", "pink2": "#fcc2d7", "pink3": "#faa2c1", "pink4": "#f783ac", "pink5": "#f06595", "pink6": "#e64980", "pink7": "#d6336c", "pink8": "#c2255c", "pink9": "#a61e4d", "red0": "#fff5f5", "red1": "#ffe3e3", "red2": "#ffc9c9", "red3": "#ffa8a8", "red4": "#ff8787", "red5": "#ff6b6b", "red6": "#fa5252", "red7": "#f03e3e", "red8": "#e03131", "red9": "#c92a2a", "orange0": "#fff4e6", "orange1": "#ffe8cc", "orange2": "#ffd8a8", "orange3": "#ffc078", "orange4": "#ffa94d", "orange5": "#ff922b", "orange6": "#fd7e14", "orange7": "#f76707", "orange8": "#e8590c", "orange9": "#d9480f", "yellow0": "#fff9db", "yellow1": "#fff3bf", "yellow2": "#ffec99", "yellow3": "#ffe066", "yellow4": "#ffd43b", "yellow5": "#fcc419", "yellow6": "#fab005", "yellow7": "#f59f00", "yellow8": "#f08c00", "yellow9": "#e67700", "lime0": "#f4fce3", "lime1": "#e9fac8", "lime2": "#d8f5a2", "lime3": 
"#c0eb75", "lime4": "#a9e34b", "lime5": "#94d82d", "lime6": "#82c91e", "lime7": "#74b816", "lime8": "#66a80f", "lime9": "#5c940d", "green0": "#ebfbee", "green1": "#d3f9d8", "green2": "#b2f2bb", "green3": "#8ce99a", "green4": "#69db7c", "green5": "#51cf66", "green6": "#40c057", "green7": "#37b24d", "green8": "#2f9e44", "green9": "#2b8a3e", "teal0": "#e6fcf5", "teal1": "#c3fae8", "teal2": "#96f2d7", "teal3": "#63e6be", "teal4": "#38d9a9", "teal5": "#20c997", "teal6": "#12b886", "teal7": "#0ca678", "teal8": "#099268", "teal9": "#087f5b", "cyan0": "#e3fafc", "cyan1": "#c5f6fa", "cyan2": "#99e9f2", "cyan3": "#66d9e8", "cyan4": "#3bc9db", "cyan5": "#22b8cf", "cyan6": "#15aabf", "cyan7": "#1098ad", "cyan8": "#0c8599", "cyan9": "#0b7285"}

In [4]:
# Colors handed out, in this order, to the non-"ok" failure strings of a chart.
OONI_NOK_COLORS = [
    OONI_COLOR_SCHEME[color_name]
    for color_name in ('red8', 'yellow6', 'gray5', 'blue6', 'orange6')
]
# Color reserved for the "ok" outcome.
OONI_OK_COLOR = OONI_COLOR_SCHEME['green6']

In [5]:
def make_ooni_color_scale(all_failures):
    """
    Build an Altair color scale that follows the OONI palette for a set of failure strings.

    "ok" is always the first entry of the domain and is mapped to OONI_OK_COLOR,
    whether or not it appears in `all_failures`. Every other failure string is
    assigned, in input order, the next color of OONI_NOK_COLORS.

    Parameters
    ----------
    all_failures : iterable of str
        Failure strings that appear in the chart. The argument is not modified
        and repeated values are only counted once.

    Returns
    -------
    alt.Scale
        Scale whose `domain` is ["ok", *failures] and whose `range` holds the
        matching colors.

    Raises
    ------
    ValueError
        When there are more distinct non-"ok" failure strings than colors in
        OONI_NOK_COLORS.
    """
    # Work on a de-duplicated copy so the caller's list is left untouched.
    failures = [f for f in dict.fromkeys(all_failures) if f != 'ok']
    if len(failures) > len(OONI_NOK_COLORS):
        raise ValueError(f"too many failure strings for current color scheme: {failures}")

    color_domain = ['ok'] + failures
    color_range = [OONI_OK_COLOR] + OONI_NOK_COLORS[:len(failures)]
    return alt.Scale(domain=color_domain, range=color_range)

## Load the source datasets
* `2023-KE-web_connectivity-telegram.org.csv` contains all web_connectivity measurements from 2023-10-15 to 2023-12-15 coming from Kenya for `hostname = telegram.org`
* `2023-KE-telegram.csv` contains all measurements for the `telegram` test from 2023-10-15 to 2023-12-15 coming from Kenya
* `2024-KE-web_connectivity-telegram.org.csv` contains all web_connectivity measurements from 2024-10-15 to 2024-12-15 coming from Kenya for `hostname = telegram.org`
* `2024-KE-telegram.csv` contains all measurements for the `telegram` test from 2024-10-15 to 2024-12-15 coming from Kenya

These CSV files were generated by running the following query on our observation table researcher database:

```
WITH multiIf(
    dns_failure IS NOT NULL, tuple('dns', dns_failure),
    tcp_failure IS NOT NULL, tuple('tcp', tcp_failure),
    tls_failure IS NOT NULL, tuple('tls', tls_failure),
    http_failure IS NOT NULL, tuple('https', http_failure),
    tuple('ok', '')
) as failure
SELECT 
report_id,
input,
test_name,
test_version,
measurement_uid,
probe_cc,
probe_asn,
probe_as_org_name,
probe_as_cc,
network_type,
measurement_start_time,
hostname,
ip,
port,
ip_asn,
ip_as_org_name,
resolver_ip,
resolver_cc,
resolver_asn,
resolver_as_org_name,
resolver_as_cc,
dns_engine,
dns_failure,
dns_answer,
tcp_success,
tcp_failure,
tls_handshake_time,
tls_handshake_read_count,
tls_handshake_write_count,
tls_handshake_read_bytes,
tls_handshake_write_bytes,
tls_handshake_last_operation,
tls_cipher_suite IS NOT NULL as tls_success,
tls_is_certificate_valid,
tls_end_entity_certificate_subject,
tls_end_entity_certificate_subject_common_name,
tls_end_entity_certificate_issuer,
tls_end_entity_certificate_issuer_common_name,
tls_end_entity_certificate_san_list,
tls_end_entity_certificate_not_valid_after,
tls_end_entity_certificate_not_valid_before,
tls_certificate_chain_length,
tls_failure,
http_request_url,
http_failure,
http_runtime,
probe_analysis,
failure.1 as failure_class,
IF(failure_class = 'ok', 'ok', concat(failure_class, '.', failure_str)) as failure_str_full,
IF(startsWith(failure.2, 'unknown_failure'), 'unknown_failure', failure.2) as failure_str,
failure.2 as failure_str_raw
FROM obs_web
WHERE measurement_start_time > %(measurement_start_day)s
AND measurement_start_time < %(measurement_end_day)s
AND probe_cc IN %(cc_list)s
AND test_name IN %(test_name)s
```

The code used for extracting these fields from the raw JSON measurements can be found here: https://github.com/ooni/data. 

If you require access to this database, please contact us at contact@ooni.org.

In [6]:
# Columns that hold timestamps and must be parsed as dates.
date_columns = [
    'measurement_start_time',
    'tls_end_entity_certificate_not_valid_after',
    'tls_end_entity_certificate_not_valid_before'
]
# Read the failure columns as plain Python objects instead of letting pandas infer a dtype.
column_types = {'dns_failure': np.dtype('O'), 'tls_failure': np.dtype('O')}

# Options shared by the four CSV reads below.
csv_read_options = dict(parse_dates=date_columns, dtype=column_types)

df_wc_2023 = pd.read_csv("2023-KE-web_connectivity-telegram.org.csv", **csv_read_options)
df_tg_2023 = pd.read_csv("2023-KE-telegram.csv", **csv_read_options)

df_wc_2024 = pd.read_csv("2024-KE-web_connectivity-telegram.org.csv", **csv_read_options)
df_tg_2024 = pd.read_csv("2024-KE-telegram.csv", **csv_read_options)

In [7]:
# groupby drops every row whose grouping key is NaN, and the telegram test results
# have NULLs in string-valued key columns such as `hostname`. Put a 'none' placeholder
# in those key columns only: filling the whole frame would also write the string
# 'none' into numeric and datetime columns. Re-running this cell is harmless.
TG_GROUP_KEY_COLUMNS = ['probe_as_org_name', 'hostname', 'ip', 'failure_str_full']
df_tg_2023[TG_GROUP_KEY_COLUMNS] = df_tg_2023[TG_GROUP_KEY_COLUMNS].fillna('none')
df_tg_2024[TG_GROUP_KEY_COLUMNS] = df_tg_2024[TG_GROUP_KEY_COLUMNS].fillna('none')

  df_tg_2023.fillna(value='none', inplace=True)
  df_tg_2023.fillna(value='none', inplace=True)
  df_tg_2024.fillna(value='none', inplace=True)
  df_tg_2024.fillna(value='none', inplace=True)


In [8]:
def calculate_grouped_with_totals(df_raw):
    """
    Aggregate raw observations into hourly buckets.

    Observations are counted per (hour, probe_as_org_name, probe_asn, hostname,
    ip, failure_str_full). Each resulting row carries:

    * `obs_count`: number of observations with that `failure_str_full`,
    * `total_count`: number of observations in the same
      (hour, probe_as_org_name, probe_asn, hostname, ip) bucket,
    * `percentage`: `obs_count` as a percentage of `total_count`.
    """
    bucket_cols = ['measurement_start_time', 'probe_as_org_name', 'probe_asn', 'hostname', 'ip']
    hourly = pd.Grouper(key='measurement_start_time', freq='h')

    # One row per bucket and failure string, counting the measurement_uid values.
    per_failure = (
        df_raw[bucket_cols + ['failure_str_full', 'measurement_uid']]
        .groupby([hourly, 'probe_as_org_name', 'probe_asn', 'hostname', 'ip', 'failure_str_full'])
        ['measurement_uid']
        .count()
    )
    df_with_totals = per_failure.rename('obs_count').reset_index()

    # Size of the whole bucket, repeated on each of its rows.
    df_with_totals['total_count'] = (
        df_with_totals.groupby(bucket_cols)['obs_count'].transform('sum')
    )
    df_with_totals['percentage'] = df_with_totals['obs_count'] / df_with_totals['total_count'] * 100
    return df_with_totals

In [9]:
# 149.154.167.99

def plot_no_disaggregate(df_msmts, df_time_intervals, start_time='2023-11-14', save=False, title='Overall accessibility of telegram.org (web_connectivity)', fname_extra=''):
    """
    Plot a single overview chart of the outcomes, aggregated over all networks.

    Parameters
    ----------
    df_msmts : DataFrame
        Needs the columns `measurement_start_time`, `failure_str_full` and
        `obs_count`. Rows are re-aggregated per
        (measurement_start_time, failure_str_full); the input is not modified.
    df_time_intervals : DataFrame
        Needs the columns `start_time` and `end_time`; each row is drawn as a
        shaded box behind the bars.
    start_time : str
        Only data after this date is plotted. Its first four characters are
        used as the year in the chart title and in the file name.
    save : bool
        When True the chart is written to `charts/` as png and svg and the
        plotted data to `data/` as csv; otherwise the chart is displayed inline.
    title : str
        First part of the chart title.
    fname_extra : str
        Suffix inserted in the output file name.
    """
    # Collapse the input to one row per (time bucket, failure string) and
    # recompute the totals and percentages over all networks.
    df_all = df_msmts.groupby(['measurement_start_time', 'failure_str_full'])['obs_count'].sum().reset_index()
    total_counts = df_all.groupby('measurement_start_time')['obs_count'].sum().reset_index()
    total_counts = total_counts.rename(columns={'obs_count': 'total_count'})
    df_overall = pd.merge(df_all, total_counts, on='measurement_start_time')
    df_overall['percentage'] = (df_overall['obs_count'] / df_overall['total_count']) * 100

    left_offset = -30
    top_offset = -20
    ooni_logo = alt.Chart(
        {"values": [{"url": "https://raw.githubusercontent.com/ooni/design-system/refs/heads/master/svgs/logos/OONI-HorizontalMonochrome.svg"}]}
    ).mark_image(opacity=0.5).encode(
        x=alt.value(left_offset), x2=alt.value(left_offset+80),  # pixels from left
        y=alt.value(top_offset), y2=alt.value(top_offset+40),  # pixels from top
        url="url:N"
    )

    df_sel = df_overall[
        (df_overall['measurement_start_time'] > start_time)
    ]

    base_msmts = alt.Chart(df_sel)

    color_scale = make_ooni_color_scale(list(df_sel['failure_str_full'].unique()))
    # Stacked bars: share of each failure string per time bucket.
    bar_chart = base_msmts.mark_bar(size=2).encode(
        x=alt.X('measurement_start_time:T', axis=alt.Axis(format="%Y-%m-%dT%H")),
        y='percentage',
        color=alt.Color('failure_str_full',
            scale=color_scale,
            legend=alt.Legend(
                title=None,
                orient='none',
                legendY=-20,
                labelOpacity=1,
                direction='horizontal',
                titleAnchor='start')
        ),
        tooltip=(
            alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H"),
            'failure_str_full', 'percentage', 'obs_count',
            'total_count'
        )
    )

    # Circles: number of observations behind each time bucket.
    count_chart = base_msmts.mark_circle().encode(
        x='measurement_start_time',
        y='total_count',
        size=alt.Size('total_count'),
        tooltip=(
            alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H"),
            'failure_str_full', 'percentage', 'obs_count',
            'total_count'
        )
    )

    # Shaded boxes marking the intervals of df_time_intervals.
    box_overlay = alt.Chart(df_time_intervals[
        df_time_intervals['start_time'] > start_time
    ]).mark_rect(stroke='black', color='blue').encode(
        x='start_time',
        y=alt.datum(-10),
        x2='end_time',
        y2=alt.datum(120),
        opacity=alt.value(0.2),
        tooltip=[
            alt.Tooltip("start_time:T", format="%Y-%m-%dT%H"),
            alt.Tooltip("end_time:T", format="%Y-%m-%dT%H"),
        ]
    )

    year = start_time[:4]
    chart = alt.vconcat((box_overlay + bar_chart + count_chart).resolve_legend(color='independent').properties(
        width=1500,
        height=300,
        # "KCSE", matching the spelling used by the other chart titles in this notebook.
        title=f'{title} in Kenya during {year} KCSE exams'
    ), ooni_logo).configure_concat(
        spacing=-30
    ).configure_view(
        strokeOpacity=0
    )
    filename = f'charts/{year}-overview-telegram{fname_extra}.png'
    if save:
        chart.save(filename)
        chart.save(filename.replace('.png', '.svg'))
        df_sel.to_csv(filename.replace('.png', '.csv').replace('charts/', 'data/'))
    else:
        display(chart)

In [10]:
# 149.154.167.99

def plot_all_charts_wc(df_msmts, df_time_intervals, start_time='2023-11-14', save=False):
    """
    Produce one web_connectivity chart per probe_asn, limiting the
    selection to measurements collected after `start_time` and to
    the IP 149.154.167.99 (see report for more details on why).

    Networks with fewer than 10 observations in the selection are skipped.

    Parameters
    ----------
    df_msmts : DataFrame
        Needs the columns `measurement_start_time`, `probe_asn`,
        `probe_as_org_name`, `ip`, `failure_str_full`, `obs_count`,
        `total_count` and `percentage`.
    df_time_intervals : DataFrame
        Needs the columns `start_time` and `end_time`; each row is drawn as a
        shaded box behind the bars.
    start_time : str
        Only data after this date is plotted. Its first four characters are
        used as the year in the chart title and in the file name.
    save : bool
        When True each chart is written to `charts/` as png and svg and its
        data to `data/` as csv; otherwise the charts are displayed inline.
    """
    left_offset = -30
    top_offset = -20
    ooni_logo = alt.Chart(
        {"values": [{"url": "https://raw.githubusercontent.com/ooni/design-system/refs/heads/master/svgs/logos/OONI-HorizontalMonochrome.svg"}]}
    ).mark_image(opacity=0.5).encode(
        x=alt.value(left_offset), x2=alt.value(left_offset+80),  # pixels from left
        y=alt.value(top_offset), y2=alt.value(top_offset+40),  # pixels from top
        url="url:N"
    )

    # These do not depend on the network, so compute them once.
    year = start_time[:4]
    df_intervals_sel = df_time_intervals[
        df_time_intervals['start_time'] > start_time
    ]

    for idx, row in df_msmts[['probe_asn', 'probe_as_org_name']].drop_duplicates().iterrows():
        probe_asn = row['probe_asn']
        probe_as_org_name = row['probe_as_org_name']

        df_sel = df_msmts[
            (df_msmts['ip'] == '149.154.167.99')
            & (df_msmts['measurement_start_time'] > start_time)
            & (df_msmts['probe_asn'] == probe_asn)
        ]
        if df_sel['total_count'].sum() < 10:
            continue

        base_msmts = alt.Chart(df_sel)

        color_scale = make_ooni_color_scale(list(df_sel['failure_str_full'].unique()))
        # Stacked bars: share of each failure string per time bucket.
        bar_chart = base_msmts.mark_bar(size=2).encode(
            x=alt.X('measurement_start_time:T', axis=alt.Axis(format="%Y-%m-%dT%H")),
            y='percentage',
            color=alt.Color('failure_str_full',
                scale=color_scale,
                legend=alt.Legend(
                    title=None,
                    orient='none',
                    legendY=-20,
                    labelOpacity=1,
                    direction='horizontal',
                    titleAnchor='start')
            ),
            tooltip=(
                alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H"),
                'probe_as_org_name',
                'failure_str_full', 'percentage', 'obs_count',
                'total_count'
            )
        )

        # Circles: number of observations behind each time bucket.
        count_chart = base_msmts.mark_circle().encode(
            x='measurement_start_time',
            y='total_count',
            size=alt.Size('total_count'),
            tooltip=(
                alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H"),
                'probe_as_org_name',
                'failure_str_full', 'percentage', 'obs_count',
                'total_count'
            )
        )

        # Shaded boxes marking the intervals of df_time_intervals.
        box_overlay = alt.Chart(df_intervals_sel).mark_rect(stroke='black', color='blue').encode(
            x='start_time',
            y=alt.datum(-10),
            x2='end_time',
            y2=alt.datum(120),
            opacity=alt.value(0.2),
            tooltip=[
                alt.Tooltip("start_time:T", format="%Y-%m-%dT%H"),
                alt.Tooltip("end_time:T", format="%Y-%m-%dT%H"),
            ]
        )

        chart = alt.vconcat((box_overlay + bar_chart + count_chart).resolve_legend(color='independent').properties(
            width=1500,
            height=300,
            # "KCSE", matching the spelling used by the other chart titles in this notebook.
            title=f'Accessibility of telegram.org (web_connectivity) in Kenya on {probe_as_org_name} (AS{probe_asn}) during {year} KCSE exams'
        ), ooni_logo).configure_concat(
            spacing=-30
        ).configure_view(
            strokeOpacity=0
        )
        filename = f'charts/{year}-telegram.org-web_connectivity-AS{probe_asn}.png'
        if save:
            chart.save(filename)
            chart.save(filename.replace('.png', '.svg'))
            df_sel.to_csv(filename.replace('.png', '.csv').replace('charts/', 'data/'))
        else:
            display(chart)

In [11]:
# 149.154.167.99

def plot_all_charts_tg(df_msmts, df_time_intervals, start_time='2023-11-03', save=False, fname_extra=''):
    """
    Produce one telegram test chart per probe_asn, limiting the
    selection to measurements collected after `start_time`.

    Each chart stacks one panel per IP found in `df_msmts` for that network.
    This function does not filter on IP itself: the caller chooses which IPs
    to plot by filtering `df_msmts` beforehand (e.g. excluding or keeping only
    149.154.167.99).

    IPs with fewer than 10 observations are left out, and a network for which
    no IP reaches that threshold is skipped altogether.

    Parameters
    ----------
    df_msmts : DataFrame
        Needs the columns `measurement_start_time`, `probe_asn`,
        `probe_as_org_name`, `ip`, `failure_str_full`, `obs_count`,
        `total_count` and `percentage`.
    df_time_intervals : DataFrame
        Needs the columns `start_time` and `end_time`; each row is drawn as a
        shaded box behind the bars.
    start_time : str
        Only data after this date is plotted. Its first four characters are
        used as the year in the chart title and in the file name.
    save : bool
        When True each chart is written to `charts/` as png and its data to
        `data/` as csv; otherwise the charts are displayed inline.
    fname_extra : str
        Suffix inserted in the output file name.
    """
    left_offset = -30
    top_offset = -20
    ooni_logo = alt.Chart(
        {"values": [{"url": "https://raw.githubusercontent.com/ooni/design-system/refs/heads/master/svgs/logos/OONI-HorizontalMonochrome.svg"}]}
    ).mark_image(opacity=0.5).encode(
        x=alt.value(left_offset), x2=alt.value(left_offset+80),  # pixels from left
        y=alt.value(top_offset), y2=alt.value(top_offset+40),  # pixels from top
        url="url:N"
    )
    year = start_time[:4]
    # Does not depend on the network or the IP, so compute it once.
    df_intervals_sel = df_time_intervals[
        df_time_intervals['start_time'] > start_time
    ]

    for idx, row in df_msmts[['probe_asn', 'probe_as_org_name']].drop_duplicates().iterrows():
        probe_asn = row['probe_asn']
        probe_as_org_name = row['probe_as_org_name']

        chart_group = []
        df_sel = df_msmts[
            (df_msmts['probe_asn'] == probe_asn)
            & (df_msmts['measurement_start_time'] > start_time)
        ]
        # One color scale per network, so every IP panel uses the same colors.
        color_scale = make_ooni_color_scale(list(df_sel['failure_str_full'].unique()))

        for ip in list(df_sel['ip'].unique()):
            df_sel_ip = df_sel[
                (df_sel['ip'] == ip)
            ]
            if df_sel_ip['total_count'].sum() < 10:
                continue
            base_msmts = alt.Chart(df_sel_ip)
            bar_chart = base_msmts.mark_bar(size=2).encode(
                x=alt.X('measurement_start_time:T', axis=alt.Axis(format="%Y-%m-%dT%H"),
                        ## This is a bit of a hack to add proper spacing between the charts,
                        ## since I wasn't able to get padding to work on vconcat charts
                        title='.'),
                y='percentage',
                color=alt.Color('failure_str_full',
                    scale=color_scale,
                    legend=alt.Legend(
                        title=None,
                        orient='none',
                        legendY=-20,
                        labelOpacity=1,
                        direction='horizontal',
                        titleAnchor='start')
                ),
                tooltip=(
                    alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H"),
                    'probe_as_org_name',
                    'failure_str_full', 'percentage', 'obs_count',
                    'total_count'
                )
            )

            box_overlay = alt.Chart(df_intervals_sel).mark_rect(stroke='black', color='blue').encode(
                x='start_time',
                y=alt.datum(-10),
                x2='end_time',
                y2=alt.datum(120),
                opacity=alt.value(0.2),
                tooltip=[
                    alt.Tooltip("start_time:T", format="%Y-%m-%dT%H"),
                    alt.Tooltip("end_time:T", format="%Y-%m-%dT%H"),
                ]
            )

            count_chart = base_msmts.mark_circle().encode(
                x='measurement_start_time',
                y='total_count',
                size=alt.Size('total_count'),
                tooltip=(
                    alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H"),
                    'probe_as_org_name',
                    'failure_str_full', 'percentage', 'obs_count',
                    'total_count'
                )
            )

            # Text label telling which IP the panel belongs to.
            ip_label = alt.Chart(
                {"values": [{"label": f"IP: {ip}"}]}
            ).mark_text(
                align='left',
                baseline='middle'
            ).encode(
                x=alt.value(10),
                y=alt.value(0),
                text="label:N"
            )

            chart_group.append((box_overlay + bar_chart + count_chart + ip_label).properties(
                width=1500,
                height=100
            ))

        if not chart_group:
            # No IP of this network reached the 10-observation threshold, so
            # there is nothing to plot (same policy as plot_all_charts_wc).
            continue

        chart = alt.vconcat(
            alt.vconcat(*chart_group),
            ooni_logo
        ).configure_concat(
            spacing=-30
        ).configure_view(
            strokeOpacity=0
        ).properties(
            title=f'Telegram test outcomes in Kenya for {probe_as_org_name} (AS{probe_asn}) during {year} KCSE exams'
        )
        filename = f'charts/{year}-telegram{fname_extra}-AS{probe_asn}.png'
        print(filename)
        if save:
            chart.save(filename)

            df_sel.to_csv(filename.replace('.png', '.csv').replace('charts/', 'data/'))
        else:
            display(chart)

In [44]:
# Hourly outcome counts for the 2023 web_connectivity measurements.
df_wc_2023_totals = calculate_grouped_with_totals(df_raw=df_wc_2023)

In [45]:
# Hourly outcome counts for the 2023 telegram test measurements.
df_tg_2023_totals = calculate_grouped_with_totals(df_raw=df_tg_2023)

In [46]:
# Hourly outcome counts for the 2024 web_connectivity measurements.
df_wc_2024_totals = calculate_grouped_with_totals(df_raw=df_wc_2024)

In [47]:
# Hourly outcome counts for the 2024 telegram test measurements.
df_tg_2024_totals = calculate_grouped_with_totals(df_raw=df_tg_2024)

In [48]:
# Observations per network after 2023-11-10, busiest networks first.
(
    df_wc_2023_totals
    .loc[df_wc_2023_totals['measurement_start_time'] > '2023-11-10']
    .groupby(['probe_asn', 'probe_as_org_name'])['obs_count']
    .sum()
    .reset_index()
    .sort_values(by='obs_count', ascending=False)
)

Unnamed: 0,probe_asn,probe_as_org_name,obs_count
5,33771,Safaricom Limited,1050
6,36866,Jamii Telecommunications Limited,646
4,30844,Liquid Telecommunications Ltd,284
8,37061,Safaricom Limited,264
3,15808,ACCESSKENYA GROUP LTD is an ISP serving,160
2,15399,Wananchi Group (Kenya) Limited,156
0,12455,Jambonet Autonomous System,150
14,329014,Vilcom Networks Limited,140
16,329183,FLINK TECHNOLOGIES LTD,80
18,329211,Novia East Africa Ltd,36


In [16]:
# Exam days taken from https://knec.ac.ke/wp-content/uploads/2023/05/2023-KCSE-Timetable-02.05.23.pdf
# First exam day: 01/11/2023, last exam day: 24/11/2023.
# Two sessions per day, Monday to Friday:
# 05:00 - 08:00 UTC (8 - 11 local) and 11:00 - 14:00 UTC (14 - 17 local).
date_range = [
    '2023-11-01', '2023-11-02', '2023-11-03',
    '2023-11-06', '2023-11-07', '2023-11-08', '2023-11-09', '2023-11-10',
    '2023-11-13', '2023-11-14', '2023-11-15', '2023-11-16', '2023-11-17',
    '2023-11-20', '2023-11-21', '2023-11-22', '2023-11-23', '2023-11-24',
]
# One row per session: the morning one first, then the afternoon one.
time_intervals = [
    {
        'start_time': pd.to_datetime(f'{dt}T{start_hour}:00:00Z'),
        'end_time': pd.to_datetime(f'{dt}T{end_hour}:00:00Z'),
    }
    for dt in date_range
    for start_hour, end_hour in (('05', '08'), ('11', '14'))
]
df_time_intervals_2023 = pd.DataFrame(time_intervals)

In [50]:
# 2023 web_connectivity overview, restricted to IP 149.154.167.99.
plot_no_disaggregate(
    df_wc_2023_totals.loc[df_wc_2023_totals['ip'] == '149.154.167.99'],
    df_time_intervals_2023,
    start_time='2023-10-15',
    save=True,
    title='Overall accessibility of telegram.org (web_connectivity)',
    fname_extra='.org-web_connectivity',
)

In [51]:
# 2023 web_connectivity charts, one per network.
plot_all_charts_wc(
    df_msmts=df_wc_2023_totals,
    df_time_intervals=df_time_intervals_2023,
    start_time='2023-11-14',
    save=True,
)

In [52]:
# 2023 telegram test overview: rows for IP 149.154.167.99 and for the 'none'
# placeholder are left out.
plot_no_disaggregate(
    df_tg_2023_totals.loc[~df_tg_2023_totals['ip'].isin(['149.154.167.99', 'none'])],
    df_time_intervals_2023,
    start_time='2023-10-15',
    save=True,
    title='Overall accessibility of telegram app endpoints',
    fname_extra='',
)

In [53]:
# 2023 telegram test charts per network, without IP 149.154.167.99 and 'none'.
plot_all_charts_tg(
    df_tg_2023_totals.loc[~df_tg_2023_totals['ip'].isin(['149.154.167.99', 'none'])],
    df_time_intervals_2023,
    start_time='2023-11-03',
    save=True,
)

charts/2023-telegram-AS329014.png
charts/2023-telegram-AS33771.png
charts/2023-telegram-AS30844.png
charts/2023-telegram-AS328977.png
charts/2023-telegram-AS37100.png
charts/2023-telegram-AS328753.png
charts/2023-telegram-AS329111.png
charts/2023-telegram-AS36926.png
charts/2023-telegram-AS12455.png
charts/2023-telegram-AS15399.png
charts/2023-telegram-AS328331.png
charts/2023-telegram-AS15808.png
charts/2023-telegram-AS36866.png
charts/2023-telegram-AS37027.png
charts/2023-telegram-AS36914.png
charts/2023-telegram-AS37061.png
charts/2023-telegram-AS329211.png
charts/2023-telegram-AS328856.png
charts/2023-telegram-AS328475.png
charts/2023-telegram-AS206804.png
charts/2023-telegram-AS328993.png
charts/2023-telegram-AS327727.png
charts/2023-telegram-AS329183.png
charts/2023-telegram-AS329205.png
charts/2023-telegram-AS329044.png
charts/2023-telegram-AS37305.png


In [54]:
# 2023 telegram web overview: only the rows for IP 149.154.167.99 are kept.
plot_no_disaggregate(
    df_tg_2023_totals.loc[df_tg_2023_totals['ip'] == '149.154.167.99'],
    df_time_intervals_2023,
    start_time='2023-10-15',
    save=True,
    title='Overall accessibility of telegram web',
    fname_extra='_web',
)

In [55]:
# 2023 telegram web charts per network: only IP 149.154.167.99.
plot_all_charts_tg(
    df_tg_2023_totals.loc[df_tg_2023_totals['ip'].isin(['149.154.167.99'])],
    df_time_intervals_2023,
    start_time='2023-11-03',
    save=True,
    fname_extra="_web",
)

charts/2023-telegram_web-AS329014.png
charts/2023-telegram_web-AS33771.png
charts/2023-telegram_web-AS30844.png
charts/2023-telegram_web-AS328977.png
charts/2023-telegram_web-AS37100.png
charts/2023-telegram_web-AS328753.png
charts/2023-telegram_web-AS329111.png
charts/2023-telegram_web-AS36926.png
charts/2023-telegram_web-AS12455.png
charts/2023-telegram_web-AS15399.png
charts/2023-telegram_web-AS328331.png
charts/2023-telegram_web-AS15808.png
charts/2023-telegram_web-AS36866.png
charts/2023-telegram_web-AS37027.png
charts/2023-telegram_web-AS36914.png
charts/2023-telegram_web-AS37061.png
charts/2023-telegram_web-AS329211.png
charts/2023-telegram_web-AS328856.png
charts/2023-telegram_web-AS328475.png
charts/2023-telegram_web-AS206804.png
charts/2023-telegram_web-AS328993.png
charts/2023-telegram_web-AS327727.png
charts/2023-telegram_web-AS329183.png
charts/2023-telegram_web-AS329205.png
charts/2023-telegram_web-AS329044.png
charts/2023-telegram_web-AS37305.png


In [15]:
# Exam days taken from https://knec.ac.ke/wp-content/uploads/2024/03/2024-KCSE.pdf
# First exam day: 30/10/2024, last exam day: 22/11/2024.
# Two sessions per day, Monday to Friday:
# 05:00 - 08:00 UTC (8 - 11 local) and 11:00 - 14:00 UTC (14 - 17 local).
date_range = [
    '2024-10-30', '2024-10-31', '2024-11-01',
    '2024-11-04', '2024-11-05', '2024-11-06', '2024-11-07', '2024-11-08',
    '2024-11-11', '2024-11-12', '2024-11-13', '2024-11-14', '2024-11-15',
    '2024-11-18', '2024-11-19', '2024-11-20', '2024-11-21', '2024-11-22',
]
# One row per session: the morning one first, then the afternoon one.
time_intervals = [
    {
        'start_time': pd.to_datetime(f'{dt}T{start_hour}:00:00Z'),
        'end_time': pd.to_datetime(f'{dt}T{end_hour}:00:00Z'),
    }
    for dt in date_range
    for start_hour, end_hour in (('05', '08'), ('11', '14'))
]
df_time_intervals_2024 = pd.DataFrame(time_intervals)

In [57]:
# 2024 web_connectivity overview, restricted to IP 149.154.167.99.
plot_no_disaggregate(
    df_wc_2024_totals.loc[df_wc_2024_totals['ip'] == '149.154.167.99'],
    df_time_intervals_2024,
    start_time='2024-10-15',
    save=True,
    title='Overall accessibility of telegram.org (web_connectivity)',
    fname_extra='.org-web_connectivity',
)

In [58]:
# 2024 web_connectivity charts, one per network.
plot_all_charts_wc(
    df_msmts=df_wc_2024_totals,
    df_time_intervals=df_time_intervals_2024,
    start_time='2024-11-03',
    save=True,
)

In [59]:
# 2024 telegram test overview: rows for IP 149.154.167.99 and for the 'none'
# placeholder are left out.
plot_no_disaggregate(
    df_tg_2024_totals.loc[~df_tg_2024_totals['ip'].isin(['149.154.167.99', 'none'])],
    df_time_intervals_2024,
    start_time='2024-10-15',
    save=True,
    title='Overall accessibility of telegram app endpoints',
    fname_extra='',
)

In [60]:
# 2024 telegram test charts per network, without IP 149.154.167.99 and 'none'.
plot_all_charts_tg(
    df_tg_2024_totals.loc[~df_tg_2024_totals['ip'].isin(['149.154.167.99', 'none'])],
    df_time_intervals_2024,
    start_time='2024-11-03',
    save=True,
)

charts/2024-telegram-AS329261.png
charts/2024-telegram-AS33771.png
charts/2024-telegram-AS37061.png
charts/2024-telegram-AS328856.png
charts/2024-telegram-AS329167.png
charts/2024-telegram-AS36926.png
charts/2024-telegram-AS15399.png
charts/2024-telegram-AS37100.png
charts/2024-telegram-AS30844.png
charts/2024-telegram-AS37684.png
charts/2024-telegram-AS36866.png
charts/2024-telegram-AS328977.png
charts/2024-telegram-AS329211.png
charts/2024-telegram-AS14593.png
charts/2024-telegram-AS15808.png
charts/2024-telegram-AS329014.png
charts/2024-telegram-AS328993.png
charts/2024-telegram-AS12455.png
charts/2024-telegram-AS328482.png
charts/2024-telegram-AS327972.png
charts/2024-telegram-AS327708.png
charts/2024-telegram-AS328331.png
charts/2024-telegram-AS328858.png
charts/2024-telegram-AS329390.png
charts/2024-telegram-AS329152.png
charts/2024-telegram-AS37305.png
charts/2024-telegram-AS328490.png
charts/2024-telegram-AS329205.png


In [61]:
# 2024 telegram web overview: only the rows for IP 149.154.167.99 are kept.
plot_no_disaggregate(
    df_tg_2024_totals.loc[df_tg_2024_totals['ip'] == '149.154.167.99'],
    df_time_intervals_2024,
    start_time='2024-10-15',
    save=True,
    title='Overall accessibility of telegram web',
    fname_extra='_web',
)

In [62]:
# 2024 telegram web charts per network: only IP 149.154.167.99.
plot_all_charts_tg(
    df_tg_2024_totals.loc[df_tg_2024_totals['ip'].isin(['149.154.167.99'])],
    df_time_intervals_2024,
    start_time='2024-11-03',
    save=True,
    fname_extra="_web",
)

charts/2024-telegram_web-AS329261.png
charts/2024-telegram_web-AS33771.png
charts/2024-telegram_web-AS37061.png
charts/2024-telegram_web-AS328856.png
charts/2024-telegram_web-AS329167.png
charts/2024-telegram_web-AS36926.png
charts/2024-telegram_web-AS15399.png
charts/2024-telegram_web-AS37100.png
charts/2024-telegram_web-AS30844.png
charts/2024-telegram_web-AS37684.png
charts/2024-telegram_web-AS36866.png
charts/2024-telegram_web-AS328977.png
charts/2024-telegram_web-AS329211.png
charts/2024-telegram_web-AS14593.png
charts/2024-telegram_web-AS15808.png
charts/2024-telegram_web-AS329014.png
charts/2024-telegram_web-AS328993.png
charts/2024-telegram_web-AS12455.png
charts/2024-telegram_web-AS328482.png
charts/2024-telegram_web-AS327972.png
charts/2024-telegram_web-AS327708.png
charts/2024-telegram_web-AS328331.png
charts/2024-telegram_web-AS328858.png
charts/2024-telegram_web-AS329390.png
charts/2024-telegram_web-AS329152.png
charts/2024-telegram_web-AS37305.png
charts/2024-telegram_web

In [12]:
# Hourly count, per returned IP, of the 2023 web_connectivity observations whose
# resolver is in AS12455 and that carry an IP.
df_dns = (
    df_wc_2023
    .loc[
        (df_wc_2023['resolver_asn'] == 12455) & df_wc_2023['ip'].notnull(),
        [
            'measurement_start_time',
            'probe_as_org_name',
            'hostname',
            'probe_asn',
            'resolver_asn',
            'ip',
            'measurement_uid',
        ],
    ]
    .groupby([
        pd.Grouper(freq='h', key='measurement_start_time'),
        'probe_as_org_name',
        'probe_asn',
        'resolver_asn',
        'hostname',
        'ip',
    ])
    .count()
    .reset_index()
    .rename(columns={'measurement_uid': 'obs_count'})
    .copy()
)

In [13]:
# Explorer link for one 2023 web_connectivity observation, picked by row position.
sample_observation = df_wc_2023.iloc[931]
print_explorer_url(sample_observation)

https://explorer.ooni.org/m/20231119093613.171444_KE_webconnectivity_5173e157bf324f55


In [17]:
# OONI logo, positioned in pixels relative to the top-left corner of its view.
left_offset = -30
top_offset = -20
logo_data = {"values": [{"url": "https://raw.githubusercontent.com/ooni/design-system/refs/heads/master/svgs/logos/OONI-HorizontalMonochrome.svg"}]}
ooni_logo = alt.Chart(logo_data).mark_image(opacity=0.5).encode(
    x=alt.value(left_offset),
    x2=alt.value(left_offset+80),
    y=alt.value(top_offset),
    y2=alt.value(top_offset+40),
    url="url:N"
)

base_msmts = alt.Chart(df_dns)

# Bars: hourly number of observations, colored by the IP they carry.
hour_tooltip = alt.Tooltip("measurement_start_time:T", format="%Y-%m-%dT%H")
bar_chart = base_msmts.mark_bar(size=2).encode(
    x=alt.X('measurement_start_time:T', axis=alt.Axis(format="%Y-%m-%dT%H")),
    y='obs_count',
    color='ip',
    tooltip=(hour_tooltip, 'ip', 'obs_count')
)

# Shaded boxes marking the 2023 exam sessions.
box_overlay = alt.Chart(df_time_intervals_2023).mark_rect(stroke='black', color='blue').encode(
    x='start_time',
    y=alt.datum(-1),
    x2='end_time',
    y2=alt.datum(5),
    opacity=alt.value(0.2),
    tooltip=[
        alt.Tooltip("start_time:T", format="%Y-%m-%dT%H"),
        alt.Tooltip("end_time:T", format="%Y-%m-%dT%H"),
    ]
)

dns_layers = (box_overlay + bar_chart).resolve_legend(color='independent').properties(
    width=1500,
    height=300,
    title='DNS resolution for telegram.org on Jambonet (AS12455) during 2023 KCSE exams'
)
chart = (
    alt.vconcat(dns_layers, ooni_logo)
    .configure_concat(spacing=-30)
    .configure_view(strokeOpacity=0)
)
chart