In [None]:
import warnings
warnings.filterwarnings("ignore")

import json
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns

# Use the 'retina' figure format for figures rendered inline in the notebook.
%config InlineBackend.figure_format = 'retina'
plt.rc('pdf', fonttype=42) # Avoid type 3 fonts

# Global look of all figures: seaborn "white" theme with the tab10 palette.
sns.set_theme(style="white")
sns.set_palette(sns.color_palette("tab10"))
# Draw tick marks on the bottom x-axis and the left y-axis. These keys toggle
# tick drawing per side; they do not by themselves enable minor ticks.
# NOTE(review): presumably needed because the "white" theme hides ticks - confirm.
plt.rcParams.update({"xtick.bottom" : True, "ytick.left" : True})

In [None]:
# Base directory that the other two paths are resolved against.
BASE_DIR = '../android'
# Directory scanned by read_all_jobs(); every sub-directory in it is treated as one job.
# NOTE(review): 'CHANGE-ME' looks like a placeholder that has to be replaced with
# the name of the actual results folder before running - confirm.
RESULTS_DIR = os.path.join(BASE_DIR, '../results/CHANGE-ME/')
# JSON object indexed by device name; read_device_info() uses the value as the
# device's release year.
RELEASE_LOOKUP_JSON = os.path.join(BASE_DIR, 'release_years.json')


def read_device_info(path, release_lookup_path=None):
    """Load one device description file and normalise it for the survey.

    Parameters
    ----------
    path : str
        Path to a JSON file holding an object with the keys ``name``, ``os``
        and ``formFactor``.
    release_lookup_path : str, optional
        Path to a JSON object that maps a device name to its release year.
        When omitted, the module-level ``RELEASE_LOOKUP_JSON`` is used.

    Returns
    -------
    dict
        ``name``    - device name with all double quotes removed,
        ``os``      - major OS version as ``int`` ("12.1" -> 12),
        ``release`` - value stored for ``name`` in the release lookup,
        ``type``    - "Tablet" if ``formFactor`` is "TABLET", otherwise "Phone".

    Raises
    ------
    KeyError
        If a required key is missing or the device name is not in the lookup.
    ValueError
        If the major OS version cannot be converted to ``int``.
    """
    if release_lookup_path is None:
        # Resolved at call time so that changing the global still takes effect.
        release_lookup_path = RELEASE_LOOKUP_JSON

    with open(path) as f:
        device = json.load(f)

    name = device['name'].replace('"', '')

    # Keep only the part before the first dot. str() also covers files that
    # store the version as a JSON number. The variable is deliberately not
    # called `os`, which would shadow the module of the same name.
    os_major = str(device['os']).partition('.')[0]

    with open(release_lookup_path) as f:
        release_year = json.load(f)[name]

    return {
        'name': name,
        'os': int(os_major),
        'release': release_year,
        'type': "Tablet" if device['formFactor'] == 'TABLET' else 'Phone',
    }


def read_test_results(dir_path):
    """Collect the boolean outcome of every key-support test of one job.

    For each test, ``<dir_path>/<test name>.json`` is expected to hold a JSON
    list of log entries, each an object with a ``tag`` and a ``data`` key.
    The result line is the entry tagged ``HardwareKeySupportTest`` whose
    ``data`` contains the test name; its outcome is the second
    space-separated token, compared against the string "true".

    Parameters
    ----------
    dir_path : str
        Directory containing one log file per test.

    Returns
    -------
    dict
        Maps a short result key (e.g. ``'hw_backed_ctr'``) to ``bool``.

    Raises
    ------
    IndexError
        If a log file contains no result line for its test, or the result
        line has too few tokens.
    """
    def read_log(test_name, index=1):
        """Return True if token ``index`` of the result line of ``test_name`` is "true"."""
        log_path = os.path.join(dir_path, test_name + '.json')

        with open(log_path) as f:
            entries = json.load(f)

        # The main result line includes the test name in its message.
        result_lines = [
            entry['data']
            for entry in entries
            if entry['tag'] == "HardwareKeySupportTest" and test_name in entry['data']
        ]

        if not result_lines:
            # Fail with a message that names the culprit instead of the bare
            # "list index out of range" that indexing an empty list produces.
            raise IndexError(
                f"no HardwareKeySupportTest result line for {test_name!r} in {log_path!r}"
            )
        # More than one candidate is reported but tolerated: the first one wins.
        if len(result_lines) != 1: print(f"bad len: {result_lines}")
        return result_lines[0].split(' ')[index] == "true"

    # result key -> name of the instrumented test (also the log file stem)
    test_names = {
        # TEE checks
        'hw_backed_ctr': 'testIsDefaultKeyGenerationHardwareBackedCtr',
        'hw_backed_gcm': 'testIsDefaultKeyGenerationHardwareBackedGcm',
        'hw_backed_hmac_short': 'testIsDefaultKeyGenerationHardwareBackedHmacShort',
        'hw_backed_hmac_long': 'testIsDefaultKeyGenerationHardwareBackedHmacLong',

        # SE checks
        'strongbox_ctr': 'testCanCreateStrongBoxKeyCtr',
        'strongbox_gcm': 'testCanCreateStrongBoxKeyGcm',
        'strongbox_hmac_short': 'testCanCreateStrongBoxKeyHmacShort',
        'strongbox_hmac_long': 'testCanCreateStrongBoxKeyHmacLong',

        # extra checks
        'system_feature_strongbox': 'testIsStrongBoxSupportSystemFeaturePresent',
        'system_feature_single_use_key': 'testIsSingleUseKeySupportSystemFeaturePresent',
        'system_feature_limited_use_key': 'testIsLimitedUseKeySupportSystemFeaturePresent',
        'strongbox_hmac_long_with_limited': 'testCanCreateStrongBoxKeyHmacLongWithLimitedUse',
    }

    return {key: read_log(test_name) for key, test_name in test_names.items()}
    

def read_job_result(job_name, job_path):
    """Build the flat survey record for one job directory.

    Combines the device description (``device.json`` inside ``job_path``) with
    a condensed view of the test results. ``job_name`` is accepted but not used.

    The ``support`` field is
      * 'SE'  when the StrongBox system feature is present and a StrongBox
              CTR key could be created,
      * 'TEE' otherwise, when default CTR key generation is hardware backed,
      * 'n/a' in every other case.

    Returns a dict with the device fields followed by ``support``,
    ``hmacLong``, ``limitedUse`` and ``hmacLongLimitedUse``.
    """
    device = read_device_info(os.path.join(job_path, "device.json"))
    checks = read_test_results(job_path)

    # SE takes precedence over TEE, so it is tested first.
    if checks['system_feature_strongbox'] and checks['strongbox_ctr']:
        support_level = 'SE'
    elif checks['hw_backed_ctr']:
        support_level = 'TEE'
    else:
        support_level = 'n/a'

    return {
        **device,
        'support': support_level,
        'hmacLong': checks['strongbox_hmac_long'],
        'limitedUse': checks['system_feature_limited_use_key'],
        'hmacLongLimitedUse': checks['strongbox_hmac_long_with_limited'],
    }
    
def read_all_jobs():
    """Read every job below ``RESULTS_DIR`` into one DataFrame.

    Entries of ``RESULTS_DIR`` are visited in sorted name order; anything that
    is not a directory is skipped. Each remaining directory is turned into one
    record by ``read_job_result`` and the records become the rows of the frame.
    """
    candidates = (
        (entry, os.path.join(RESULTS_DIR, entry))
        for entry in sorted(os.listdir(RESULTS_DIR))
    )
    records = [
        read_job_result(entry, entry_path)
        for entry, entry_path in candidates
        if os.path.isdir(entry_path)
    ]
    return pd.json_normalize(records)
        
# Load all jobs and turn the two plot axes into ordered categoricals so that
# every OS version / release year of the range gets a slot, even when no
# surveyed device falls on it.
# NOTE(review): values outside these ranges (OS 6-13, release 2013-2023) are
# not valid categories and would end up as NaN - confirm the ranges cover the data.
df = read_all_jobs().astype({
    'os': pd.CategoricalDtype(categories=list(range(6, 14)), ordered=True),
    'release': pd.CategoricalDtype(categories=list(range(2013, 2024)), ordered=True),
})

df

In [None]:
# Swarm plot of the surveyed devices: release year (x) against Android OS
# version (y), coloured by the detected key-store support level.
fig, ax = plt.subplots()
fig.set_size_inches((5.5, 2.5))

sns.swarmplot(
    data=df,
    x='release',
    y='os',
    hue='support',
    ax=ax,
    legend=False,
    size=6,
)

ax.set_xlabel("Release Year")

ax.set_ylabel("Android OS Version")
ax.set_yticks([6, 7, 8, 9, 10, 11, 12, 13])
ax.invert_yaxis()

ax.grid(ls='--')

fig.tight_layout()

# savefig does not create missing directories; make sure the target exists so
# the cell also works on a fresh checkout (Restart Kernel -> Run All).
figure_path = "output/android-se-survey-year-and-os.pdf"
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path, dpi=300, bbox_inches='tight')

In [None]:
#
# Survey details table
#

# Prints one LaTeX table row per surveyed device:
#   name & type & os & release & TEE mark & SE mark \\
for index, row in df.iterrows():
    # A device with SE support is marked in the TEE column as well.
    is_tee = "\\checkmark" if row['support'] in ('TEE', 'SE') else ""
    # Exact comparison on purpose: `in ('SE')` has no tuple comma and would be
    # a substring test against the string 'SE'.
    is_se = "\\checkmark" if row['support'] == 'SE' else ""
    print(f"{row['name']} & {row['type']} & {row['os']} & {row['release']} & {is_tee} & {is_se} \\\\")

In [None]:
#
# Calculate estimate based on OS version distribution
#

# Per year: OS version -> share of devices in percent. Every share except the
# last is written as the difference of two neighbouring figures.
# NOTE(review): the figures look like cumulative "this version or newer"
# shares - confirm against the data source.
API_DISTRIBUTIONS = {
    2022: {
        9: 77.3 - 62.8,
        10: 62.8 - 40.5,
        11: 40.5 - 13.5,
        12: 13.5,
    },
    2023: {
        9: 81.2 - 68.0,
        10: 68.0 - 48.5,
        11: 48.5 - 24.1,
        12: 24.1,
    },
}

# Fraction of surveyed devices with SE support, per OS version (0 when no
# device of that version was surveyed). The spelling of the name is kept
# because other cells may refer to it.
# NOTE(review): the keys are matched against df.os, i.e. they are OS versions,
# although the printed label says "API" - confirm the intended wording.
PREVALANCE = {}
prevalence_parts = []
for k in API_DISTRIBUTIONS[2022]:
    df_for_api = df[df.os == k]
    se_count = len(df_for_api[df_for_api.support == 'SE'])
    PREVALANCE[k] = se_count / len(df_for_api) if len(df_for_api) else 0
    prevalence_parts.append(f"API {k} ({PREVALANCE[k]*100:.1f}\\%)")

prev_string = " ".join(prevalence_parts) + "."
print(prev_string)

# Weighted sum per year: share of each OS version times its SE prevalence,
# printed as a LaTeX formula.
for year in (2023, 2022):
    print(year)
    total = 0.0
    terms = []
    for k, v in API_DISTRIBUTIONS[year].items():
        presence = PREVALANCE[k]
        total += presence * v
        # each term keeps its trailing space; terms are glued with a bare "+"
        terms.append(f"{v:.1f}\\% \\times {100*presence:.1f}\\% ")

    calculation = "$" + "+".join(terms) + f"\\approx \\textbf{{{total:.1f}\\%}}$."
    print(calculation)