In [None]:
!pip3 freeze > requirements.txt

In [None]:
%history -g -f history

In [None]:
import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd
import numpy as np
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Use seaborn's "paper" scaling for every figure drawn below.
sns.set_context('paper')
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and newer releases no longer ship the old names, so choose whichever variant
# this installation provides instead of hard-coding 'seaborn-paper'.
paper_style = 'seaborn-paper' if 'seaborn-paper' in plt.style.available else 'seaborn-v0_8-paper'
plt.style.use(paper_style)

def plot_bar(data, x_label, title, hue=None, save=False, prefix=''):
    """Draw a count plot: one bar per distinct value of a column.

    Args:
        data: DataFrame handed to ``sns.countplot``.
        x_label: Name of the column in ``data`` whose values are counted.
        title: Figure title; it is also embedded in the output file name.
        hue: Optional column used to split the bars by colour.
        save: When true, write the figure to ``graphs/`` as a PDF.
        prefix: Leading part of the output file name.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    plt.title(title)
    plt.tight_layout()
    sc = sns.countplot(data=data, x=x_label, palette='colorblind', hue=hue)
    sc.set(ylabel='Count')
    if save:
        # savefig does not create missing directories, so make sure it exists.
        Path('graphs').mkdir(parents=True, exist_ok=True)
        plt.savefig('graphs/{}_bar_chart_{}.pdf'.format(prefix, title), dpi=300, bbox_inches='tight')
    return plt


def plot_line2(data, x, y, x_label, y_label, title, hue=None, ci=95, style=None, save=False, prefix=''):
    """Draw a seaborn line plot of ``y`` against ``x``, optionally grouped.

    Args:
        data: DataFrame handed to ``sns.lineplot``.
        x: Column of ``data`` plotted on the x axis.
        y: Column of ``data`` plotted on the y axis.
        x_label: Text shown on the x axis.
        y_label: Text shown on the y axis.
        title: Figure title; it is also embedded in the output file name.
        hue: Optional grouping column forwarded as ``hue``.
        ci: Forwarded unchanged to ``sns.lineplot`` as ``ci``.
        style: Optional grouping column forwarded as ``style``.
        save: When true, write the figure to ``graphs/`` as a PDF.
        prefix: Leading part of the output file name.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    plt.title(title)
    plt.tight_layout()
    sc = sns.lineplot(x=x, y=y, data=data, palette='colorblind', hue=hue, style=style, ci=ci)
    sc.set(xlabel=x_label, ylabel=y_label)
    if save:
        # savefig does not create missing directories, so make sure it exists.
        Path('graphs').mkdir(parents=True, exist_ok=True)
        plt.savefig('graphs/{}_plot_chart_{}.pdf'.format(prefix, title), dpi=300, bbox_inches='tight')
    return plt


def plot_line(data, x, y, x_label, y_label, title, x2=None, y2=None, y2_label=None, save=False, prefix=''):
    """Draw a line plot, optionally with a second series on a twin y axis.

    Args:
        data: DataFrame handed to ``sns.lineplot``.
        x: Column of ``data`` plotted on the x axis.
        y: Column of ``data`` plotted on the primary y axis.
        x_label: Text shown on the x axis.
        y_label: Text shown on the primary y axis.
        title: Figure title; it is also embedded in the output file name.
        x2: x column of the optional second series; ``None`` skips it.
        y2: y column of the optional second series.
        y2_label: Text shown on the secondary y axis.
        save: When true, write the figure to ``graphs/`` as a PDF.
        prefix: Leading part of the output file name.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    plt.title(title)
    plt.tight_layout()
    sc = sns.lineplot(x=x, y=y, data=data, palette='colorblind', ci=None)
    sc.set(xlabel=x_label, ylabel=y_label)
    if x2 is not None:
        # The second series is drawn in red on its own y axis sharing the x axis.
        anx = plt.twinx()
        sc = sns.lineplot(x=x2, y=y2, data=data, palette='colorblind', color='r', ax=anx, ci=None)
        sc.set(ylabel=y2_label)
        anx.figure.tight_layout()
    if save:
        # savefig does not create missing directories, so make sure it exists.
        Path('graphs').mkdir(parents=True, exist_ok=True)
        plt.savefig('graphs/{}_plot_chart_{}.pdf'.format(prefix, title), dpi=300, bbox_inches='tight')
    return plt


def plot_hist(data, x, title, hue, save=False, prefix=''):
    """Draw a stacked, discrete histogram of column ``x`` split by ``hue``.

    Args:
        data: DataFrame handed to ``sns.histplot``.
        x: Column of ``data`` that is binned.
        title: Figure title; it is also embedded in the output file name.
        hue: Column whose values are stacked within each bar.
        save: When true, write the figure to ``graphs/`` as a PDF.
        prefix: Leading part of the output file name.

    Returns:
        The ``matplotlib.pyplot`` module, so callers can chain ``.show()``.
    """
    plt.title(title)
    plt.tight_layout()
    sc = sns.histplot(data=data, x=x, hue=hue, palette='colorblind', discrete=True, multiple='stack')
    # The x ticks are fixed to 1..9 regardless of the data passed in.
    plt.xticks(range(1,10))
    if save:
        # savefig does not create missing directories, so make sure it exists.
        Path('graphs').mkdir(parents=True, exist_ok=True)
        plt.savefig('graphs/{}_hist_{}.pdf'.format(prefix, title), dpi=300, bbox_inches='tight')
    return plt
    

In [None]:
# Connect to Firestore. `db` is only used by the loader cells when their cached
# CSV is missing, so a failure here is reported but does not stop the notebook.
db = None
try:
    try:
        # Re-running this cell must reuse the already initialised default app:
        # calling initialize_app() a second time raises ValueError.
        firebase_admin.get_app()
    except ValueError:
        cred = credentials.Certificate('./serviceAccount.json')
        firebase_admin.initialize_app(cred)
    db = firestore.client()
except Exception as e:
    # `db` stays None, so later code can tell that Firestore is unavailable.
    print('Firestore client could not be created: {}'.format(e))

In [None]:
def set_updated_key(item_key, obj, doc):
    """Copy every entry of ``obj`` into ``doc`` under a prefixed key.

    An entry ``key: value`` of ``obj`` is stored as ``"<item_key>_<key>": value``.
    ``doc`` is modified in place; nothing is returned.
    """
    doc.update(
        ("{}_{}".format(item_key, name), content)
        for name, content in obj.items()
    )


def squash_doc(doc):
    """Flatten the nested metric sections of a document in place.

    For each of 'validationMetrics', 'oobMetrics' and 'trainingInfo' that is
    present with a non-None value, its entries are lifted to the top level via
    ``set_updated_key`` (keys become ``"<section>_<key>"``) and the nested
    section itself is removed.

    Returns:
        The same ``doc`` object that was passed in.
    """
    for section in ('validationMetrics', 'oobMetrics', 'trainingInfo'):
        nested = doc.get(section)
        if nested is None:
            # Missing (or explicitly None) sections are left as they are.
            continue
        set_updated_key(section, nested, doc)
        del doc[section]
    return doc

In [None]:
# Model statistics are cached in a local CSV; Firestore is only queried when
# that file does not exist yet.
all_models_csv = Path('./data/all_docs.csv')
all_models_docs = []
all_models_pd = None

if not all_models_csv.is_file():
    print('csv does not exist, getting data from firestore')
    doc_stream = db.collection(u'modelStats').stream()
    # Flatten the nested metric sections of every document before tabulating.
    all_models_docs.extend(squash_doc(snapshot.to_dict()) for snapshot in doc_stream)
    all_models_pd = pd.DataFrame(all_models_docs)
    all_models_pd.to_csv(all_models_csv, index=False)
else:
    print('csv exists, using that')
    # The column at position 2 is parsed as a date while reading.
    all_models_pd = pd.read_csv(all_models_csv, parse_dates=[2])

In [None]:
# Connection records are cached in a local CSV; Firestore is only queried when
# that file does not exist yet.
connections_csv = Path('./data/all_connections.csv')
connections_docs = []
connections_pd = None

if not connections_csv.is_file():
    print('csv does not exist, getting data from firestore')
    doc_stream = db.collection(u'connections').stream()
    connections_docs.extend(snapshot.to_dict() for snapshot in doc_stream)
    connections_pd = pd.DataFrame(connections_docs)
    connections_pd.to_csv(connections_csv, index=False)
else:
    print('csv exists, using that')
    connections_pd = pd.read_csv(connections_csv)

In [None]:
def to_dt(ts):
    """Convert ``ts`` with ``pd.to_datetime`` into a UTC-aware datetime."""
    as_utc = pd.to_datetime(ts, utc=True)
    return as_utc

def to_dt2(ts):
    """Convert an epoch value given in milliseconds into a UTC-aware datetime."""
    as_utc = pd.to_datetime(ts, unit='ms', utc=True)
    return as_utc

def confusion_na(cm):
    """Blank out values that start with '[' and pass everything else through.

    Returns:
        ``''`` when the string ``cm`` begins with ``'['``, otherwise ``cm``.
    """
    return '' if cm.startswith('[') else cm

# Start (midnight UTC) and end (epoch milliseconds) of each trial window.
config_1_date = pd.Timestamp('2021-03-03', tz='UTC')
config_1_date_end = pd.Timestamp(1615547734696, unit='ms', tz='UTC')  # 2021-03-12 11:15:34.696 UTC

config_2_date = pd.Timestamp('2021-03-12', tz='UTC')
config_2_date_end = pd.Timestamp(1616253120904, unit='ms', tz='UTC')  # 2021-03-20 15:12:00.904 UTC

config_3_date = pd.Timestamp('2021-03-21', tz='UTC')
config_3_date_end = pd.Timestamp(1617058800000, unit='ms', tz='UTC')  # 2021-03-29 23:00:00 UTC

def set_trial(ts, config_1_date, config_1_date_end, start2, end2, start3, end3):
    """Return the number of the trial whose time window contains ``ts``.

    The windows are checked in order and both bounds are inclusive. All three
    windows are taken from the arguments; previously the second and third
    pairs were ignored in favour of module-level globals.

    Args:
        ts: Value to classify (comparable with the window bounds).
        config_1_date: Start of the first window.
        config_1_date_end: End of the first window.
        start2: Start of the second window.
        end2: End of the second window.
        start3: Start of the third window.
        end3: End of the third window.

    Returns:
        1, 2 or 3 for the first matching window, or 0 when none matches.
    """
    windows = (
        (1, config_1_date, config_1_date_end),
        (2, start2, end2),
        (3, start3, end3),
    )
    for trial, window_start, window_end in windows:
        if window_start <= ts <= window_end:
            return trial
    return 0

def set_day_of_trial(row, config_1_date, config_2_date, config_3_date):
    """Return the 1-based day within its trial on which ``row`` was recorded.

    The timestamp is read from ``row.timestamp_utc`` when that label exists,
    otherwise from ``row.event_timestamp``; the trial number comes from
    ``row.trial``.

    Args:
        row: A row (Series) carrying a timestamp and a ``trial`` value.
        config_1_date: Start of trial 1.
        config_2_date: Start of trial 2.
        config_3_date: Start of trial 3.

    Returns:
        The day number, or -1 when the timestamp is None or the trial is not
        1, 2 or 3. A computed day of 10 is reported as 9.
    """
    ts = row.timestamp_utc if 'timestamp_utc' in row.index else row.event_timestamp
    trial = row.trial
    if ts is None:
        return -1

    trial_starts = {1: config_1_date, 2: config_2_date, 3: config_3_date}
    if trial not in trial_starts:
        return -1

    day = (ts - trial_starts[trial]).days + 1
    # technically part of the same day, just the date was collected wrong for some reason
    return 9 if day == 10 else day

def set_config_num(trial):
    """Translate a trial number into its configuration number.

    Trial 1 maps to configuration 3, trial 2 to 2 and trial 3 to 1; any other
    value yields ``None``.
    """
    configuration_by_trial = {1: 3, 2: 2, 3: 1}
    return configuration_by_trial.get(trial)

In [None]:
# The raw `timestamp` column is converted with to_dt2 (epoch milliseconds -> UTC datetime).
connections_pd['timestamp_utc'] = connections_pd['timestamp'].map(to_dt2)

In [None]:
# Tag every connection with its trial, drop those outside all trial windows,
# then derive the day within the trial and the configuration number.
connections_pd['trial'] = connections_pd.timestamp_utc.apply(
    set_trial,
    args=(config_1_date, config_1_date_end, config_2_date, config_2_date_end, config_3_date, config_3_date_end),
)
# .copy() gives an independent frame, so the column assignments below do not
# write into a filtered view of the previous frame (SettingWithCopyWarning).
connections_pd = connections_pd[connections_pd.trial != 0].copy()
connections_pd['Trial Day'] = connections_pd.apply(
    set_day_of_trial,
    args=(config_1_date, config_2_date, config_3_date),
    axis=1,
)
# Only days 1..9 of a trial are kept.
connections_pd = connections_pd[(connections_pd['Trial Day'] >= 1) & (connections_pd['Trial Day'] <= 9)].copy()
connections_pd['Configuration'] = connections_pd.trial.apply(set_config_num)
# Every row counts as one call; a scalar assignment replaces the row-wise apply.
connections_pd['Call'] = 1

In [None]:
# Connections per trial day, stacked by configuration; the figure is also saved.
plot_hist(
    connections_pd[['Trial Day', 'Call', 'Configuration']],
    x='Trial Day',
    title='WebRTC Connections on a Daily Basis',
    hue='Configuration',
    save=True,
    prefix='perf',
).show()

In [None]:
# performance data: two line-delimited JSON exports stored under ./data
performance_json_1 = Path('./data') / 'performance.json'
performance_json_2 = Path('./data') / 'performance_2.json'

In [None]:
# Read both exports (one JSON object per line) and stack them into one frame.
performance_exports = [pd.read_json(export, lines=True) for export in (performance_json_1, performance_json_2)]
performance_pd = pd.concat(performance_exports)
performance_pd['event_timestamp'] = performance_pd['event_timestamp'].apply(to_dt)

# Keep events between the start of trial 1 and the end of trial 3 that were
# reported by app version '2.0'.
within_trials = performance_pd['event_timestamp'].between(config_1_date, config_3_date_end)
from_app_v2 = performance_pd['app_display_version'] == '2.0'
performance_pd = performance_pd[within_trials & from_app_v2]

# Rows whose trace_info is not a dict cannot be analysed further down.
has_trace_dict = performance_pd['trace_info'].apply(lambda value: isinstance(value, dict))
performance_pd = performance_pd[has_trace_dict]

In [None]:
# The frame arriving here is the result of boolean filtering; copy it first so
# new columns are written to an independent frame (avoids SettingWithCopyWarning).
performance_pd = performance_pd.copy()
performance_pd['trial'] = performance_pd.event_timestamp.apply(
    set_trial,
    args=(config_1_date, config_1_date_end, config_2_date, config_2_date_end, config_3_date, config_3_date_end),
)
# Trial 0 means the event fell outside every trial window.
performance_pd = performance_pd[performance_pd.trial != 0].copy()
performance_pd['Trial Day'] = performance_pd.apply(
    set_day_of_trial,
    args=(config_1_date, config_2_date, config_3_date),
    axis=1,
)

In [None]:
# Peek at two raw trace_info payloads: the first 'Trainer' event and the first row overall.
first_trainer_event = performance_pd.loc[performance_pd['event_name'] == 'Trainer'].iloc[0]
print(first_trainer_event['trace_info'])
print(performance_pd.iloc[0]['trace_info'])

def trace_info_duration(trace_info):
    """Return the trace duration in milliseconds.

    The ``duration_us`` entry of ``trace_info`` is interpreted as a number of
    microseconds and converted through ``pd.to_timedelta``.
    """
    microseconds = trace_info.get('duration_us')
    elapsed = pd.to_timedelta('{}us'.format(microseconds))
    return elapsed.total_seconds() * 1e3

# One duration in milliseconds per event, derived from its trace_info dict.
performance_pd['Duration (ms)'] = performance_pd['trace_info'].map(trace_info_duration)

In [None]:
def trace_info_metric(trace_info):
    """Return the integer metric value attached to a trace, or -1 if it has none.

    The value is read from ``trace_info['metric_info']['metric_value']`` and
    converted with ``int``.
    """
    if 'metric_info' not in trace_info:
        # if it is -1, then there is no parent_trace_name as it is just a plain trace with no metric
        return -1
    metric_info = trace_info.get('metric_info')
    return int(metric_info.get('metric_value'))

# Metric value per event; -1 marks traces without a metric_info entry.
performance_pd['Metric Count'] = performance_pd['trace_info'].map(trace_info_metric)

In [None]:
# Translate the trial number into the configuration that was active during it.
performance_pd['Configuration'] = performance_pd['trial'].map(set_config_num)

In [None]:
save_data_app_count_pd = performance_pd[performance_pd.event_name=='save_data_app_count']
# A bare expression in the middle of a cell is evaluated and then discarded;
# display() makes the per-trial sample counts actually show up in the output.
display(save_data_app_count_pd.trial.value_counts())
plot_line2(save_data_app_count_pd, 'Trial Day', 'Duration (ms)', 'Trial Day', 'Duration (ms)', 'Average Time Spent Saving Data', 'Configuration', save=True, prefix='perf').show()
plot_line2(save_data_app_count_pd[['Trial Day', 'Configuration', 'Metric Count']], 'Trial Day', 'Metric Count', 'Trial Day', 'Count', 'Average Amount of Applications Per Session', hue='Configuration', save=True, prefix='perf').show()

In [None]:
# .copy() so the new column is added to an independent frame rather than to a
# filtered view of performance_pd (avoids SettingWithCopyWarning).
trainModel_pd = performance_pd[performance_pd.event_name=='trainModel'].copy()
# Milliseconds -> seconds, done as one vectorised division.
trainModel_pd['Duration (s)'] = trainModel_pd['Duration (ms)'] / 1000
plot_line2(trainModel_pd, 'Trial Day', 'Duration (s)', 'Trial Day', 'Duration (s)', 'Average Time Spent on Model Training', hue='Configuration', save=True, prefix='perf').show()

In [None]:
# The three traces recorded for a machine-learning run and the task label used
# for each of them in the plots.
task_by_event = {
    'trainModel': 'Model Training',
    'Trainer': 'Setup, Train, Export',
    'setUpData': 'Data Setup',
}
# .copy() so the derived columns are written to an independent frame.
training_model_pd = performance_pd[performance_pd.event_name.isin(list(task_by_event))].copy()
training_model_pd['Duration (s)'] = training_model_pd['Duration (ms)'] / 1000
training_model_pd['Task'] = training_model_pd.event_name.map(task_by_event)

# The export step has no trace of its own; it is estimated per configuration
# and trial day as |mean(Trainer) - (mean(setUpData) + mean(trainModel))|.
# NOTE(review): abs() hides days on which the two parts exceed the Trainer
# total - confirm that this is intended.
# Rows are collected in a list and turned into a frame once, instead of
# growing a DataFrame with pd.concat inside the loop.
export_rows = []
for config in range(1, 4):
    config_data = training_model_pd[training_model_pd.Configuration==config]
    for day in range(1, 10):
        day_data = config_data[config_data['Trial Day']==day]
        setup_avg = day_data[day_data.event_name=='setUpData']['Duration (ms)'].mean()
        trainModel_avg = day_data[day_data.event_name=='trainModel']['Duration (ms)'].mean()
        trainer_avg = day_data[day_data.event_name=='Trainer']['Duration (ms)'].mean()
        export_rows.append({
            'Duration (ms)': abs(trainer_avg - (setup_avg + trainModel_avg)),
            'event_name': 'Export',
            'Configuration': config,
            'Trial Day': day,
        })

df = pd.DataFrame(export_rows, columns=['Duration (ms)', 'event_name', 'Configuration', 'Trial Day'])
df['Duration (s)'] = df['Duration (ms)'] / 1000
df['Task'] = df.event_name

plot_line2(df, 'Trial Day', 'Duration (s)', 'Trial Day', 'Duration (s)', 'Average Time Spent on Model Statistics Export', hue='Configuration', ci=95, save=True, prefix='perf').show()

In [None]:
# Measured traces plus the derived 'Export' rows in one chart (line style = task).
# The file prefix is 'perf' like every other figure saved by this notebook; it
# was misspelled 'pref' here, which put this one PDF under a different name.
plot_line2(pd.concat([training_model_pd, df]), 'Trial Day', 'Duration (s)', 'Trial Day', 'Duration (s)', 'Average Time Breakdowns for the Machine Learning Task', hue='Configuration', style='Task', ci=None, save=True, prefix='perf').show()

In [None]:
# .copy() so the new column is added to an independent frame rather than to a
# filtered view of performance_pd (avoids SettingWithCopyWarning).
setUpData_pd = performance_pd[performance_pd.event_name=='setUpData'].copy()
setUpData_pd['Duration (s)'] = setUpData_pd['Duration (ms)'] / 1000
# The plotted column is in seconds, so the y axis is labelled in seconds too
# (it used to read 'Duration (ms)'); .show() matches the other plotting cells.
plot_line2(setUpData_pd, 'Trial Day', 'Duration (s)', 'Trial Day', 'Duration (s)', 'Average Time Spent on Data Setup for Model Training', hue='Configuration', save=True, prefix='perf').show()

In [None]:
# .copy() so the new column is added to an independent frame rather than to a
# filtered view of performance_pd (avoids SettingWithCopyWarning).
Trainer_pd = performance_pd[performance_pd.event_name=='Trainer'].copy()
# Milliseconds -> seconds, done as one vectorised division.
Trainer_pd['Duration (s)'] = Trainer_pd['Duration (ms)'] / 1000
plot_line2(Trainer_pd, 'Trial Day', 'Duration (s)', 'Trial Day', 'Duration (s)', 'Average Time Spent on Model Prep, Training, and Export', 'Configuration', save=True, prefix='perf').show()

In [None]:
# Duration of the 'dcObserverOnMessage' events per trial day and configuration.
dcObserverOnMessage_pd = performance_pd.loc[performance_pd['event_name'] == 'dcObserverOnMessage']
plot_line2(
    dcObserverOnMessage_pd,
    x='Trial Day',
    y='Duration (ms)',
    x_label='Trial Day',
    y_label='Duration (ms)',
    title='Average Time Spent Handling WebRTC DataChannel Messages',
    hue='Configuration',
    save=True,
    prefix='perf',
).show()

In [None]:
# sentry data
sentry_csv = Path('./data/sentry_issues_export.csv')
sentry_pd = pd.read_csv(sentry_csv)

# Total number of events across all issues, used for the percentage shares below.
count_sum = sentry_pd['count'].sum()
sentry_pd = sentry_pd.sort_values(by=['count'], ascending=False, ignore_index=True)
print(count_sum)
# top 10 - bounded by the number of rows so an export with fewer than ten
# issues no longer raises IndexError
for i in range(min(10, len(sentry_pd))):
    cur = sentry_pd.iloc[i]
    print(cur)
    # Share of all events caused by this issue, in percent.
    print((cur['count'] / count_sum) * 100)
    print('-----')

In [None]:
# survey data
def _share_in_percent(answers):
    """Relative frequency of each distinct answer, formatted like '12.3%'."""
    return answers.value_counts(normalize=True).mul(100).round(1).astype(str) + '%'


survey_pd = pd.read_csv('./data/survey.csv')
print(survey_pd.columns)
print('-------')

# Age statistics of the respondents.
ages = survey_pd.Age
print('avg age: ', ages.mean())
print('std dev age: ', ages.std())
print('min max age: ', ages.min(), ages.max())
print('-------')

# Android versions are hard-coded here rather than read from the CSV.
survey_android_version = pd.Series([10, 8, 10, 9, 7, 11, 11, 10, 10, 10, 10, 10, 10, 11, 8, 11, 11, 11, 9, 10, 10, 10])
print(survey_android_version.size)
print(_share_in_percent(survey_android_version))
print('--------')

survey_sias = survey_pd['Was your SIAS score above 33? (This can be found in the Settings, which can be found by clicking on the 3 dots on the top right of the screen when the app is launched)']
print(_share_in_percent(survey_sias))
print('--------')

# Phone manufacturers are hard-coded as well.
survey_android_manufacturer = pd.Series(
    ['1plus'] * 4
    + ['samsung'] * 7
    + ['xiaomi'] * 5
    + ['pixel'] * 3
    + ['moto', 'lg', 'huawei']
)
print(_share_in_percent(survey_android_manufacturer))

In [None]:
# survey data contd.
# Answer tallies are hard-coded; the Likert questions share one column label.
comfort_label = 'Comfort Level (1=least, 5=very)'

epi_sharing_comfort = pd.DataFrame([4]*9 + [5]*5 + [3]*4 + [2]*3 + [1]*2).rename(columns={0: comfort_label})
future_epi_usage = pd.DataFrame(['Yes']*8 + ['No']*9 + ['Depends']*6)
data_analysis_on_phone_comfort = pd.DataFrame([4]*11 + [5]*9 + [3]*2 + [2]*1).rename(columns={0: comfort_label})
usage_stats_comfort = pd.DataFrame([3]*7 + [2]*6 + [5]*5 + [4]*4 + [1]).rename(columns={0: comfort_label})
centralised_comfort = pd.DataFrame([3]*10 + [2]*5 + [4]*4 + [5]*2 + [1]).rename(columns={0: comfort_label})
fed_comfort = pd.DataFrame([4]*10 + [3]*9 + [5]*2 + [2]*1).rename(columns={0: comfort_label})

# Print the percentage breakdown of every question, separated by a dashed line.
survey_questions = [
    epi_sharing_comfort,
    future_epi_usage,
    data_analysis_on_phone_comfort,
    usage_stats_comfort,
    centralised_comfort,
    fed_comfort,
]
for position, answers in enumerate(survey_questions):
    if position > 0:
        print('-----')
    print(answers.value_counts(normalize=True).mul(100).round(2).astype(str) + '%')

In [None]:
# convert timestamp
all_models_pd['timestamp_utc'] = all_models_pd['timestamp'].apply(to_dt)
# remove those that were there before trial began
recorded_during_trials = all_models_pd['timestamp_utc'] >= config_1_date
all_models_pd = all_models_pd[recorded_during_trials].drop(columns=['timestamp'])

# confusion_na blanks values starting with '['; rows blanked that way are dropped.
all_models_pd['confusionMatrix'] = all_models_pd['confusionMatrix'].apply(confusion_na)
all_models_pd = all_models_pd[all_models_pd['confusionMatrix'] != '']

In [None]:
# Swap the last column with the one at position 1; all other columns keep their place.
cols = list(all_models_pd.columns)
second_column, last_column = cols[1], cols[-1]
cols[1] = last_column
cols[-1] = second_column
all_models_pd = all_models_pd[cols]

In [None]:
def count_overall_anxious(row):
    """Sum the anxious counts of the test and the train split of ``row``."""
    anxious_in_test = row.trainingInfo_anxiousCountTest
    anxious_in_train = row.trainingInfo_anxiousCountTrain
    return anxious_in_test + anxious_in_train

def count_overall_non_anxious(row):
    """Sum ``trainingInfo_testSize`` and ``trainingInfo_trainSize`` of ``row``.

    NOTE(review): this treats the two size fields as non-anxious counts;
    confirm against the producer of these documents.
    """
    test_size = row.trainingInfo_testSize
    train_size = row.trainingInfo_trainSize
    return test_size + train_size

def count_overall_data_size(row):
    """Sum both anxious counts and both split sizes of ``row``."""
    anxious_total = row.trainingInfo_anxiousCountTest + row.trainingInfo_anxiousCountTrain
    return anxious_total + row.trainingInfo_testSize + row.trainingInfo_trainSize

# Derive the three row-wise totals first, then cast each of them to int.
overall_counters = {
    'overall_anxious': count_overall_anxious,
    'overall_data': count_overall_data_size,
    'overall_non_anxious': count_overall_non_anxious,
}
for column, counter in overall_counters.items():
    all_models_pd[column] = all_models_pd.apply(counter, axis=1)

for column in overall_counters:
    all_models_pd[column] = all_models_pd[column].astype(int)

In [None]:
# Keep only rows whose validation F1 score lies within [0, 1] (both ends included).
all_models_pd = all_models_pd[all_models_pd['validationMetrics_f1score'].between(0.0, 1.0)]

In [None]:
# The frame arriving here is the result of boolean filtering; copy it first so
# new columns are written to an independent frame (avoids SettingWithCopyWarning).
all_models_pd = all_models_pd.copy()
all_models_pd['trial'] = all_models_pd.timestamp_utc.apply(
    set_trial,
    args=(config_1_date, config_1_date_end, config_2_date, config_2_date_end, config_3_date, config_3_date_end),
)
all_models_pd['trialDay'] = all_models_pd.apply(
    set_day_of_trial,
    args=(config_1_date, config_2_date, config_3_date),
    axis=1,
)

# Trial 0 / day -1 mark rows that fall outside every trial window.
all_models_pd = all_models_pd[(all_models_pd.trial!=0) & (all_models_pd.trialDay!=-1)]

In [None]:
# The numbering is reversed on purpose: trial 3 holds the configuration 1 data
# and trial 1 the configuration 3 data, mirroring set_config_num.
config_1_data = all_models_pd.loc[all_models_pd['trial'] == 3]
config_2_data = all_models_pd.loc[all_models_pd['trial'] == 2]
config_3_data = all_models_pd.loc[all_models_pd['trial'] == 1]

In [None]:
# Per-day averages for configuration 1, first for the COMBINED models and then
# for the DAILY ones (one loop instead of two copy-pasted passes). After the
# loop the `config_1_data_avgs*` names hold the DAILY results, while `q` keeps
# the day-over-day accuracy change of the COMBINED models.
for model_type in ('COMBINED', 'DAILY'):
    # The model-type filter does not depend on the day, so it is applied once.
    models_of_type = config_1_data[config_1_data.modelType==model_type]
    config_1_data_avgs = dict()
    for x in range(1, 10):
        c = models_of_type[models_of_type.trialDay==x]
        config_1_data_avgs[x] = [
            c.oobMetrics_accuracy.mean() * 100.000,
            c.oobMetrics_accuracy.std() * 100.00,
            c.overall_data.mean(),
            c.overall_anxious.mean(),
            c.overall_anxious.mean() / c.overall_data.mean() * 100.00,
            c.validationMetrics_f1score.mean(),
            c.validationMetrics_f1score.std()
        ]

    config_1_data_avgs_pd = pd.DataFrame.from_dict(config_1_data_avgs, orient='index', columns=['accuracy', 'accuracy std', 'overall data', 'anxious data', '% anxious', 'f1', 'f1 std'])
    display(config_1_data_avgs_pd.T)
    display(config_1_data_avgs_pd.accuracy.pct_change())
    if model_type == 'COMBINED':
        q = pd.DataFrame(config_1_data_avgs_pd.accuracy.pct_change())

In [None]:
# Per-day averages for configuration 2, first for the COMBINED models and then
# for the DAILY ones (one loop instead of two copy-pasted passes). After the
# loop the `config_2_data_avgs*` names hold the DAILY results, while `q2` keeps
# the day-over-day accuracy change of the COMBINED models.
for model_type in ('COMBINED', 'DAILY'):
    # The model-type filter does not depend on the day, so it is applied once.
    models_of_type = config_2_data[config_2_data.modelType==model_type]
    config_2_data_avgs = dict()
    for x in range(1, 10):
        c = models_of_type[models_of_type.trialDay==x]
        config_2_data_avgs[x] = [
            c.oobMetrics_accuracy.mean() * 100.000,
            c.oobMetrics_accuracy.std() * 100.00,
            c.overall_data.mean(),
            c.overall_anxious.mean(),
            c.overall_anxious.mean() / c.overall_data.mean() * 100.00,
            c.validationMetrics_f1score.mean(),
            c.validationMetrics_f1score.std()
        ]

    config_2_data_avgs_pd = pd.DataFrame.from_dict(config_2_data_avgs, orient='index', columns=['accuracy', 'accuracy std', 'overall data', 'anxious data', '% anxious', 'f1', 'f1 std'])
    display(config_2_data_avgs_pd.T)
    display(config_2_data_avgs_pd.accuracy.pct_change())
    if model_type == 'COMBINED':
        q2 = config_2_data_avgs_pd.accuracy.pct_change()

In [None]:
# Per-day averages for configuration 3, first for the COMBINED models and then
# for the DAILY ones (one loop instead of two copy-pasted passes). After the
# loop the `config_3_data_avgs*` names hold the DAILY results.
for model_type in ('COMBINED', 'DAILY'):
    # The model-type filter does not depend on the day, so it is applied once.
    models_of_type = config_3_data[config_3_data.modelType==model_type]
    config_3_data_avgs = dict()
    for x in range(1, 10):
        c = models_of_type[models_of_type.trialDay==x]
        config_3_data_avgs[x] = [
            c.oobMetrics_accuracy.mean() * 100.000,
            c.oobMetrics_accuracy.std() * 100.00,
            c.overall_data.mean(),
            c.overall_anxious.mean(),
            c.overall_anxious.mean() / c.overall_data.mean() * 100.00,
            c.validationMetrics_f1score.mean(),
            c.validationMetrics_f1score.std()
        ]

    config_3_data_avgs_pd = pd.DataFrame.from_dict(config_3_data_avgs, orient='index', columns=['accuracy', 'accuracy std', 'overall data', 'anxious data', '% anxious', 'f1', 'f1 std'])
    display(config_3_data_avgs_pd.T)
    display(config_3_data_avgs_pd.accuracy.pct_change())