In [1]:
import altair as alt
import sqlite3
import pandas as pd
import numpy as np
# Relative path of the SQLite database file that this notebook queries.
DATABASE = './schemas/syslog.db'

In [2]:
# Open the SQLite file named by DATABASE; `conn` is the handle the query cell executes against.
conn = sqlite3.connect(DATABASE)

In [4]:
# One result row per model: (model_name, comma-joined responsetime values).
response_times_sql = """
    select m.model_name, GROUP_CONCAT(p.responsetime, ',') from prediction_log p join models m on p.model_id = m.id group by m.model_name;
    """
res = conn.execute(response_times_sql).fetchall()

# Score value attached to every sample of a model, keyed by model_name.
scores = dict(
    desiciontree=0.9601735454444321,
    randomforest=0.9692958059850929,
    xgbooster=0.9770831015685838,
)

In [38]:
# Long-format columns: one entry per (model, response time sample), with the
# model's score repeated alongside each sample.
data = {"model": [], "responsetime": [], "score": []}

for model_name, joined_times in res:
    # The query returns the samples as one comma-separated string; parse all of
    # them, then keep at most the first 1000 per model.
    samples = [int(token) for token in joined_times.split(',')][:1000]

    data['model'].extend([model_name] * len(samples))
    data['responsetime'].extend(samples)
    data['score'].extend([scores[model_name]] * len(samples))

In [39]:
df = pd.DataFrame(data)

# 70th-percentile response time of each model; used as the per-model upper
# cutoff when the frame is filtered.
q1, q2, q3 = (
    df.loc[df['model'] == model_name, 'responsetime'].quantile(0.70)
    for model_name in ('desiciontree', 'randomforest', 'xgbooster')
)

In [40]:
# Keep only the rows whose response time is strictly below their own model's
# cutoff. Rows of any model without a cutoff map to NaN, compare False, and are
# dropped.
df = df[
    df['responsetime'] < df['model'].map({
        'desiciontree': q1,
        'randomforest': q2,
        'xgbooster': q3,
    })
]

In [50]:
def _responsetime_histogram(frame, model_name):
    """Return a binned bar chart of response-time counts for one model.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with at least the columns ``model`` and ``responsetime``.
    model_name : str
        Value of the ``model`` column whose rows are plotted.

    Returns
    -------
    altair.Chart
        Bar chart with binned ``responsetime`` on x and the row count on y.
    """
    return alt.Chart(frame[frame['model'] == model_name]).mark_bar().encode(
        alt.X("responsetime:Q", bin = True),
        alt.Y("count()"),
        color="model"
    )


# One histogram per model, built by the shared helper instead of three
# copy-pasted chart definitions.
desiciontree_plt = _responsetime_histogram(df, 'desiciontree')
randomforest_plt = _responsetime_histogram(df, 'randomforest')
xgbooster_plt = _responsetime_histogram(df, 'xgbooster')

# Vertical concatenation of the three histograms.
responsetime = (desiciontree_plt & randomforest_plt & xgbooster_plt)

# Hard-coded per-model summary: a score and the natural log of an average
# response time.
summary = {"model" : ["randomforest",
                    "desiciontree",
                    "xgbooster"],
        "r1_score" : [0.969295806,
                        0.9601735454,
                        0.9770831016],
        "log_avg_responsetime" : [np.log(10627.56775),
                            np.log(278.8169054),
                            np.log(670.0944257)],
}
summary_df = pd.DataFrame(summary)

# The y field must name an existing column of summary_df; the original pointed
# at "avg_responsetime", which the frame does not contain. The stray trailing
# "." that made the cell a syntax error is also removed.
summary_plt = alt.Chart(summary_df).mark_circle().encode(
    alt.X("r1_score:Q"),
    alt.Y("log_avg_responsetime:Q"),
    color = "model:N"
)
summary_plt


