In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pymongo
from pprint import pprint

In [2]:
# Connect to MongoDB using pymongo's default connection settings and select the
# database that the query helpers in this notebook read from.
client = pymongo.MongoClient()
db = client["metrics_causal"]

In [3]:
def group_by(df, bycols, agg_map):
    """
    Group ``df`` by ``bycols`` and compute one aggregate per requested column.

    @param df:      DataFrame
    @param bycols:  str or list
                        Column(s) to group by
    @param agg_map: dictionary or list of 2-tuples
                        Mapping from column to aggregate function e.g. [("city", "count"), ("salary", "mean")]
    @return:        DataFrame
                        Flattened dataframe, with multi-level index removed. Each aggregated
                        column is named "<aggregate>(<column>)", e.g. "mean(salary)".
    @raise IndexError:  if agg_map contains no entries
    """
    # isinstance rather than an exact type comparison, so that str / dict
    # subclasses (OrderedDict, defaultdict, ...) are recognised too.
    if isinstance(bycols, str):
        bycols = [bycols]
    else:
        bycols = list(bycols)

    if isinstance(agg_map, dict):
        agg_map = agg_map.items()

    grps = []
    for col, agg in agg_map:
        # Label callables by their name instead of their repr (which embeds a memory address).
        agg_label = getattr(agg, "__name__", agg)
        grp = df[bycols + [col]].groupby(bycols).agg(agg).reset_index()
        grps.append(grp.rename(columns={col: "%s(%s)" % (agg_label, col)}))

    # Each partial frame has the group keys plus one aggregate column; join them on the keys.
    merged = grps[0]
    for grp in grps[1:]:
        merged = pd.merge(merged, grp, on=bycols, how="inner")
    return merged

In [4]:
from bson.son import SON # needed to ensure dictionary is ordered (python default is not)
import hashlib

def hash_feats(fts):
    """
    Return a SHA-224 hex digest of the values held in ``fts``.

    Each element of ``fts.values`` is converted with ``str``, the pieces are joined
    with "|" and the UTF-8 encoding of that string is hashed.
    """
    pieces = [str(value) for value in fts.values]
    payload = "|".join(pieces).encode('utf-8')
    return hashlib.sha224(payload).hexdigest()

def get_df_sorted_by_f1score(collection, params=None, filter_cols=True):
    """
    Load the runs stored in a results collection, best micro F1 score first.

    @param collection:  str
                            Name of the collection in the module-level ``db``
    @param params:      None, str or list
                            Names of fields under each document's ``parameters`` to include.
                            A str is treated as a comma-separated list of names.
    @param filter_cols: bool
                            If True, return only the three micro metric columns followed by
                            the requested parameter columns. If False, return every projected
                            column, plus an ``hs_params`` hash of the parameter values when
                            params were requested.
    @return:            DataFrame
                            Sorted by ``micro_f1_score`` descending. When no document has a
                            micro F1 score, an empty frame with the metric and parameter
                            columns is returned.
    """
    if not params:
        params = []
    elif isinstance(params, str):
        params = [name.strip() for name in params.split(",") if name.strip()]
    else:
        # Copy so that tuples work and the caller's list is never aliased.
        params = list(params)

    project = {
            "weighted_f1_score":"$WEIGHTED_MEAN_CONCEPT_CODES.f1_score",
            "micro_f1_score":  "$MICRO_F1.f1_score",
            "micro_recall":    "$MICRO_F1.recall",
            "micro_precision": "$MICRO_F1.precision",

    # PARAMETERS
    #        "window_size":    "$parameters.window_size",
            "feats":          "$parameters.extractors",
    #        "count": {        "$size" : "$parameters.extractors" },
            "asof" :          "$asof",
            "_id":1
    }

    # No count for HMM. The "count" projection above is currently commented out, so
    # drop it only if it is present instead of raising KeyError.
    if "_hmm" in collection.lower():
        project.pop("count", None)

    for param in params:
        project[param] = "$parameters." + param

    feats_pipeline = [
        {"$project": project},
        # Skip documents that have no micro F1 score.
        {"$match": {"micro_f1_score": {"$exists": True}}},
        {"$sort": {"micro_f1_score": -1}},
    ]

    metric_cols = ["micro_f1_score", "micro_recall", "micro_precision"]
    rows = list(db[collection].aggregate(feats_pipeline))
    if not rows:
        # Nothing matched: sort_values would fail on a column-less frame.
        return pd.DataFrame(columns=metric_cols + params)

    df = pd.DataFrame(rows).sort_values("micro_f1_score", ascending=False)
    if filter_cols:
        return df[metric_cols + params]

    # The parameter hash is only visible in the unfiltered frame, so compute it only here.
    if params:
        df["hs_params"] = df[params].apply(hash_feats, axis=1)
    return df

In [5]:
from Metrics import rpf1a_from_tp_fp_tn_fn
from collections import defaultdict

def tally_counts(r, filter):
    """
    Sum the tp / tn / fp / fn counts of the entries of ``r`` whose key passes ``filter``.

    @param r:       mapping from key to a mapping holding "tp", "tn", "fp" and "fn"
    @param filter:  predicate called with each key; only entries for which it is truthy are summed
    @return:        defaultdict(int) of the summed counts (empty when no key passes)
    """
    totals = defaultdict(int)
    selected = (counts for key, counts in r.items() if filter(key))
    for counts in selected:
        for prop in ("tp", "tn", "fp", "fn"):
            totals[prop] += counts[prop]
    return totals

def get_causal_relation_metrics(collection, params, include_concept_codes=True):
    """
    Compute micro-averaged metrics per stored run, best causal-relation F1 first.

    For every document in the collection the tp/fp/tn/fn counts are summed over the
    keys containing "->" (reported as the ``cr_*`` columns) and, optionally, over
    the keys starting with a digit (reported as the ``concept_*`` columns).

    @param collection:            str
                                      Name of the collection in the module-level ``db``
    @param params:                None, str or list
                                      Names of entries in each document's ``parameters`` to
                                      include. A str is treated as a comma-separated list.
    @param include_concept_codes: bool
                                      Whether to add the ``concept_*`` columns
    @return:                      DataFrame
                                      Metric columns followed by the parameter columns, sorted by
                                      ``cr_micro_f1`` descending; empty (with those columns) when
                                      the collection holds no documents.
    @raise KeyError:              if a document lacks one of the requested parameters
    """
    if not params:
        params = []
    elif isinstance(params, str):
        params = [name.strip() for name in params.split(",") if name.strip()]
    else:
        params = list(params)

    # Build the column list explicitly: joining and re-splitting a string yields a
    # bogus "" column for empty params, and filtering on the substring "concept"
    # would also drop user parameters whose name happens to contain it.
    fields = ["cr_micro_f1", "cr_micro_rec", "cr_micro_prec"]
    if include_concept_codes:
        fields += ["concept_micro_f1", "concept_micro_rec", "concept_micro_prec"]
    fields += params

    records = []
    for r in db[collection].find({}):
        record = {}
        cr_counts = tally_counts(r, lambda c: "->" in c)
        (rec, prec, cr_f1, _acc) = rpf1a_from_tp_fp_tn_fn(cr_counts["tp"],cr_counts["fp"],cr_counts["tn"],cr_counts["fn"])
        record["cr_micro_f1"] = cr_f1
        record["cr_micro_rec"] = rec
        record["cr_micro_prec"] = prec
        if include_concept_codes:
            concept_counts = tally_counts(r, lambda c: c[0].isdigit())
            (rec, prec, concept_f1, _acc) = rpf1a_from_tp_fp_tn_fn(concept_counts["tp"],concept_counts["fp"],concept_counts["tn"],concept_counts["fn"])
            record["concept_micro_f1"] = concept_f1
            record["concept_micro_rec"] = rec
            record["concept_micro_prec"] = prec
        run_parameters = r["parameters"]
        for name in params:
            record[name] = run_parameters[name]
        records.append(record)

    if not records:
        # An empty collection would otherwise fail on the column selection below.
        return pd.DataFrame(columns=fields)
    return pd.DataFrame(records)[fields].sort_values("cr_micro_f1", ascending=False)

In [6]:
def round_data(df, places=3):
    """
    Return a copy of ``df`` in which every column whose name contains "micro_"
    is rendered as fixed-point strings with ``places`` decimal places.

    The input frame is left untouched; all other columns are copied unchanged.
    """
    template = "{0:.%sf}" % (places,)
    formatted = df.copy()
    for name in formatted.columns.values:
        if "micro_" not in name:
            continue
        formatted[name] = df[name].apply(lambda value: template.format(value))
    return formatted

# Binary Relevance - CR and Concept Codes

## Coral Bleaching
(no skin cancer results for this experiment)

### <span style="color:red">No longer valid - not included in final experiments</span>

# Most Common Tag

## Coral Bleaching

In [7]:
# Comma-separated names of the run parameters shown next to the metrics in the
# RNN tagging tables below (passed to get_df_sorted_by_f1score).
params = "merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size"

### Training

In [8]:
# Runs stored in this collection, best micro F1 first; metric columns formatted to 4 decimal places.
df = get_df_sorted_by_f1score("CR_CB_TAGGING_TD_MOST_COMMON_TAG_RNN", params)
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.8688,0.8714,0.8662,sum,2,True,True,256
1,0.8054,0.8128,0.798,sum,2,True,True,128
2,0.7852,0.798,0.7729,sum,1,True,True,256
3,0.7542,0.8007,0.7128,sum,1,True,True,128
4,0.7418,0.7381,0.7455,sum,2,True,True,64
5,0.7169,0.7509,0.6858,sum,1,True,True,64


### Validation

In [9]:
# Runs stored in this collection, best micro F1 first; metric columns formatted to 4 decimal places.
df = get_df_sorted_by_f1score("CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.6798,0.6949,0.6653,sum,2,True,True,256
1,0.6731,0.6803,0.6661,sum,2,True,True,128
2,0.6498,0.6564,0.6433,sum,1,True,True,256
3,0.6358,0.679,0.5978,sum,1,True,True,128
4,0.6351,0.654,0.6173,sum,1,True,True,64
5,0.6342,0.6281,0.6404,sum,2,True,True,64


### Test

In [10]:
# Runs stored in the test collection, best micro F1 first; metric columns formatted to 4 decimal places.
df = get_df_sorted_by_f1score("TEST_CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.6764,0.6561,0.698,sum,2,True,True,256


## Skin Cancer

### Training

In [11]:
# Runs stored in this collection, best micro F1 first; metric columns formatted to 4 decimal places.
df = get_df_sorted_by_f1score("CR_SC_TAGGING_TD_MOST_COMMON_TAG_RNN", params)
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.8718,0.8549,0.8894,sum,2,True,True,256
1,0.8527,0.8534,0.852,sum,2,True,True,128
2,0.8494,0.838,0.861,sum,2,True,True,64
3,0.8423,0.8369,0.8478,sum,1,True,True,256
4,0.8325,0.8569,0.8095,sum,1,True,True,128
5,0.7996,0.8188,0.7812,sum,1,True,True,64


### Validation

In [12]:
# Runs stored in this collection, best micro F1 first; metric columns formatted to 4 decimal places.
df = get_df_sorted_by_f1score("CR_SC_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.7686,0.7608,0.7765,sum,2,True,True,256
1,0.7562,0.7628,0.7497,sum,2,True,True,128
2,0.7539,0.7545,0.7533,sum,1,True,True,256
3,0.7521,0.7415,0.7631,sum,2,True,True,64
4,0.7418,0.7726,0.7134,sum,1,True,True,128
5,0.7295,0.7499,0.7102,sum,1,True,True,64


### Test

In [13]:
# Runs stored in the test collection, best micro F1 first; metric columns formatted to 4 decimal places.
df = get_df_sorted_by_f1score("TEST_CR_SC_TAGGING_VD_MOST_COMMON_TAG_RNN", params)
round_data(df,4)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,merge_mode,num_rnns,use_pretrained_embedding,bi-directional,hidden_size
0,0.7918,0.7979,0.7859,sum,2,True,True,256


## Stacked Model

### Coral Bleaching

### Training

In [14]:
# Names of the run parameters shown next to the metrics for the stacked model.
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
df = get_df_sorted_by_f1score("CR_CB_STACKED_TD", sparams)
# Show only the five best runs, metrics formatted to 4 decimal places.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7415,0.7113,0.7742,False,l1,100.0,False,False,False,True,True
1,0.7412,0.7106,0.7745,False,l2,100.0,False,False,False,True,True
2,0.7407,0.7096,0.7747,False,l1,10.0,False,False,False,True,True
3,0.7387,0.7072,0.7733,False,l2,10.0,False,False,False,True,True
4,0.7386,0.7068,0.7733,True,l2,10.0,False,False,False,True,True


### Validation

In [15]:
# Reuses `sparams` from the preceding training cell. Five best runs, metrics to 4 decimal places.
df = get_df_sorted_by_f1score("CR_CB_STACKED_VD", sparams)
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.6946,0.656,0.738,False,l1,1.0,False,False,False,True,True
3,0.6936,0.649,0.7447,False,l2,1.0,False,False,False,True,True
1,0.6936,0.649,0.7447,True,l2,1.0,False,False,False,True,True
2,0.6936,0.649,0.7447,True,l2,1.0,False,False,False,True,True
4,0.693,0.6567,0.7336,False,l1,0.5,False,False,False,True,True


### Test

In [16]:
# Reuses `sparams` from the stacked-model training cell. Five best runs, metrics to 4 decimal places.
df = get_df_sorted_by_f1score("TEST_CR_CB_STACKED_VD", sparams)
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7038,0.6745,0.7359,True,l2,0.5,True,False,True,False,True


### Skin Cancer

### Training

In [17]:
# Names of the run parameters shown next to the metrics for the stacked model.
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
df = get_df_sorted_by_f1score("CR_SC_STACKED_TD", sparams)
# Show only the five best runs, metrics formatted to 4 decimal places.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.8007,0.7566,0.8504,False,l1,100.0,True,False,True,False,True
1,0.7952,0.7492,0.8473,False,l2,100.0,True,False,True,False,True
2,0.7929,0.7459,0.8464,False,l1,10.0,True,False,True,False,True
3,0.7891,0.7407,0.8444,False,l1,5.0,True,False,True,False,True
4,0.7888,0.7407,0.8436,False,l2,10.0,True,False,True,False,True


### Validation

In [18]:
# Names of the run parameters shown next to the metrics for the stacked model.
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
df = get_df_sorted_by_f1score("CR_SC_STACKED_VD", sparams)
# Show only the five best runs, metrics formatted to 4 decimal places.
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7632,0.7113,0.8232,True,l2,1.0,True,False,True,False,True
1,0.7632,0.7113,0.8232,True,l2,1.0,False,True,True,False,True
2,0.7632,0.7113,0.8232,True,l2,1.0,True,False,True,False,True
3,0.7632,0.7113,0.8232,False,l2,1.0,True,False,True,False,True
4,0.763,0.7121,0.8216,True,l2,1.0,True,False,True,True,True


### Test

In [19]:
# Names of the run parameters shown next to the metrics for the stacked model.
sparams = "dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats".split(",")
# NOTE(review): every other "Test" cell in this notebook reads a TEST_..._VD collection, and the
# test comparison further down uses TEST_CR_SC_STACKED_VD; this cell reads TEST_CR_SC_STACKED_TD.
# Confirm that the _TD collection is the one intended here.
df = get_df_sorted_by_f1score("TEST_CR_SC_STACKED_TD", sparams)
round_data(df,4).head(5)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,penalty,C,max_feats,min_feats,average_feats,binary_feats,combo_feats
0,0.7762,0.7236,0.8372,True,l2,10.0,True,False,False,True,True


## S-R Parser

In [35]:
def extract_parameter(s, param_name):
    """
    Extract the value of one ``name=value`` argument from an estimator description
    such as "LogisticRegression(C=0.1, dual=True, penalty='l2')".

    @param s:           str
                            The description to search; may span several lines
    @param param_name:  str
                            Exact name of the argument to look up
    @return:            str
                            The value of the first matching argument, with commas and
                            single quotes removed (e.g. "0.1", "l2", "True")
    @raise IndexError:  if no argument called param_name is present
    """
    # Parentheses only delimit the argument list, so treat them as separators.
    # split() with no argument breaks on any whitespace run, so a description
    # wrapped over several lines does not leave newlines inside the values.
    tokens = s.replace("(", " ").replace(")", " ").split()
    for token in tokens:
        if "=" not in token:
            continue
        # Split on the first "=" only; a value may itself contain "=".
        key, val = token.split("=", 1)
        if key == param_name:
            return val.replace(",", "").replace("'", "")
    raise IndexError("parameter %r not found in %r" % (param_name, s))

# Single-argument extractors, suitable for Series.apply on a column of
# estimator description strings.
def extract_c_val(s):
    """Return the value of the ``C`` argument in description ``s``."""
    return extract_parameter(s, "C")


def extract_penalty_val(s):
    """Return the value of the ``penalty`` argument in description ``s``."""
    return extract_parameter(s, "penalty")


def extract_dual_val(s):
    """Return the value of the ``dual`` argument in description ``s``."""
    return extract_parameter(s, "dual")


# Quick check on a sample description: the cell output should be the C value.
s = "LogisticRegression(C=0.1, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)"
extract_c_val(s)

'0.1'

### Coral Bleaching

### Training

In [29]:
# Collection queried by this cell (kept as a one-element list; only col[0] is used).
col = ["CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_TD"]

# Runs ranked by micro F1, with the algorithm, beta and max_epochs parameters alongside.
df_feat_sel = get_df_sorted_by_f1score(col[0], "algorithm,beta,max_epochs", filter_cols=True) 
# "algorithm" holds a classifier description string; lift C, penalty and dual
# out of it into their own columns.
df_feat_sel["C"] = df_feat_sel["algorithm"].apply(extract_c_val)
df_feat_sel["penalty"] = df_feat_sel["algorithm"].apply(extract_penalty_val)
df_feat_sel["dual"] = df_feat_sel["algorithm"].apply(extract_dual_val)
df_feat_sel.head(10)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,algorithm,beta,max_epochs,C,penalty,dual
0,0.864852,0.766384,0.992354,"LogisticRegression(C=10.0, class_weight=None, ...",0.1,20,10.0,l2,False
1,0.864812,0.766384,0.992247,"LogisticRegression(C=10.0, class_weight=None, ...",0.3,15,10.0,l2,False
2,0.864758,0.766301,0.992246,"LogisticRegression(C=10.0, class_weight=None, ...",0.5,15,10.0,l2,False
3,0.864733,0.766134,0.992459,"LogisticRegression(C=10.0, class_weight=None, ...",0.3,20,10.0,l2,False
4,0.864692,0.766134,0.992352,"LogisticRegression(C=10.0, class_weight=None, ...",0.4,15,10.0,l2,False
5,0.864598,0.766051,0.992244,"LogisticRegression(C=10.0, class_weight=None, ...",0.4,20,10.0,l2,False
6,0.864586,0.765968,0.99235,"LogisticRegression(C=10.0, class_weight=None, ...",0.5,20,10.0,l2,False
7,0.864571,0.766134,0.992031,"LogisticRegression(C=10.0, class_weight=None, ...",0.4,10,10.0,l2,False
8,0.864474,0.766301,0.991499,"LogisticRegression(C=10.0, class_weight=None, ...",0.4,20,10.0,l2,True
9,0.864457,0.766717,0.990758,"LogisticRegression(C=10.0, class_weight=None, ...",0.2,20,10.0,l2,True


### Validation

In [28]:
# Collection queried by this cell (kept as a one-element list; only col[0] is used).
col = ["CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD"]

# Runs ranked by micro F1, with the algorithm, beta and max_epochs parameters alongside.
df_feat_sel = get_df_sorted_by_f1score(col[0], "algorithm,beta,max_epochs", filter_cols=True) 
# "algorithm" holds a classifier description string; lift C, penalty and dual
# out of it into their own columns.
df_feat_sel["C"] = df_feat_sel["algorithm"].apply(extract_c_val)
df_feat_sel["penalty"] = df_feat_sel["algorithm"].apply(extract_penalty_val)
df_feat_sel["dual"] = df_feat_sel["algorithm"].apply(extract_dual_val)
df_feat_sel.head(10)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,algorithm,beta,max_epochs,C,penalty,dual
0,0.720357,0.670991,0.777564,"LogisticRegression(C=0.1, class_weight=None, d...",0.5,5,0.1,l2,True
1,0.720357,0.670991,0.777564,"LogisticRegression(C=0.1, class_weight=None, d...",0.5,5,0.1,l2,False
2,0.720242,0.672322,0.775518,"LogisticRegression(C=0.1, class_weight=None, d...",0.5,10,0.1,l2,True
3,0.720242,0.672322,0.775518,"LogisticRegression(C=0.1, class_weight=None, d...",0.5,10,0.1,l2,False
4,0.7201,0.667997,0.781019,"LogisticRegression(C=0.5, class_weight=None, d...",0.2,20,0.5,l2,True
5,0.7201,0.667997,0.781019,"LogisticRegression(C=0.5, class_weight=None, d...",0.2,20,0.5,l2,False
6,0.719943,0.671324,0.776154,"LogisticRegression(C=0.1, class_weight=None, d...",0.4,5,0.1,l2,True
7,0.719943,0.671324,0.776154,"LogisticRegression(C=0.1, class_weight=None, d...",0.4,5,0.1,l2,False
9,0.719715,0.670991,0.776068,"LogisticRegression(C=0.1, class_weight=None, d...",0.3,15,0.1,l2,False
8,0.719715,0.670991,0.776068,"LogisticRegression(C=0.1, class_weight=None, d...",0.3,15,0.1,l2,True


### Test

In [39]:
# Test collection queried by this cell (kept as a one-element list; only col[0] is used).
col = ["TEST_CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_VD"]

# Runs ranked by micro F1, with the algorithm, beta and max_epochs parameters alongside.
df_feat_sel = get_df_sorted_by_f1score(col[0], "algorithm,beta,max_epochs", filter_cols=True) 
# "algorithm" holds a classifier description string; lift C, penalty and dual
# out of it into their own columns.
df_feat_sel["C"] = df_feat_sel["algorithm"].apply(extract_c_val)
df_feat_sel["penalty"] = df_feat_sel["algorithm"].apply(extract_penalty_val)
df_feat_sel["dual"] = df_feat_sel["algorithm"].apply(extract_dual_val)
df_feat_sel.head(10)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,algorithm,beta,max_epochs,C,penalty,dual
0,0.727744,0.702838,0.75448,"LogisticRegression(C=0.1, class_weight=None, d...",0.5,5,0.1,l2,True


### Skin Cancer

### Training

In [30]:
# Collection queried by this cell (kept as a one-element list; only col[0] is used).
col = ["CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_TD"]

# Runs ranked by micro F1, with the algorithm, beta and max_epochs parameters alongside.
df_feat_sel = get_df_sorted_by_f1score(col[0], "algorithm,beta,max_epochs", filter_cols=True) 
# "algorithm" holds a classifier description string; lift C, penalty and dual
# out of it into their own columns.
df_feat_sel["C"] = df_feat_sel["algorithm"].apply(extract_c_val)
df_feat_sel["penalty"] = df_feat_sel["algorithm"].apply(extract_penalty_val)
df_feat_sel["dual"] = df_feat_sel["algorithm"].apply(extract_dual_val)
df_feat_sel.head(10)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,algorithm,beta,max_epochs,C,penalty,dual
0,0.879436,0.789706,0.99217,"LogisticRegression(C=100.0, class_weight=None,...",0.2,20,100.0,l2,False
1,0.87935,0.789254,0.992667,"LogisticRegression(C=100.0, class_weight=None,...",0.5,20,100.0,l2,False
2,0.879277,0.789807,0.991607,"LogisticRegression(C=100.0, class_weight=None,...",0.3,20,100.0,l2,False
3,0.879254,0.789455,0.992105,"LogisticRegression(C=100.0, class_weight=None,...",0.3,10,100.0,l2,False
4,0.87923,0.789455,0.992042,"LogisticRegression(C=100.0, class_weight=None,...",0.5,15,100.0,l2,False
5,0.879209,0.789304,0.992228,"LogisticRegression(C=100.0, class_weight=None,...",0.4,20,100.0,l2,False
6,0.879196,0.789204,0.992353,"LogisticRegression(C=100.0, class_weight=None,...",0.4,15,100.0,l2,False
7,0.879163,0.789505,0.991792,"LogisticRegression(C=100.0, class_weight=None,...",0.1,20,100.0,l2,False
8,0.879111,0.789304,0.991978,"LogisticRegression(C=100.0, class_weight=None,...",0.2,10,100.0,l2,False
9,0.879089,0.789505,0.991604,"LogisticRegression(C=100.0, class_weight=None,...",0.5,10,100.0,l2,False


### Validation

In [38]:
# Collection queried by this cell (kept as a one-element list; only col[0] is used).
col = ["CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD"]

# Runs ranked by micro F1, with the algorithm, beta and max_epochs parameters alongside.
df_feat_sel = get_df_sorted_by_f1score(col[0], "algorithm,beta,max_epochs", filter_cols=True) 
# "algorithm" holds a classifier description string; lift C, penalty and dual
# out of it into their own columns.
df_feat_sel["C"] = df_feat_sel["algorithm"].apply(extract_c_val)
df_feat_sel["penalty"] = df_feat_sel["algorithm"].apply(extract_penalty_val)
df_feat_sel["dual"] = df_feat_sel["algorithm"].apply(extract_dual_val)
df_feat_sel.head(10)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,algorithm,beta,max_epochs,C,penalty,dual
0,0.766909,0.710092,0.833609,"LogisticRegression(C=0.5, class_weight=None, d...",0.2,5,0.5,l2,False
1,0.7668,0.712304,0.830326,"LogisticRegression(C=0.5, class_weight=None, d...",0.4,20,0.5,l2,False
2,0.766602,0.712505,0.829588,"LogisticRegression(C=0.5, class_weight=None, d...",0.5,10,0.5,l2,False
3,0.76658,0.711098,0.831453,"LogisticRegression(C=0.5, class_weight=None, d...",0.3,20,0.5,l2,False
4,0.766512,0.715119,0.825865,"LogisticRegression(C=0.5, class_weight=None, d...",0.1,20,0.5,l1,False
5,0.766396,0.710696,0.831569,"LogisticRegression(C=0.5, class_weight=None, d...",0.2,15,0.5,l2,True
6,0.766367,0.711902,0.829857,"LogisticRegression(C=0.5, class_weight=None, d...",0.4,20,0.5,l2,True
7,0.766363,0.710897,0.831218,"LogisticRegression(C=0.5, class_weight=None, d...",0.4,10,0.5,l2,True
8,0.766311,0.714314,0.826471,"LogisticRegression(C=0.1, class_weight=None, d...",0.2,5,0.1,l2,True
9,0.766266,0.7115,0.830167,"LogisticRegression(C=0.5, class_weight=None, d...",0.3,15,0.5,l2,True


# Compare Top Validation Metrics By Algorithm

### Coral Bleaching

In [32]:
# One collection per algorithm; the comparison cells below take the top run from each.
# The skin cancer and test variants are derived from these names by string replacement.
cb_collections = [
    "CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN", 
    "CR_CB_STACKED_VD",
    "CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD"
]

In [33]:
# Collect the best run of each collection and rank the algorithms against each other.
rows = []
for coll in cb_collections:
    df = get_df_sorted_by_f1score(coll, "")
    # The frame is sorted by micro F1 descending, so the first row is the best run.
    dct = df.iloc[0].to_dict()
    dct["Algo"] = coll
    rows.append(dct)

df=pd.DataFrame(rows)
df.sort_values("micro_f1_score", ascending=False)

Unnamed: 0,Algo,micro_f1_score,micro_precision,micro_recall
2,CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARA...,0.720357,0.777564,0.670991
1,CR_CB_STACKED_VD,0.694611,0.738024,0.656021
0,CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN,0.679792,0.665287,0.694943


### Skin Cancer

In [34]:
# Same comparison for skin cancer: the collection names differ only in "SC" replacing "CB".
rows = []
for coll in cb_collections: 
    coll = coll.replace("CB", "SC")
    df = get_df_sorted_by_f1score(coll, "")
    # The frame is sorted by micro F1 descending, so the first row is the best run.
    dct = df.iloc[0].to_dict()
    dct["Algo"] = coll
    rows.append(dct)

df=pd.DataFrame(rows)
df.sort_values("micro_f1_score", ascending=False)

Unnamed: 0,Algo,micro_f1_score,micro_precision,micro_recall
0,CR_SC_TAGGING_VD_MOST_COMMON_TAG_RNN,0.768559,0.776524,0.760756
2,CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARA...,0.766909,0.833609,0.710092
1,CR_SC_STACKED_VD,0.763158,0.823174,0.711299


## Test Data

In [45]:
# Parser model not present yet
# NOTE(review): the test comparison tables further down do list parser results, so the
# comment above may be out of date — confirm.
# Test collection names: the validation names with a "TEST_" prefix and "HYPER_PARAM_" removed.
test_collections = ["TEST_" + c.replace("HYPER_PARAM_","") for c in cb_collections]
# NOTE(review): this displays cb_collections rather than the test_collections list just built —
# confirm which one was meant to be shown.
cb_collections

['CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN',
 'CR_CB_STACKED_VD',
 'CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD']

In [46]:
# Still need this for RNN and the parser model
# Collect the top row of each test collection and rank the algorithms against each other.
rows = []
for coll in test_collections:

    df = get_df_sorted_by_f1score(coll, "")
    # The frame is sorted by micro F1 descending, so the first row is the best run.
    dct = df.iloc[0].to_dict()
    dct["Algo"] = coll
    rows.append(dct)

df=pd.DataFrame(rows)
df.sort_values("micro_f1_score", ascending=False)

Unnamed: 0,Algo,micro_f1_score,micro_precision,micro_recall
2,TEST_CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_VD,0.727744,0.75448,0.702838
1,TEST_CR_CB_STACKED_VD,0.703833,0.735883,0.674457
0,TEST_CR_CB_TAGGING_VD_MOST_COMMON_TAG_RNN,0.67642,0.698046,0.656093


In [48]:
# Still need this for RNN and the parser model
# Same test comparison for skin cancer: the names differ only in "SC" replacing "CB".
rows = []
for coll in test_collections:
    
    coll = coll.replace("CB", "SC")
    df = get_df_sorted_by_f1score(coll, "")
    # The frame is sorted by micro F1 descending, so the first row is the best run.
    dct = df.iloc[0].to_dict()
    dct["Algo"] = coll
    rows.append(dct)

df=pd.DataFrame(rows)
df.sort_values("micro_f1_score", ascending=False)

Unnamed: 0,Algo,micro_f1_score,micro_precision,micro_recall
0,TEST_CR_SC_TAGGING_VD_MOST_COMMON_TAG_RNN,0.791833,0.785903,0.797853
2,TEST_CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_VD,0.79033,0.822846,0.760286
1,TEST_CR_SC_STACKED_VD,0.764622,0.816244,0.719141
