## Get Best-Performing Hyper Parameters

In [1]:
!which python

/Users/simon.hughes/anaconda3/envs/phd_py36/bin/python


In [2]:
!pip freeze

appnope==0.1.0
attrs==19.1.0
backcall==0.1.0
bleach==3.1.0
boto==2.47.0
boto3==1.9.18
botocore==1.12.18
bz2file==0.98
certifi==2016.2.28
costcla==0.5
cycler==0.10.0
cymem==1.31.2
cytoolz==0.8.2
decorator==4.4.0
defusedxml==0.5.0
dill==0.2.8.2
docutils==0.14
entrypoints==0.3
ftfy==4.4.3
gensim==0.13.4
h5py==2.7.0
html5lib==0.999
ipykernel==5.1.1
ipython==7.6.1
ipython-genutils==0.2.0
ipywidgets==7.4.2
jedi==0.14.0
Jinja2==2.10.1
jmespath==0.9.3
joblib==0.9.4
json5==0.8.4
jsonschema==3.0.1
jupyter==1.0.0
jupyter-client==5.2.4
jupyter-console==6.0.0
jupyter-core==4.4.0
jupyterlab==1.0.1
jupyterlab-server==1.0.0
MarkupSafe==1.1.1
matplotlib==2.0.0
mistune==0.8.4
murmurhash==0.26.4
nbconvert==5.5.0
nbformat==4.4.0
nltk==3.2.2
nose==1.3.7
notebook==5.7.8
numpy==1.15.2
pandas==0.19.2
pandocfilters==1.4.2
parso==0.5.0
pathlib==1.0.1
pexpect==4.7.0
pickleshare==0.7.5
plac==0.9.6
preshed==1.0.1
prometheus-client==0.7.1
prompt-toolkit==2.0.9
ptyprocess==0.6.0
pyea==0.2
Pygments==2.4.2
pymongo==3.

In [3]:
import pandas as pd
import pymongo

In [4]:
# MongoDB on localhost; server selection is capped at 100 ms.
client = pymongo.MongoClient(host="127.0.0.1", serverSelectionTimeoutMS=100)
db = client["metrics_causal_model_reranker"]

In [5]:
import datetime

def fmt_dt(date_time_str):
    """Reformat a timestamp string as ``MM-DD-YYYY HH:MM``.

    Args:
        date_time_str: Timestamp text laid out as ``YYYY-MM-DD HH:MM:SS``,
            optionally followed by ``.ffffff`` fractional seconds (the two
            layouts produced by ``str(datetime.datetime)``).

    Returns:
        The timestamp formatted with ``"%m-%d-%Y %H:%M"``; seconds and
        fractional seconds are dropped.

    Raises:
        ValueError: If the text matches neither accepted layout.
    """
    # str(datetime) omits the fractional part when microsecond == 0, so a
    # fixed "%S.%f" pattern would reject such values. Choose the pattern that
    # matches the text actually received.
    if "." in date_time_str:
        pattern = '%Y-%m-%d %H:%M:%S.%f'
    else:
        pattern = '%Y-%m-%d %H:%M:%S'
    dt = datetime.datetime.strptime(date_time_str, pattern)
    return dt.strftime("%m-%d-%Y %H:%M")

def query_params_for(db, collection):
    """Project parameters, micro-F1 metrics and timestamp out of a collection.

    Args:
        db: Object that yields a collection when indexed by name
            (``db[collection]``).
        collection: Name of the collection to read.

    Returns:
        A list with one projected row per document returned by the
        aggregation. Each row carries ``_id`` plus the source fields
        ``parameters``, ``MICRO_F1`` and ``asof`` renamed to ``params``,
        ``micro_f1`` and ``asof``.
    """
    projection_stage = {
        "$project": {
            "params": "$parameters",
            "micro_f1": "$MICRO_F1",
            "asof": "$asof",
            "_id": 1,
        }
    }
    cursor = db[collection].aggregate([projection_stage])
    return list(cursor)

def query_collection(collection):
    """Load one metrics collection into a DataFrame sorted by descending F1.

    The collection is read from the ``metrics_causal_model_reranker``
    database, falling back to ``metrics_causal_model_parser`` when that
    returns nothing. Collections whose name contains ``"SENT_"`` are always
    read from the parser database.

    Args:
        collection: Name of the MongoDB collection to read.

    Returns:
        An empty ``DataFrame`` when neither database has rows. Otherwise a
        frame with one row per document, holding the run parameters merged
        with the micro-F1 metrics, ``asof`` as a string and ``extractors``
        joined into one comma-separated string, sorted by ``f1_score``
        (highest first).

    Raises:
        KeyError: If the loaded rows provide no ``extractors`` or
            ``f1_score`` column.
    """
    # The result of the re-ranker query was always discarded for "SENT_"
    # collections, so skip that round trip and go straight to the parser DB.
    if "SENT_" in collection:
        rows = []
    else:
        rows = query_params_for(db=client.metrics_causal_model_reranker, collection=collection)

    if len(rows) == 0:
        rows = query_params_for(db=client.metrics_causal_model_parser, collection=collection)

    if len(rows) == 0:
        return pd.DataFrame([])

    results = []
    for r in rows:
        # Flatten: parameters first, then metrics (metrics win on key clashes).
        d = dict(r["params"])
        d.update(r["micro_f1"])
        d["asof"] = str(r["asof"])
        results.append(d)

    df = pd.DataFrame(results)
    # Extractor names arrive as a list per row; show them as one string.
    df["extractors"] = df["extractors"].apply(",".join)
    return df.sort_values(by="f1_score", ascending=False)

In [6]:
def get_df(collection, training_only=False, cols=None):
    """Return selected columns of a metrics collection, best F1 first.

    Args:
        collection: Name of the collection passed to ``query_collection``.
        training_only: When false (the default) the collection name must
            contain ``"_VD"``; pass ``True`` to skip that check.
        cols: List of parameter columns to place between the metric columns
            and the trailing ``extractors`` / ``num_feats_MEAN`` columns.
            When ``None`` the list is chosen from the collection name: empty
            for names containing ``"SENT"``, the ``learning_rate`` set for
            names containing ``"PCPTRN"``, and the ``C`` / ``loss_type`` /
            ``pa_type`` set otherwise.

    Returns:
        ``query_collection(collection)`` restricted to
        ``f1_score, precision, recall, asof`` + ``cols`` +
        ``extractors, num_feats_MEAN``.

    Raises:
        AssertionError: If ``training_only`` is false and ``"_VD"`` is not in
            the collection name.
    """
    # Safety check for default params
    assert training_only or "_VD" in collection

    if cols is None:
        if "SENT" in collection:
            cols = []
        elif "PCPTRN" in collection:
            cols = [
                "best_top_n",
                "learning_rate",
                "best_max_parses",
                "max_update_items",
                "initial_weight",
                "early_stopping_iters",
            ]
        else:
            cols = [
                "best_top_n",
                "C",
                "best_max_parses",
                "max_update_items",
                "initial_weight",
                "loss_type",
                "pa_type",
                "early_stopping_iters",
            ]

    # Metric columns lead, then the parameter columns, then the common tail.
    leading = ["f1_score", "precision", "recall", "asof"]
    trailing = ["extractors", "num_feats_MEAN"]
    selected = leading + cols + trailing
    return query_collection(collection)[selected]

In [7]:
col = "SC_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD_3"
get_df(col).head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,learning_rate,best_max_parses,max_update_items,initial_weight,early_stopping_iters,extractors,num_feats_MEAN
4,0.813806,0.850235,0.78037,2019-07-10 22:41:56.085000,5,0.05,600,1,0.01,2,"num_crels,Inv-,Prob-",50.8


In [8]:
# MONGO_COLLECTION = "SC_RE-RANKER_HYPER_PARAM_VD"
# MONGO_COLLECTION = "SC_COST_INSENS_RE-RANKER_HYPER_PARAM_VD"

## CB

### VD

In [9]:
"beam size range:",sorted(get_df("CB_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD").best_top_n.unique())

('beam size range:', [1, 2, 3, 5, 7, 10])

In [10]:
"learning rate range:",sorted(get_df("CB_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD").learning_rate.unique())

('learning rate range:', [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0])

In [11]:
df_pa = get_df("SENT_CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_FIXED_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,extractors,num_feats_MEAN
0,0.74358,0.758542,0.729197,2019-05-03 14:18:47.905000,"single_words,between_word_features,label_set,t...",27479.6


In [12]:
# df_pa = query_collection("CB_STR_PCPTRN_RE-RANKER_FEATURE_SEL_VD")[["f1_score", "early_stopping_iters", "extractors"]]
# df_pa.head(1)

In [13]:
df_pa = get_df("CB_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,learning_rate,best_max_parses,max_update_items,initial_weight,early_stopping_iters,extractors,num_feats_MEAN
29,0.741353,0.782873,0.704015,2019-06-24 20:43:02.777000,1,0.1,300,1,0.01,2,"Prob-,Above-",33.0


In [14]:
# query_collection("CB_PA_RE-RANKER_HYPER_PARAM_VD")

In [15]:
df_pa = get_df("CB_PA_RE-RANKER_HYPER_PARAM_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
8,0.743145,0.760015,0.727007,2019-06-30 15:00:24.686000,2,0.0005,300,1,0.01,None - cost insens,1,1,CREL_,1371.8


In [16]:
df_pa = get_df("CB_RE-RANKER_HYPER_PARAM_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
37,0.746014,0.777294,0.717153,2019-06-18 01:55:37.752000,2,0.0025,300,1,0.01,ml,1,3,"CREL_,Prob-,CChainStats-",1410.6


### Test

In [17]:
df_pa = get_df("SENT_TEST_CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_FIXED_VD")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,extractors,num_feats_MEAN
0,0.737027,0.710169,0.765996,2019-05-03 14:19:17.204000,"single_words,between_word_features,label_set,t...",30367.0


In [18]:
df_pa = get_df("TEST_CB_STR_PCPTRN_RE-RANKER_VD")
# Improved precision over recall. Top n was restricted to 1, so the re-ranker
# was only capable of upping precision.
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,learning_rate,best_max_parses,max_update_items,initial_weight,early_stopping_iters,extractors,num_feats_MEAN
0,0.749771,0.751838,0.747715,2019-07-05 14:21:05.573000,1,0.1,300,1,0.01,2,"Prob-,Above-",33.0


In [19]:
df_pa = get_df("TEST_CB_PA_RE-RANKER_VD")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
0,0.740741,0.715503,0.767824,2019-07-05 14:40:40.998000,2,0.0005,300,1,0.01,None - cost insens,1,1,CREL_,1504.0


In [20]:
# To improve upon this, you can use early stopping
df_pa = get_df("TEST_CB_RE-RANKER_VD")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
0,0.741208,0.731317,0.751371,2019-07-05 14:51:25.146000,2,0.0025,300,1,0.01,ml,1,3,"CREL_,Prob-,CChainStats-",1543.0


## SC

### VD

In [21]:
"beam size range:",sorted(get_df("SC_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD_3").best_top_n.unique())

('beam size range:', [1, 2, 3, 5, 7, 10])

In [22]:
"learning rate range:",sorted(get_df("SC_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD_3").learning_rate.unique())

('learning rate range:', [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0])

In [23]:
df_pa = get_df("SENT_CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,extractors,num_feats_MEAN
0,0.80962,0.860857,0.764139,2019-05-03 14:27:42.611000,"three_words,between_word_features,size_feature...",26260.0


In [24]:
cols = ["f1_score", "extractors", "early_stopping_iters"]
query_collection("SC_STR_PCPTRN_RE-RANKER_FEATURE_SEL_VD").head(1)[cols]

Unnamed: 0,f1_score,extractors,early_stopping_iters
30,0.811784,"Above-,Inv-,num_crels,CChainStats-",1


In [25]:
df_pa = get_df("SC_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD_3")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,learning_rate,best_max_parses,max_update_items,initial_weight,early_stopping_iters,extractors,num_feats_MEAN
4,0.813806,0.850235,0.78037,2019-07-10 22:41:56.085000,5,0.05,600,1,0.01,2,"num_crels,Inv-,Prob-",50.8


In [26]:
df_pa = get_df("SC_PA_RE-RANKER_HYPER_PARAM_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
4,0.806789,0.868206,0.753487,2019-06-30 15:13:25.720000,1,0.01,300,1,0.01,None - cost insens,1,6,"num_crels,Inv-",37.0


In [27]:
df_pa = get_df("SC_RE-RANKER_HYPER_PARAM_VD")
df_pa.head(1)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
41,0.808089,0.856129,0.765153,2019-06-18 10:15:07.357000,3,0.01,300,1,0.01,ml,1,1,"CREL_,CChain-,Prob-",1386.4


### Test

In [28]:
df_pa = get_df("SENT_TEST_CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,extractors,num_feats_MEAN
0,0.827273,0.856471,0.8,2019-05-03 14:28:38.825000,"three_words,between_word_features,size_feature...",28839.0


In [29]:
df_pa = get_df("TEST_SC_STR_PCPTRN_RE-RANKER_VD_3")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,learning_rate,best_max_parses,max_update_items,initial_weight,early_stopping_iters,extractors,num_feats_MEAN
0,0.829323,0.844926,0.814286,2019-07-11 07:12:46.645000,5,0.05,600,1,0.01,2,"num_crels,Inv-,Prob-",51.0


In [30]:
df_pa = get_df("TEST_SC_PA_RE-RANKER_VD")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
0,0.828457,0.866747,0.793407,2019-07-05 14:45:29.204000,1,0.01,300,1,0.01,None - cost insens,1,6,"num_crels,Inv-",37.0


In [31]:
df_pa = get_df("TEST_SC_RE-RANKER_VD")
df_pa.head(10)

Unnamed: 0,f1_score,precision,recall,asof,best_top_n,C,best_max_parses,max_update_items,initial_weight,loss_type,pa_type,early_stopping_iters,extractors,num_feats_MEAN
0,0.826111,0.846597,0.806593,2019-07-05 14:54:53.566000,3,0.01,300,1,0.01,ml,1,1,"CREL_,CChain-,Prob-",1488.0


## Feature Selection

In [32]:
df_pa = query_collection("CB_STR_PCPTRN_RE-RANKER_FEATURE_SEL_VD")[["f1_score","learning_rate","max_update_items", "extractors"]]
df_pa.head(1)

Unnamed: 0,f1_score,learning_rate,max_update_items,extractors
15,0.740483,0.3,2,"Prob-,Above-"


In [33]:
# NOTE(review): this cell repeats the preceding cell verbatim (same CB
# collection, same columns) — confirm whether another collection was intended.
df_pa = query_collection("CB_STR_PCPTRN_RE-RANKER_FEATURE_SEL_VD")[["f1_score","learning_rate","max_update_items", "extractors"]]
df_pa.head(1)

Unnamed: 0,f1_score,learning_rate,max_update_items,extractors
15,0.740483,0.3,2,"Prob-,Above-"


In [34]:
df_pa = query_collection("SC_STR_PCPTRN_RE-RANKER_FEATURE_SEL_VD")[["f1_score","learning_rate","max_update_items"]]
df_pa.head(1)

Unnamed: 0,f1_score,learning_rate,max_update_items
30,0.811784,0.3,2


## Stacked by Dataset (for Results Section)

### CB

In [35]:
import numpy
# get_df returns rows sorted by descending f1_score, so head(1) is the top-F1
# run of each collection. The Train row is therefore the best run by training
# F1, picked independently of the Valid row.
# .assign builds a new frame, which avoids setting a column on a slice of the
# queried frame (pandas' chained-assignment warning).
df_train = (
    get_df("CB_STR_PCPTRN_RE-RANKER_HYPER_PARAM_TD", training_only=True)
    .head(1)
    .assign(Dataset="Train")
)
df_vd = get_df("CB_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD").head(1).assign(Dataset="Valid")
df_test = get_df("TEST_CB_STR_PCPTRN_RE-RANKER_VD").assign(Dataset="Test")

df_concat = pd.concat([df_train, df_vd, df_test])[["f1_score","recall", "precision", "Dataset", "best_top_n", "learning_rate"]]
df_concat.round(3)

Unnamed: 0,f1_score,recall,precision,Dataset,best_top_n,learning_rate
29,0.743,0.704,0.786,Train,1,0.1
29,0.741,0.704,0.783,Valid,1,0.1
0,0.75,0.748,0.752,Test,1,0.1


In [36]:
# Check rounding
# df_concat.round(5)

In [37]:
# get_df returns rows sorted by descending f1_score, so head(1) is the top-F1
# run of each collection.
# NOTE(review): the Train row is picked by training F1, independently of the
# Valid row. In the recorded output it has learning_rate 0.5 while Valid/Test
# use 0.05, so the three rows do not describe the same configuration —
# confirm whether the Train row should be matched on the Valid hyper-parameters.
# .assign builds a new frame, which avoids setting a column on a slice of the
# queried frame (pandas' chained-assignment warning).
df_train = (
    get_df("SC_STR_PCPTRN_RE-RANKER_HYPER_PARAM_TD_3", training_only=True)
    .head(1)
    .assign(Dataset="Train")
)
df_vd = get_df("SC_STR_PCPTRN_RE-RANKER_HYPER_PARAM_VD_3").head(1).assign(Dataset="Valid")
df_test = get_df("TEST_SC_STR_PCPTRN_RE-RANKER_VD_3").assign(Dataset="Test")

df_concat = pd.concat([df_train, df_vd, df_test])[["f1_score","recall", "precision", "Dataset", "best_top_n", "learning_rate"]]
df_concat.round(3)

Unnamed: 0,f1_score,recall,precision,Dataset,best_top_n,learning_rate
6,0.812,0.779,0.849,Train,5,0.5
4,0.814,0.78,0.85,Valid,5,0.05
0,0.829,0.814,0.845,Test,5,0.05


## Test Summary Results

### CB

In [50]:
# Metric columns shown for every model in the summary table.
frp = ["f1_score", "recall", "precision"]

# Each frame is reduced to the metric columns and tagged with its model name.
# cols=[] tells get_df to add no parameter columns.
df_sentp = get_df("SENT_TEST_CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_FIXED_VD")[frp].assign(Model="Sent. Parser")
df_essayp = get_df("TEST_CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM2_VD", cols=[])[frp].assign(Model="Essay Parser")
df_reranker = get_df("TEST_CB_STR_PCPTRN_RE-RANKER_VD", cols=[])[frp].assign(Model="Re-Ranker")

# Stack the per-model frames and round for display.
df_concat_sum = pd.concat([df_sentp, df_essayp, df_reranker])
df_concat_sum.round(3)

Unnamed: 0,f1_score,recall,precision,Model
0,0.737,0.766,0.71,Sent. Parser
0,0.74,0.729,0.75,Essay Parser
0,0.75,0.748,0.752,Re-Ranker


### SC

In [55]:
# Metric columns shown for every model in the summary table.
frp = ["f1_score", "recall", "precision"]

# Each frame is reduced to the metric columns and tagged with its model name.
# cols=[] tells get_df to add no parameter columns.
df_sentp = get_df("SENT_TEST_CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM_VD")[frp].assign(Model="Sent. Parser")
df_essayp = get_df("TEST_CR_SC_SHIFT_REDUCE_PARSER_TEMPLATED_HYPER_PARAM2_VD", cols=[])[frp].assign(Model="Essay Parser")
df_reranker = get_df("TEST_SC_STR_PCPTRN_RE-RANKER_VD_3", cols=[])[frp].assign(Model="Re-Ranker")

# Stack the per-model frames and round for display.
df_concat_sum = pd.concat([df_sentp, df_essayp, df_reranker])
df_concat_sum.round(3)

Unnamed: 0,f1_score,recall,precision,Model
0,0.827,0.8,0.856,Sent. Parser
0,0.821,0.813,0.829,Essay Parser
0,0.829,0.814,0.845,Re-Ranker
