# CRF fine-grained experiments analysis

In [5]:
from collections import defaultdict
from pprint import pprint
import os

from pymongo import MongoClient
import pandas as pd

In [3]:
# Open the MongoDB connection used by the rest of the notebook. Both settings
# are read from the environment; a missing variable raises KeyError.
client = MongoClient(host=os.environ['SACRED_MONGO_URL'])
db = client.get_database(os.environ['SACRED_DB_NAME'])

## Best configuration

In [13]:
def get_data(run_criteria, database=None):
    """Collect config values and final F1 scores for the matching runs.

    Every document in the ``runs`` collection that matches ``run_criteria``
    contributes exactly one entry to every column, so the result can be
    passed straight to ``pd.DataFrame``.

    Args:
        run_criteria: Query document passed to ``runs.find``.
        database: Object exposing ``runs`` and ``metrics`` collections.
            Defaults to the notebook-level ``db`` connection.

    Returns:
        A ``defaultdict(list)`` mapping column name to a list of values:
        ``run_id``, the six configuration keys, and ``final_f1(train)`` /
        ``final_f1(dev)``. A metric that is missing or has no recorded
        values is stored as NaN so the columns stay aligned with ``run_id``.

    Raises:
        KeyError: If a run lacks ``config`` or one of the configuration keys.
    """
    if database is None:
        database = db  # fall back to the connection created at the top of the notebook

    config_keys = ('c2', 'min_freq', 'use_prefix', 'use_suffix', 'use_wordshape', 'window')
    metric_names = tuple(f'final_f1({which})' for which in ('train', 'dev'))

    data = defaultdict(list)
    for run in database.runs.find(run_criteria):
        run_id = run['_id']
        data['run_id'].append(run_id)
        for conf in config_keys:
            data[conf].append(run['config'][conf])
        for metric_name in metric_names:
            metric = database.metrics.find_one({'run_id': run_id, 'name': metric_name})
            values = metric['values'] if metric is not None else None
            if not values:
                # Skipping the append here would leave this column shorter
                # than 'run_id' and shift later runs' scores onto the wrong row.
                print(f"run {run_id} metric {metric_name} is missing or empty, recording NaN")
                data[metric_name].append(float('nan'))
                continue
            if len(values) != 1:
                print(f"run {run_id} metric {metric_name} has length != 1, taking the last one")
            data[metric_name].append(values[-1])
    return data

### Fold 1

In [4]:
# Number of completed fold-1 runs. Collection.count() is deprecated (removed
# in PyMongo 4); count_documents() is the supported replacement.
db.runs.count_documents({'experiment.name': 'id-pos-tagging-crf-fine-fold1', 'status': 'COMPLETED'})

50

In [6]:
# Query selecting only the fold-1 runs that finished successfully.
run_criteria = {
    'experiment.name': 'id-pos-tagging-crf-fine-fold1',
    'status': 'COMPLETED',
}

In [14]:
# One row per matching run: configuration values plus final train/dev F1.
df = pd.DataFrame(data=get_data(run_criteria))

In [15]:
# Preview the first five runs.
df.head(n=5)

Unnamed: 0,c2,final_f1(dev),final_f1(train),min_freq,run_id,use_prefix,use_suffix,use_wordshape,window
0,0.013532,0.940546,0.993455,1,23,False,False,True,4
1,0.017698,0.959884,0.986087,4,24,True,True,False,2
2,0.95426,0.958111,0.980786,1,25,True,True,True,2
3,0.069159,0.954609,0.996956,2,26,True,False,False,4
4,0.019186,0.945468,0.994915,3,27,False,True,False,5


In [16]:
# Index label of the run with the highest dev F1.
df.loc[:, 'final_f1(dev)'].idxmax()

18

In [17]:
# Show the run with the best dev F1. idxmax() returns an index *label*, so
# select the row with .loc (label-based) rather than .iloc (position-based).
df.loc[df['final_f1(dev)'].idxmax()]

c2                 0.0223126
final_f1(dev)       0.960338
final_f1(train)     0.989564
min_freq                   1
run_id                    41
use_prefix              True
use_suffix             False
use_wordshape          False
window                     1
Name: 18, dtype: object

### Fold 2

In [18]:
# Query selecting only the fold-2 runs that finished successfully.
run_criteria = {
    'experiment.name': 'id-pos-tagging-crf-fine-fold2',
    'status': 'COMPLETED',
}

In [19]:
# One row per matching run: configuration values plus final train/dev F1.
df = pd.DataFrame(data=get_data(run_criteria))

In [20]:
# Preview the first five runs.
df.head(n=5)

Unnamed: 0,c2,final_f1(dev),final_f1(train),min_freq,run_id,use_prefix,use_suffix,use_wordshape,window
0,0.010775,0.953605,0.962854,3,73,True,False,True,0
1,0.505165,0.941358,0.949741,5,74,False,True,False,0
2,0.361862,0.957621,0.996979,1,75,True,True,False,4
3,0.001021,0.950308,0.996845,4,76,True,True,False,5
4,0.819127,0.957711,0.976038,3,77,True,True,False,2


In [21]:
# Show the run with the best dev F1. idxmax() returns an index *label*, so
# select the row with .loc (label-based) rather than .iloc (position-based).
df.loc[df['final_f1(dev)'].idxmax()]

c2                 0.169665
final_f1(dev)      0.964613
final_f1(train)    0.988525
min_freq                  1
run_id                   89
use_prefix             True
use_suffix             True
use_wordshape         False
window                    1
Name: 16, dtype: object

### Fold 3

In [22]:
# Fold 3: load the completed runs and show the one with the best dev F1.
run_criteria = {'experiment.name': 'id-pos-tagging-crf-fine-fold3', 'status': 'COMPLETED'}
df = pd.DataFrame(get_data(run_criteria))
# idxmax() returns an index *label*, so select the row with .loc
# (label-based) rather than .iloc (position-based).
df.loc[df['final_f1(dev)'].idxmax()]

c2                 0.0151549
final_f1(dev)       0.959867
final_f1(train)     0.999276
min_freq                   1
run_id                   150
use_prefix              True
use_suffix              True
use_wordshape          False
window                     3
Name: 27, dtype: object

### Fold 4

In [23]:
# Fold 4: load the completed runs and show the one with the best dev F1.
run_criteria = {'experiment.name': 'id-pos-tagging-crf-fine-fold4', 'status': 'COMPLETED'}
df = pd.DataFrame(get_data(run_criteria))
# idxmax() returns an index *label*, so select the row with .loc
# (label-based) rather than .iloc (position-based).
df.loc[df['final_f1(dev)'].idxmax()]

c2                 0.189562
final_f1(dev)      0.963507
final_f1(train)    0.991749
min_freq                  1
run_id                  205
use_prefix             True
use_suffix             True
use_wordshape          True
window                    2
Name: 32, dtype: object

### Fold 5

In [24]:
# Fold 5: load the completed runs and show the one with the best dev F1.
run_criteria = {'experiment.name': 'id-pos-tagging-crf-fine-fold5', 'status': 'COMPLETED'}
df = pd.DataFrame(get_data(run_criteria))
# idxmax() returns an index *label*, so select the row with .loc
# (label-based) rather than .iloc (position-based).
df.loc[df['final_f1(dev)'].idxmax()]

c2                 0.00762765
final_f1(dev)        0.963838
final_f1(train)      0.986588
min_freq                    2
run_id                    234
use_prefix               True
use_suffix               True
use_wordshape           False
window                      1
Name: 11, dtype: object