In [None]:
!pip install reconner

# Introduction

This notebook walks through some of the basic use cases of Recon.

In [1]:
from pprint import pprint
from recon import Dataset, get_ner_stats

## Loading Data

In [2]:
# Build a Dataset named "dev" and populate it from the ./data/skills directory,
# then print it.
ds = Dataset(
    "dev",
    verbose=True,
).from_disk("./data/skills")
print(ds)

<recon.dataset.Dataset object at 0x12c2b20b0>


In [6]:
# Compute NER stats directly from the Dataset's underlying examples.
ner_stats = get_ner_stats(ds.data)
print(ner_stats)

{
    "n_examples": 110,
    "n_examples_no_entities": 49,
    "n_annotations": 180,
    "n_annotations_per_type": {
        "SKILL": 159,
        "PRODUCT": 20,
        "JOB_ROLE": 1
    },
    "examples_with_type": null
}


## Applying Dataset Operations using `Dataset.apply`

If we run `get_ner_stats` on the data in our Dataset through `Dataset.apply`, we see the same stats that are printed above. The `Dataset.__str__` method runs the `get_ner_stats` function internally.

In [3]:
# Run get_ner_stats through Dataset.apply instead of calling it on ds.data.
# serialize=True is presumably forwarded to get_ner_stats -- confirm against
# the Dataset.apply signature.
serialized_stats = ds.apply(get_ner_stats, serialize=True)
print(serialized_stats)

{
    "n_examples": 110,
    "n_examples_no_entities": 49,
    "n_annotations": 180,
    "n_annotations_per_type": {
        "SKILL": 159,
        "PRODUCT": 20,
        "JOB_ROLE": 1
    },
    "examples_with_type": null
}


In [13]:
# Without serialize=True, take the object returned by Dataset.apply and render
# it ourselves through its .json() method with indent=4.
stats_model = ds.apply(get_ner_stats)
print(stats_model.json(indent=4))

{
    "n_examples": 110,
    "n_examples_no_entities": 49,
    "n_annotations": 180,
    "n_annotations_per_type": {
        "SKILL": 159,
        "PRODUCT": 20,
        "JOB_ROLE": 1
    },
    "examples_with_type": null
}


## Make in-place Dataset Corrections using `Dataset.apply_`

In [None]:
# Apply the operation named "recon.v1.upcase_labels" to the dataset in place.
upcase_operation = "recon.v1.upcase_labels"
ds.apply_(upcase_operation)

In [None]:
# Apply the operation named "recon.v1.fix_tokenization_and_spacing" to the
# dataset in place.
tokenization_operation = "recon.v1.fix_tokenization_and_spacing"
ds.apply_(tokenization_operation)

## Chain Dataset operations together and run them sequentially

In [None]:
# Hand both operation names to Dataset.pipe_ in a single call, in the order
# they should run.
correction_pipeline = [
    "recon.v1.upcase_labels",
    "recon.v1.fix_tokenization_and_spacing",
]
ds.pipe_(correction_pipeline)

In [None]:
# Print the dataset again now that the in-place operations have been applied.
print(ds)