# Primer: API implementation
This notebook reconstructs the `Translator` showcased in the [Translation primer](../../../translation-primer.rst) using the API.

In [1]:
import sys
import rics
import id_translation

# Print the versions of the libraries and interpreter used to execute this
# notebook. The `=` specifier makes each f-string print the expression text
# together with the repr of its value.
print(f"{rics.__version__=}")
print(f"{id_translation.__version__=}")
print(f"{sys.version=}")
# IPython shell escape: show the abbreviated hash and subject line of the most
# recent commit.
!git log --pretty=oneline --abbrev-commit -1

rics.__version__='0.17.0.dev1'
id_translation.__version__='0.1.0.dev0'
sys.version='3.10.6 (main, Nov  2 2022, 18:53:38) [GCC 11.3.0]'
538ca56 (HEAD -> tmp, origin/tmp, main) split


In [2]:
from rics.utility import configure_stuff

# Request DEBUG-level output for `rics`. The `rics.mapping.Mapper` messages in
# the "Moment of truth" cell output are logged at this level.
configure_stuff(rics_level="DEBUG")



## Translatable data

In [3]:
from pandas import read_csv

# Load the data to translate. The path is relative, so the CSV file must be in
# the working directory of the notebook.
bite_report = read_csv("biting-victims-2019-05-11.csv")
# Display the untranslated report; `human_id` and `bitten_by` hold raw IDs.
bite_report

Unnamed: 0,human_id,bitten_by
0,1904,1
1,1991,0
2,1991,2
3,1999,0


## Name-to-source mapping

In [4]:
from rics.mapping import HeuristicScore, Mapper

# Score name-to-source matches using the "equality" score function combined
# with the built-in "like_database_table" heuristic.
score_function = HeuristicScore("equality", heuristics=["like_database_table"])
# The override forces the name "bitten_by" to map to the "animals" source,
# regardless of its computed score.
mapper = Mapper(score_function, overrides={"bitten_by": "animals"})

## Translation format

In [5]:
# Placeholders in {braces} are filled in for each translated ID. The parts in
# [brackets] appear to be optional: in the translated output, entries without
# a title or species simply omit that part -- confirm against the format docs.
translation_format = "[{title}. ]{name} (id={id})[ the {species}]"

## Placeholder mapping
Define a heuristic score function for mapping placeholders to the columns of each source.

In [6]:
from id_translation.fetching import PandasFetcher


def smurf_column_heuristic(value, candidates, context):
    """Heuristic for matching columns that use the "smurf" convention.

    The value is prefixed with the context and an underscore. A single trailing
    "s" is stripped from the context first, so that the placeholder ``id`` in
    the context ``animals`` becomes ``animal_id``.

    Args:
        value: The desired placeholder.
        candidates: Candidate names to match against. Returned unchanged.
        context: Name of the source for which placeholder mapping is being
            performed.

    Returns:
        A tuple ``(prefixed_value, candidates)``.
    """
    # Use `str.endswith` rather than `context[-1]` so that an empty context
    # does not raise an IndexError.
    prefix = context[:-1] if context.endswith("s") else context
    return f"{prefix}_{value}", candidates


# Combine the "equality" score function with the smurf-convention heuristic.
smurf_score = HeuristicScore("equality", heuristics=[smurf_column_heuristic])

## Create fetcher

In [7]:
# Fetch translation data using `read_csv` on files matching "./sources/{}.csv"
# (presumably `{}` is replaced by the source name -- confirm against the
# PandasFetcher documentation). Placeholder mapping uses the smurf score.
fetcher = PandasFetcher(
    read_csv, read_path_format="./sources/{}.csv", mapper=Mapper(smurf_score)
)

## Moment of truth

In [8]:
from id_translation import Translator

# Assemble the translator from the fetcher, translation format and
# name-to-source mapper created in the previous cells.
translator = Translator(fetcher, fmt=translation_format, mapper=mapper)
# Translate the IDs in the report; the result is bound to a new name.
translated_bite_report = translator.translate(bite_report)
translated_bite_report

2022-11-26T16:07:38.258 [rics.mapping.Mapper:DEBUG] Begin computing match scores for values=('human_id', 'bitten_by') to candidates=('animals', 'humans') using HeuristicScore([like_database_table()] -> equality).
2022-11-26T16:07:38.259 [rics.mapping.Mapper:DEBUG] Computed 2x2 match scores in 0.00123532 sec:
candidates  animals  humans
values                     
human_id          0       1
bitten_by       inf    -inf
2022-11-26T16:07:38.264 [rics.mapping.Mapper.accept:DEBUG] Accepted: 'bitten_by' -> 'animals'; score=inf (short-circuit or override).
2022-11-26T16:07:38.265 [rics.mapping.Mapper.accept.details:DEBUG] This match supersedes 1 other matches:
    'bitten_by' -> 'humans'; score=-inf (superseded by short-circuit or override).
2022-11-26T16:07:38.265 [rics.mapping.Mapper.accept:DEBUG] Accepted: 'human_id' -> 'humans'; score=1.000 >= 1.0.
2022-11-26T16:07:38.266 [rics.mapping.Mapper.accept.details:DEBUG] This match supersedes 1 other matches:
    'human_id' -> 'animals'; score=0

Unnamed: 0,human_id,bitten_by
0,Mr. Fred (id=1904),Morris (id=1) the dog
1,Mr. Richard (id=1991),Tarzan (id=0) the cat
2,Mr. Richard (id=1991),Simba (id=2) the lion
3,Dr. Sofia (id=1999),Tarzan (id=0) the cat


In [9]:
# Verify the result against the expected translations stored on disk. Unlike a
# bare `assert df.equals(...)`, `assert_frame_equal` reports which values
# differ if the check fails.
from pandas.testing import assert_frame_equal

assert_frame_equal(
    translated_bite_report,
    read_csv("biting-victims-2019-05-11-translated.csv"),
)