# Primer: API implementation
This notebook reconstructs the `Translator` showcased in the [Translation primer](../../../translation-primer.rst) using the API.

In [1]:
import sys
import rics
import id_translation

# Print relevant versions
print(f"{rics.__version__=}")
print(f"{id_translation.__version__=}")
print(f"{sys.version=}")
rics.configure_stuff(id_translation_level="DEBUG")
!git log --pretty=oneline --abbrev-commit -1

rics.__version__='3.0.0'
id_translation.__version__='0.3.1.dev1'
sys.version='3.10.6 (main, Mar 10 2023, 10:55:28) [GCC 11.3.0]'
👻 Configured some stuff just the way I like it!
[33md2093a4[m[33m ([m[1;36mHEAD[m[33m, [m[1;31morigin/main[m[33m, [m[1;31morigin/HEAD[m[33m, [m[1;32mmain[m[33m)[m Update help link in TOML files


## Translatable data

In [2]:
from pandas import read_csv

# Load the bite report whose ID columns are to be translated. The bare
# expression on the last line renders the untranslated frame as cell output.
bite_report = read_csv("biting-victims-2019-05-11.csv")
bite_report

Unnamed: 0,human_id,bitten_by
0,1904,1
1,1991,0
2,1991,2
3,1999,0


## Name-to-source mapping

In [3]:
from rics.mapping import HeuristicScore, Mapper

# Name-to-source scoring: "equality" scoring plus the built-in
# "like_database_table" heuristic. NOTE(review): presumably this heuristic is
# what lets "human_id" match the "humans" source (see the name-to-source
# mapping log in the translation cell) -- confirm against the rics docs.
score_function = HeuristicScore("equality", heuristics=["like_database_table"])
# "bitten_by" is pinned to the "animals" source through an explicit override.
mapper = Mapper(score_function, overrides={"bitten_by": "animals"})

## Translation format

In [4]:
# Template used to render each translated ID. NOTE(review): judging by the
# translated output, a bracketed segment is left out when its placeholder is
# not available for a source (human rows show no species, animal rows show no
# title) -- confirm against the id_translation format documentation.
translation_format = "[{title}. ]{name} (id={id})[ the {species}]"

## Placeholder mapping
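Define a heuristic score function for mapping the placeholders wanted by the translation format to the actual placeholder names available in each source.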

In [5]:
from id_translation.fetching import PandasFetcher


def smurf_column_heuristic(value: str, candidates, context: str):
    """Rewrite a wanted placeholder into its "smurf"-convention column name.

    The rewritten name is the source name, minus one trailing ``"s"`` if it has
    one, joined to the placeholder with an underscore. For example, placeholder
    ``name`` in source ``animals`` becomes ``animal_name``.

    Args:
        value: The desired placeholder.
        candidates: The candidates to match against; returned unchanged.
        context: The name of the source for which placeholder mapping is being
            performed.

    Returns:
        A tuple ``(rewritten_value, candidates)``.
    """
    # endswith() is used instead of context[-1] so that an empty context falls
    # through to the "no trailing s" branch rather than raising IndexError.
    prefix = context[:-1] if context.endswith("s") else context
    return f"{prefix}_{value}", candidates


# Score function for placeholder mapping: "equality" scoring with the smurf
# heuristic plugged in. How HeuristicScore combines the two is defined by
# rics and is not visible in this notebook.
smurf_score = HeuristicScore("equality", heuristics=[smurf_column_heuristic])

## Create fetcher

In [6]:
# Fetcher that reads sources with pandas' read_csv using the path template
# "./sources/{}.csv"; the log in the translation cell lists 'humans' and
# 'animals' as the initialized sources. Placeholder mapping uses a Mapper
# built on the smurf score function.
fetcher = PandasFetcher(
    read_csv, read_path_format="./sources/{}.csv", mapper=Mapper(smurf_score)
)

## Moment of truth

In [7]:
from id_translation import Translator

# Assemble the Translator from the fetcher, the translation format and the
# name-to-source mapper, then translate the bite report.
translator = Translator(fetcher, fmt=translation_format, mapper=mapper)
translated_bite_report = translator.translate(bite_report)
# Bare expression: display the translated frame as the cell output.
translated_bite_report

2023-03-25T11:23:14.013 [id_translation.fetching.pandas.discovery:DEBUG] Sources initialized: ['humans', 'animals']
2023-03-25T11:23:14.016 [id_translation.Translator:INFO] Begin translation of 'DataFrame' using sources=['humans', 'animals']. Names to translate: Will be derived based on 'DataFrame'.
2023-03-25T11:23:14.016 [id_translation.Translator:DEBUG] Begin name-to-source mapping of names=['human_id', 'bitten_by'] in DataFrame against sources=['humans', 'animals'].
2023-03-25T11:23:14.019 [id_translation.Translator:DEBUG] Finished name-to-source mapping of names=['human_id', 'bitten_by'] in DataFrame against sources=['humans', 'animals']: {'human_id': ('humans',), 'bitten_by': ('animals',)}.
2023-03-25T11:23:14.019 [id_translation.fetching:DEBUG] Begin wanted-to-actual placeholder mapping of placeholders={'name', 'title', 'species', 'id'} to actual placeholders={'id', 'title', 'name'} for source='humans'.
2023-03-25T11:23:14.021 [id_translation.fetching:DEBUG] Finished wanted-to-a

Unnamed: 0,human_id,bitten_by
0,Mr. Fred (id=1904),Morris (id=1) the dog
1,Mr. Richard (id=1991),Tarzan (id=0) the cat
2,Mr. Richard (id=1991),Simba (id=2) the lion
3,Dr. Sofia (id=1999),Tarzan (id=0) the cat


In [8]:
# Regression check: the translated report must equal the stored reference
# translation read from CSV.
assert translated_bite_report.equals(
    read_csv("biting-victims-2019-05-11-translated.csv")
)