In [None]:
from io import StringIO
from pathlib import Path

from dapla_pseudo import PseudoData
import pandas as pd

JSON_FILE = "data/personer.json"
CSV_FILE = "data/personer.csv"

# Explicit dtype per column, handed to pandas instead of relying on inference
# (presumably so identifier-like columns such as "fnr" stay textual - confirm).
PERSON_DTYPES = {
    "fnr": "string",
    "fornavn": "string",
    "etternavn": "string",
    "kjonn": "category",
    "fodselsdato": "string",
}

# Load the example persons from the JSON file and preview the first rows.
df = pd.read_json(JSON_FILE, dtype=PERSON_DTYPES)
df.head()

# Case: Single field default encryption (DAEAD)

In [None]:
# Select only the "fnr" column and pseudonymize it with no extra options.
fnr_field = PseudoData.from_pandas(df).on_field("fnr")
result = fnr_field.pseudonymize()
result.to_polars().head()

# Case: Single field SID mapping

In [None]:
# Map "fnr" to its stable ID first, then pseudonymize the mapped value.
sid_mapped_fnr = PseudoData.from_pandas(df).on_field("fnr").map_to_stable_id()
result = sid_mapped_fnr.pseudonymize()
result.to_polars().head()

# Case: Single field FPE (e.g. for an existing stable ID/snr/SID)

In [None]:
# Pseudonymize "fnr" with preserve_formatting=True rather than the plain call.
fnr_field = PseudoData.from_pandas(df).on_field("fnr")
result = fnr_field.pseudonymize(preserve_formatting=True)
result.to_polars().head()

# Case: Multiple fields default encryption (DAEAD)

In [None]:
# Select several columns at once; one pseudonymize() call is applied to the selection.
name_and_birth_fields = ("fornavn", "etternavn", "fodselsdato")
result = PseudoData.from_pandas(df).on_fields(*name_and_birth_fields).pseudonymize()
result.to_polars().head()

# Case: Chaining calls
Calls may simply be chained together to apply different pseudonymization to different fields.

In [None]:
# Step 1: map "fnr" to its stable ID and pseudonymize it.
sid_result = (
    PseudoData.from_pandas(df)
    .on_field("fnr")
    .map_to_stable_id()
    .pseudonymize()
)

# Step 2: feed the output of step 1 back in and pseudonymize the remaining
# columns with a plain pseudonymize() call.
# `to_polars()` yields a polars frame (per its name), whereas `from_pandas`
# is the pandas entry point, so convert to pandas before handing it over.
result = (
    PseudoData.from_pandas(sid_result.to_polars().to_pandas())
    .on_fields("fornavn", "etternavn", "fodselsdato")
    .pseudonymize()
)
result.to_polars().head()

# Case: Single field SID mapping with specific SID version
The SID version is deduced from the supplied snapshot date.

In [None]:
# Request the stable-ID mapping for a given snapshot date, passed as a string.
sid_mapped_fnr = (
    PseudoData.from_pandas(df)
    .on_field("fnr")
    .map_to_stable_id(sid_snapshot_date="2023-07-01")
)
result = sid_mapped_fnr.pseudonymize()
result.to_polars().head()

In [None]:
# Same SID mapping, but the snapshot date is given as a datetime.date object
# (today's date) instead of a string.
from datetime import date

fnr_field = PseudoData.from_pandas(df).on_field("fnr")
result = fnr_field.map_to_stable_id(sid_snapshot_date=date.today()).pseudonymize()
result.to_polars().head()

# Case: Validate field SID mapping
Validate that all values in a column have valid SIDs

In [None]:
from dapla_pseudo import Validator

# Run the stable-ID validation on the "fnr" column and preview the outcome.
fnr_validator = Validator.from_pandas(df).on_field("fnr")
result = fnr_validator.validate_map_to_stable_id()
result.to_polars().head()