In [None]:
from demo_utils import load_users_data, clean_up, fancy_dataframe, pretty_print, Tracker

# Synth: the fastest way to get synthetic data to your engineers

In [None]:
# Load the demo users dataset; the helper is awaited directly at cell top level.
# NOTE(review): presumably this is the raw, sensitive data referred to in Step 1 — confirm.
users_data = await load_users_data()

In [None]:
# Wrap the loaded data with the demo display helper and show the result of
# .head() as the cell output (presumably the first few rows — confirm against
# fancy_dataframe's return type).
fancy_dataframe(users_data).head()

## Synthesize your data in two steps

### Step 1: Upload the raw sensitive data to the persistent process

- This is done by someone in the organization who is highly credentialed, but here is the key: it is done only **once**!
- This lets `Synth` do two things:
  1. Scan the data and automagically flag bits and pieces as sensitive: this makes sure the synthetic data is anonymized.
  2. Build an ML-driven model of the data: this makes sure the synthetic data is highly representative of the original.

In [None]:
from synthpy import Synth
# Create the client with default constructor arguments; the cells below use it
# for ingestion, generation and schema lookup.
synth = Synth()

In [None]:
with Tracker("Synth is working..."):
    await synth.ingest.put_documents(
        namespace="dev", 
        collection="users", 
        batch=users_data
    )

### Step 2: Collect as many synthetic documents as you need, whenever you need them
- This can be done by any engineer on your team. Because the data is anonymized, it is safe to move around.
- `Synth` is built **API-first**: it can be integrated in any testing suite or environment in a jiffy.

In [None]:
synthesized_data = await synth.generate.get_documents(
    namespace="dev", 
    collection="users",
    size=10
)
fancy_dataframe(synthesized_data).head()

# How did it do that??

In [None]:
schema = await synth.namespace.get_schema(
    namespace="dev", 
    collection="users"
)
pretty_print(schema)