# 🧠 Misata - Getting Started

**Generate realistic multi-table datasets from natural language.**

This notebook shows you how to:
1. Generate synthetic data from a schema
2. Apply noise for ML training data
3. Customize attribute distributions

In [None]:
!pip install misata -q
print("âœ… Misata installed!")

## Step 1: Basic Data Generation

In [None]:
from misata import DataSimulator, SchemaConfig, Column, Table
import pandas as pd

# Two-table demo schema: 1,000 customers and 5,000 orders, where
# orders.customer_id references customers.id (see `relationships`).
config = SchemaConfig(
    name="Demo",
    seed=42,
    tables=[Table(name="customers", row_count=1000), Table(name="orders", row_count=5000)],
    columns={
        "customers": [
            Column(name="id", type="int", distribution_params={"min": 1, "max": 1000}, unique=True),
            Column(name="name", type="text", distribution_params={"text_type": "name"}),
            Column(name="age", type="int", distribution_params={"min": 18, "max": 70}),
        ],
        "orders": [
            Column(name="id", type="int", distribution_params={"min": 1, "max": 5000}, unique=True),
            Column(name="customer_id", type="foreign_key", distribution_params={}),
            Column(name="amount", type="float", distribution_params={"min": 10, "max": 500}),
        ]
    },
    relationships=[{"parent_table": "customers", "child_table": "orders", "parent_key": "id", "child_key": "customer_id"}],
    events=[]
)

sim = DataSimulator(config)

# generate_all() yields (table_name, batch) pairs; the same table name may be
# yielded more than once, so gather every batch first and concatenate each
# table a single time. Re-concatenating inside the loop would copy the
# accumulated frame on every batch (quadratic in the number of batches).
batches_by_table = {}
for name, batch in sim.generate_all():
    batches_by_table.setdefault(name, []).append(batch)

# One DataFrame per table, keyed by table name.
data = {name: pd.concat(batches) for name, batches in batches_by_table.items()}

print(f"✅ {len(data['customers'])} customers, {len(data['orders'])} orders")
data["customers"].head()

## Step 2: Add Noise for ML Training

In [None]:
from misata import add_noise

# Settings handed to add_noise for the orders table. The three rates are
# presumably fractions of the data to corrupt -- confirm against the misata docs.
noise_settings = dict(null_rate=0.05, outlier_rate=0.02, duplicate_rate=0.03, seed=42)
noisy = add_noise(data["orders"], **noise_settings)

# Compare row counts before/after and report the total number of null cells.
print(f"Original: {len(data['orders'])} rows | With noise: {len(noisy)} rows, {noisy.isnull().sum().sum()} nulls")

# Preview only the rows that contain at least one null value.
rows_with_nulls = noisy.isnull().any(axis=1)
noisy[rows_with_nulls].head()

## Step 3: Custom Distributions

In [None]:
from misata import Customizer, ColumnOverride
import numpy as np
import matplotlib.pyplot as plt

# Dedicated, seeded generator for the override below. Drawing from NumPy's
# global state (np.random.normal) is not tied to any seed set in this cell,
# so the custom ages could differ from run to run. Re-creating the generator
# here also makes the cell give the same result every time it is re-run.
rng = np.random.default_rng(42)

c = Customizer(seed=42)
c.add_override(
    "customers",
    ColumnOverride(
        name="age",
        # Normal(mean=35, sd=12), clipped to [18, 80] and truncated to whole years.
        generator=lambda n: rng.normal(35, 12, n).clip(18, 80).astype(int),
    ),
)
# Apply to a copy so the original customers table stays available for comparison.
custom = c.apply(data["customers"].copy(), "customers")

# Side-by-side histograms: generated ages vs. overridden ages.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax[0].hist(data["customers"]["age"], bins=20, color='blue')
ax[0].set_title("Original (Uniform)")
ax[1].hist(custom["age"], bins=20, color='green')
ax[1].set_title("Custom (Normal)")
for axis in ax:
    axis.set_xlabel("Age")
    axis.set_ylabel("Customers")
plt.show()

---
**Next:** Install locally with `pip install misata` or try the CLI!

📧 Enterprise support: rasinbinabdulla@gmail.com