### Initialise API

In [1]:
from datallm import DataLLM
import pandas as pd
import os

# Build the DataLLM client. The API key is looked up in the API_KEY environment
# variable (the lookup yields None when the variable is not set), so no
# credential is stored in the notebook itself.
datallm = DataLLM(
    api_key=os.environ.get("API_KEY"),
    base_url="https://data.mostly.ai",
)


### Enrich Data Set

In [2]:
# Seed records that will be enriched with additional generated columns.
df = pd.DataFrame(
    {
        "age in years": [5, 10, 13, 19, 30, 40, 50, 60, 70, 80],
        "gender": ["m", "f"] * 5,
        "country code": ["AT", "DE", "FR", "IT", "ES", "PT", "GR", "UK", "SE", "FI"],
    }
)

# One entry per new column: (column name, keyword arguments for datallm.enrich).
enrichments = [
    # the official country name
    ("country", {"prompt": "official name of the country"}),
    # first name and last name
    ("first name", {"prompt": "the first name of that person"}),
    ("last name", {"prompt": "the last name of that person"}),
    # a categorical column, with the categories listed explicitly
    (
        "age group",
        {"prompt": "age group", "categories": ["kid", "teen", "adult", "elderly"]},
    ),
    # a boolean value and an integer value
    ("speaks german", {"prompt": "speaks german?", "dtype": "boolean"}),
    ("body height", {"prompt": "the body height in cm", "dtype": "integer"}),
]

# Apply the enrichments in order: every call is handed the frame including
# the columns that were added by the preceding calls.
for column, enrich_kwargs in enrichments:
    df[column] = datallm.enrich(df, **enrich_kwargs)

print(df)

Output()

Output()

Output()

Output()

Output()

Output()

   age in years gender country code  \
0             5      m           AT   
1            10      f           DE   
2            13      m           FR   
3            19      f           IT   
4            30      m           ES   
5            40      f           PT   
6            50      m           GR   
7            60      f           UK   
8            70      m           SE   
9            80      f           FI   

                                             country    first name  last name  \
0                                            Austria         Marco    Schnell   
1                                            Germany          Nina      Röhrs   
2                                             France        Adrien   Lefebvre   
3                                              Italy     Gabriella  Finocchio   
4                                              Spain          Jose     Mendia   
5                                           Portugal         Carla  Filgueira   
6  

### Generate Data Set from scratch

In [3]:
# Generate a table from scratch: no input frame is passed, only a description
# of the data set plus, per column, a prompt and (optionally) a dtype.
# NOTE: this rebinds `df`, replacing the enriched frame from the previous cell.
df = datallm.mock(
    n=100,  # number of generated records
    data_description="Generate a list of transactions including wash trades",
    columns={
        "traderId": {"prompt": "Id of person who made the trade"},
        # The prompt refers to "the trade" rather than "the wash trade" because
        # this column is generated for every row, while `is_wash_trade` below
        # flags which rows are wash trades.
        "country": {"prompt": "the 2-letter country code of the place where the trade was made"},
        "date_of_transaction": {"prompt": "the date of the transaction", "dtype": "date"},
        "amount": {"prompt": "the amount of the transaction", "dtype": "float"},
        "currency": {"prompt": "the currency of the transaction"},
        "is_wash_trade": {"prompt": "is this a wash trade?", "dtype": "boolean"},
    },
    temperature=0.7,
)
print(df)



Output()

Output()

Output()

Output()

Output()

Output()

   traderId country date_of_transaction   amount currency is_wash_trade
0       138      BZ          2016-07-14      0.0      USD          True
1     T6430      JT          2021-01-18  1136.09      JPY         False
2      6241      NA          2011-09-19     5.07      USD          True
3    L04717      US          2022-12-19    247.0      USD         False
4   1111744      HK          2016-12-28  34830.0      USD          True
..      ...     ...                 ...      ...      ...           ...
95      986      SK          2016-10-18   671.09      EUR         False
96    64889      EC          2017-10-20     6.54      USD         False
97    U1539      LA          1990-10-04     24.8      USD         False
98  2112925      HK          2021-06-28     2.89      USD          True
99        0      US          2016-07-04    10.41      USD          True

[100 rows x 6 columns]
