In [1]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment
# before reading the credentials below.
load_dotenv()

# Both keys are required; indexing os.environ raises KeyError right here if
# either one is missing, instead of failing later inside an API call.
INDOX_API_KEY = os.environ["INDOX_API_KEY"]
NVIDIA_API_KEY = os.environ["NVIDIA_API_KEY"]

In [2]:
# Target schema for the generated rows.
columns = ["name", "age", "occupation"]

# Seed examples, one dict per row keyed by `columns` (values listed in column order).
example_data = [
    dict(zip(columns, row))
    for row in (
        ("Alice Johnson", 35, "Manager"),
        ("Bob Williams", 42, "Accountant"),
    )
]

In [3]:
# Column schema and few-shot examples for a medical-billing dataset.
# Not used by the name/age/occupation generator configured further down.
columns_medical = [
    "Patient ID",
    "Patient Name",
    "Diagnosis Code",
    "Procedure Code",
    "Total Charge",
    "Insurance Claim Amount",
]

# Each example is a single-line "Field: value, ..." record. The strings are
# built from adjacent literals (not triple-quoted blocks) so that source-line
# wrapping never leaks a newline or indentation into the example text, which
# previously split labels such as "Diagnosis Code" across two lines.
examples_medical = [
    {
        "example": (
            "Patient ID: 123456, Patient Name: John Doe, Diagnosis Code: J20.9, "
            "Procedure Code: 99203, Total Charge: $500, Insurance Claim Amount: $350"
        )
    },
    {
        "example": (
            "Patient ID: 789012, Patient Name: Johnson Smith, Diagnosis Code: M54.5, "
            "Procedure Code: 99213, Total Charge: $150, Insurance Claim Amount: $120"
        )
    },
    {
        "example": (
            "Patient ID: 345678, Patient Name: Emily Stone, Diagnosis Code: E11.9, "
            "Procedure Code: 99214, Total Charge: $300, Insurance Claim Amount: $250"
        )
    },
]

In [4]:
from indoxGen.llms import OpenAi, IndoxApi

# Client for the Indox API; passed as `judge_llm` when the generator is built.
indox = IndoxApi(api_key=INDOX_API_KEY)

# Nemotron-4 340B Instruct reached through the OpenAi wrapper pointed at
# NVIDIA's hosted endpoint; passed as `generator_llm` when the generator is built.
nemotron = OpenAi(
    api_key=NVIDIA_API_KEY,
    model="nvidia/nemotron-4-340b-instruct",
    base_url="https://integrate.api.nvidia.com/v1",
)

In [5]:
# Pair of floats forwarded to SyntheticDataGeneratorHF as `feedback_range`.
# NOTE(review): presumably the (low, high) judge-score band that routes a row
# to human review -- confirm against the indoxGen documentation.
feedback_range = 0.3, 0.8

In [6]:
from indoxGen.synthCore import SyntheticDataGeneratorHF
# Build the synthetic-data generator: `nemotron` is supplied as the generating
# model and `indox` as the judging model, seeded with the schema and example
# rows defined earlier in the notebook.
generator = SyntheticDataGeneratorHF(
    generator_llm=nemotron,
    judge_llm=indox,
    columns=columns,
    example_data=example_data,
    # The instruction only mentions fields that exist in `columns`. The earlier
    # wording asked for "procedures", "race" and "date ranges", none of which
    # are part of the name/age/occupation schema.
    user_instruction=(
        "Generate realistic data including name, age and occupation. "
        "Ensure a mix of common and rare occupations, culturally diverse names, "
        "and a wide range of adult ages appropriate for each occupation."
    ),
    verbose=1,
    diversity_threshold=0.4,
    feedback_range=feedback_range,
)

In [7]:
# Request 8 samples from the generator. With verbose=1 each generated data
# point is printed as it is produced (see the cell output).
generated_data = generator.generate_data(
    num_samples=8,
)

Generated data point: {'name': 'Dr. Maya Patel', 'age': 37, 'occupation': 'Neurosurgeon'}
Generated data point: {'name': 'Capt. Jamal Al-Rashid', 'age': 39, 'occupation': 'Aircraft Pilot'}
Generated data point: {'name': 'Rev. Prof. Esperanza Mendoza-Chavez', 'age': 36, 'occupation': 'Theologian and University Professor'}
Generated data point: {'name': 'Sgt. Maj. Kenji Tanaka', 'age': 40, 'occupation': 'Military Musician and Composer'}
Generated data point: {'name': 'Dr. Indira Patel-Singh, MD, PhD', 'age': 37, 'occupation': 'Neurosurgeon and Medical Researcher'}
Generated data point: {'name': 'Chief Mate Thandiwe Mokoena', 'age': 39, 'occupation': "Marine Engineer and Ship's Officer"}
Generated data point: {'name': 'Rev. Fr. José María González, S.J.', 'age': 36, 'occupation': 'Jesuit Priest, Astrophysicist and University Professor'}


In [8]:
# Show the object returned by generate_data(); left as a bare last expression
# so the notebook renders it as a table.
generated_data

Unnamed: 0,name,age,occupation
0,Dr. Maya Patel,37,Neurosurgeon
1,Capt. Jamal Al-Rashid,39,Aircraft Pilot
2,Rev. Prof. Esperanza Mendoza-Chavez,36,Theologian and University Professor
3,Sgt. Maj. Kenji Tanaka,40,Military Musician and Composer
4,"Dr. Indira Patel-Singh, MD, PhD",37,Neurosurgeon and Medical Researcher
5,Chief Mate Thandiwe Mokoena,39,Marine Engineer and Ship's Officer
6,"Rev. Fr. José María González, S.J.",36,"Jesuit Priest, Astrophysicist and University P..."


In [9]:
# Inspect the generator's `pending_review` attribute. In the recorded run it
# is a list holding a single row dict.
# NOTE(review): presumably these are rows awaiting a human decision -- confirm.
generator.pending_review

[{'name': 'Dr. Maya Patel', 'age': 37, 'occupation': 'Neurosurgeon'}]

In [14]:
# Request regeneration of row 0 with free-text guidance, accepting no rows.
# NOTE(review): the indices presumably refer to positions in
# `generator.pending_review` -- confirm against the indoxGen documentation.
# In the recorded output the regenerated row is appended as a new row
# (index 7), while the original row 0 is still listed.
generator.user_review_and_regenerate(
    regenerate_rows=[0],
    accepted_rows=[],
    regeneration_feedback='change name to another name , also change occupation to another occupation',
    min_score=0.7,
)

{'name': 'Dr. Amelia Wong, DVM, DACVIM', 'age': '39', 'occupation': 'Veterinary Specialist in Neurology and Neurosurgery, and Animal Welfare Advocate'}


Unnamed: 0,name,age,occupation
0,Dr. Maya Patel,37,Neurosurgeon
1,Capt. Jamal Al-Rashid,39,Aircraft Pilot
2,Rev. Prof. Esperanza Mendoza-Chavez,36,Theologian and University Professor
3,Sgt. Maj. Kenji Tanaka,40,Military Musician and Composer
4,"Dr. Indira Patel-Singh, MD, PhD",37,Neurosurgeon and Medical Researcher
5,Chief Mate Thandiwe Mokoena,39,Marine Engineer and Ship's Officer
6,"Rev. Fr. José María González, S.J.",36,"Jesuit Priest, Astrophysicist and University P..."
7,"Dr. Amelia Wong, DVM, DACVIM",39,Veterinary Specialist in Neurology and Neurosu...
