In [None]:
import os
from dotenv import load_dotenv

# Read the local api.env file into the process environment, then pull out the
# two credentials this notebook needs (a missing variable raises KeyError).
load_dotenv(dotenv_path='api.env')
INDOX_API_KEY, NVIDIA_API_KEY = os.environ['INDOX_API_KEY'], os.environ['NVIDIA_API_KEY']

In [None]:
# Sanity-check that the key was loaded WITHOUT echoing the secret itself:
# a bare `NVIDIA_API_KEY` expression would store the credential in the saved
# notebook output, where it leaks as soon as the notebook is shared/committed.
f"NVIDIA_API_KEY loaded ({len(NVIDIA_API_KEY)} characters)"

In [None]:
# Import the LLM client classes used below. This import was commented out,
# which made the cell fail with NameError on a fresh kernel (Restart & Run All).
# NOTE(review): the hybrid_synth import later in this notebook uses the vendored
# `libs.indoxGen.indoxGen` path; if the package is not installed, import from
# `libs.indoxGen.indoxGen.llms` instead — confirm against your environment.
from indoxGen.llms import IndoxApi, OpenAi

# Client authenticated with the Indox key (passed as `judge_llm` further down)
indox = IndoxApi(api_key=INDOX_API_KEY)

# OpenAi client class pointed at NVIDIA's base URL, selecting the Nemotron model
# (passed as `generator_llm` further down)
nemotron = OpenAi(
    api_key=NVIDIA_API_KEY,
    model="nvidia/nemotron-4-340b-instruct",
    base_url="https://integrate.api.nvidia.com/v1",
)


In [None]:
# Ten hand-written example records. Each row tuple below is unpacked into a
# dict whose keys appear in the order: age, income, years_of_experience,
# job_title, remarks.
sample_data = [
    {
        'age': age,
        'income': income,
        'years_of_experience': experience,
        'job_title': title,
        'remarks': note,
    }
    for age, income, experience, title, note in (
        (25, 45.5, 3, 'Junior Developer', 'Looking to grow my career.'),
        (32, 60.0, 7, 'Developer', 'Experienced professional.'),
        (45, 80.2, 20, 'Lead Developer', 'Seasoned expert in the field.'),
        (28, 50.1, 5, 'Developer', 'Eager to take on new challenges.'),
        (38, 70.0, 15, 'Senior Developer', 'Dedicated to delivering quality software.'),
        (23, 40.0, 2, 'Junior Developer', 'Passionate about coding and learning.'),
        (50, 90.5, 25, 'Lead Developer', 'Expert in software architecture.'),
        (29, 55.3, 6, 'Developer', 'Enjoys collaborating with teams.'),
        (35, 65.0, 10, 'Senior Developer', 'Focused on backend development.'),
        (27, 48.7, 4, 'Developer', 'Interested in front-end technologies.'),
    )
]

# pandas is imported inside this cell, exactly where the original cell did it
import pandas as pd

# Tabular view of the same records: one row per dict, one column per key
data = pd.DataFrame(data=sample_data)

# Leave the frame as the last expression so the notebook renders it
data


Unnamed: 0,age,income,years_of_experience,job_title,remarks
0,25,45.5,3,Junior Developer,Looking to grow my career.
1,32,60.0,7,Developer,Experienced professional.
2,45,80.2,20,Lead Developer,Seasoned expert in the field.
3,28,50.1,5,Developer,Eager to take on new challenges.
4,38,70.0,15,Senior Developer,Dedicated to delivering quality software.
5,23,40.0,2,Junior Developer,Passionate about coding and learning.
6,50,90.5,25,Lead Developer,Expert in software architecture.
7,29,55.3,6,Developer,Enjoys collaborating with teams.
8,35,65.0,10,Senior Developer,Focused on backend development.
9,27,48.7,4,Developer,Interested in front-end technologies.


In [None]:
# Column groups consumed by the synthesis setup further down
numerical_columns = ['age', 'income', 'years_of_experience']
text_columns = ['job_title', 'remarks']
integer_columns = ['age', 'years_of_experience']

# Full column list: numeric columns first, text columns after
all_columns = [*numerical_columns, *text_columns]

# One dict per row of `data`, restricted to the columns listed above
example_data = data.loc[:, all_columns].to_dict(orient='records')

In [None]:
from libs.indoxGen.indoxGen.hybrid_synth import TextTabularSynth, initialize_gan_synth, initialize_llm_synth

In [None]:
# --- Text side: configure the LLM-based synthesizer for the text columns ---
llm_options = dict(
    generator_llm=nemotron,
    judge_llm=indox,
    columns=text_columns,
    example_data=example_data,
    user_instruction="Generate realistic and diverse text data based on the numerical context provided.",
    diversity_threshold=0.3,  # value picked by the notebook author for higher diversity
    max_diversity_failures=30,
    verbose=1,
)
llm_setup = initialize_llm_synth(**llm_options)

# --- Tabular side: the GAN is given only the numerical columns ---
numerical_data = pd.DataFrame(data=data[numerical_columns])

gan_options = dict(
    input_dim=200,
    generator_layers=[128, 256, 512],
    discriminator_layers=[512, 256, 128],
    learning_rate=2e-4,
    beta_1=0.5,
    beta_2=0.9,
    batch_size=64,
    epochs=1,  # a single training epoch
    n_critic=5,
    categorical_columns=[],
    mixed_columns={},
    integer_columns=integer_columns,
    data=numerical_data,
)
gan_setup = initialize_gan_synth(**gan_options)

# --- Combine both setups into one pipeline ---
synth_pipeline = TextTabularSynth(tabular=gan_setup, text=llm_setup)

# How many synthetic rows to request
num_samples = 10

# Run the pipeline
synthetic_data = synth_pipeline.generate(num_samples)

# Print a heading, then show the first rows as the cell output
print("\nSynthetic Data:")
synthetic_data.head()


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/1] - D Loss: 2.3142, G Loss: 0.2910

Synthetic Data:


Unnamed: 0,age,income,years_of_experience,job_title,remarks
0,34,57.568924,13,Full-Stack Developer,A motivated and skilled professional with a so...
1,38,69.196709,6,Software Engineer III,A motivated and skilled professional with a so...
2,36,55.074268,9,Full-Stack Developer,A motivated individual with a strong backgroun...
3,31,54.202019,3,Intermediate Software Engineer,"With a few years of experience under my belt, ..."
4,36,65.059616,13,Experienced Software Engineer,A skilled professional with a proven track rec...


In [None]:
# Display every generated row (the previous cell previewed only the first rows via head())
synthetic_data

Unnamed: 0,age,income,years_of_experience,job_title,remarks
0,34,57.568924,13,Full-Stack Developer,A motivated and skilled professional with a so...
1,38,69.196709,6,Software Engineer III,A motivated and skilled professional with a so...
2,36,55.074268,9,Full-Stack Developer,A motivated individual with a strong backgroun...
3,31,54.202019,3,Intermediate Software Engineer,"With a few years of experience under my belt, ..."
4,36,65.059616,13,Experienced Software Engineer,A skilled professional with a proven track rec...
5,38,63.87207,14,Principal Software Engineer,With a wealth of experience and a drive for in...
6,35,72.185509,15,Principal Software Engineer,With a wealth of experience and a drive for in...
7,28,53.970493,10,Full-Stack Developer,With a decade of experience and a solid income...
8,27,55.272762,10,Full-Stack Developer,With a decade of experience and a solid income...
9,30,60.851093,9,Full-Stack Developer,With a solid background and a drive for innova...
