In [1]:
import os
from dotenv import load_dotenv
from models import IndoxApi
# Load variables via python-dotenv, then read the Indox API key from the
# process environment (the value is None when the variable is not set).
load_dotenv()
INDOX_API_KEY = os.environ.get("INDOX_API_KEY")

## Generate data from an LLM without input data

In [2]:
# Python module names are case-sensitive; the package is spelled `SynthCore`
# in every other import of this notebook, so use that spelling here as well.
from SynthCore import DataFromPrompt, DataGenerationPrompt

# Free-text request describing the table the model should produce.
user_prompt = "Generate a dataset with 3 column and 3 row about astronomy."

LLM = IndoxApi(api_key=INDOX_API_KEY)
# Build the generator instruction from the request.
instruction = DataGenerationPrompt.get_instruction(user_prompt)

# No `dataframe` argument is passed here: generation starts from the prompt only.
data_generator = DataFromPrompt(
    prompt_name="Generate Research Paper Abstracts",
    args={
        "llm": LLM,
        "n": 1,
        "temperature": 0.1,
        "instruction": instruction,
    },
    outputs={"generations": "generate"},
)

generated_df = data_generator.run()

print(generated_df)
# Persist the result; the next example in this notebook loads this file back in.
data_generator.save_to_excel("output_data.xlsx")


[32mINFO[0m: [1mGenerated DataFrame with shape: (3, 3)[0m
    Celestial Body  Distance from Earth (light years)      Discovery Method
0  Exoplanet Xylon                                 42    Transit Photometry
1   Nebula Whisper                               1500   Radio Wave Analysis
2   Asteroid Drift                                320  Spectroscopic Survey
[32mINFO[0m: [1mDataFrame saved to Excel file at: output_data.xlsx[0m


In [3]:
# Bare last expression: the notebook renders the generated DataFrame as a table.
generated_df

Unnamed: 0,Celestial Body,Distance from Earth (light years),Discovery Method
0,Exoplanet Xylon,42,Transit Photometry
1,Nebula Whisper,1500,Radio Wave Analysis
2,Asteroid Drift,320,Spectroscopic Survey


## Generate data from an uploaded dataset

In [4]:
from SynthCore import DataFromPrompt, DataGenerationPrompt, Excel

# Seed data: load the spreadsheet that the generator will extend.
dataset_file_path = "output_data.xlsx"
excel_loader = Excel(dataset_file_path)
df = excel_loader.load()

LLM = IndoxApi(api_key=INDOX_API_KEY)
user_prompt = " based on given dataset generate one unique row about astronomy"
instruction = DataGenerationPrompt.get_instruction(user_prompt)

# Same generator class as the prompt-only example, now given the loaded
# frame through `dataframe`.
dataset = DataFromPrompt(
    prompt_name="Generate New Column",
    args=dict(llm=LLM, n=1, temperature=0.7, instruction=instruction),
    outputs=dict(generations="generate"),
    dataframe=df,
)
updated_df = dataset.run()
print(updated_df)


[32mINFO[0m: [1mGenerated DataFrame with shape: (4, 3)[0m
    Celestial Body  Distance from Earth (light years)       Discovery Method
0      Quasar Echo                               8000  Gravitational Lensing
1  Exoplanet Xylon                                 42     Transit Photometry
2   Nebula Whisper                               1500    Radio Wave Analysis
3   Asteroid Drift                                320   Spectroscopic Survey


In [5]:
# Bare last expression: the notebook renders the extended DataFrame as a table.
updated_df

Unnamed: 0,Celestial Body,Distance from Earth (light years),Discovery Method
0,Quasar Echo,8000,Gravitational Lensing
1,Exoplanet Xylon,42,Transit Photometry
2,Nebula Whisper,1500,Radio Wave Analysis
3,Asteroid Drift,320,Spectroscopic Survey


## Generate Data with Few-Shot Input/Output Examples

In [9]:
from SynthCore import FewShotPrompt

# Each example pairs a request ("input") with the JSON-encoded rows that
# answer it ("output").
examples = [
    {
        "input": "Generate a dataset with 3 columns and 2 rows about biology.",
        "output": '[{"Species": "Human", "Cell Count": 37.2, "Age": 30}, {"Species": "Mouse", "Cell Count": 3.2, "Age": 2}]'
    },
    {
        "input": "Generate a dataset with 3 columns and 2 rows about chemistry.",
        "output": '[{"Element": "Hydrogen", "Atomic Number": 1, "Weight": 1.008}, {"Element": "Oxygen", "Atomic Number": 8, "Weight": 15.999}]'
    }
]

# The raw request is passed directly as the instruction in this example.
user_prompt = "Generate a dataset with 3 columns and 2 rows about astronomy."
LLM = IndoxApi(api_key=INDOX_API_KEY)

data_generator = FewShotPrompt(
    prompt_name="Generate Astronomy Dataset",
    args={
        "llm": LLM,
        "n": 1,
        "instruction": user_prompt,
    },
    outputs={"generations": "generate"},
    examples=examples,
)

generated_df = data_generator.run()

print(generated_df)
# Save the frame produced by this cell. `df` must not be used here: on a
# fresh kernel it still holds the spreadsheet loaded in the earlier
# "uploaded data" cell, not this cell's result.
data_generator.save_to_excel("output_data.xlsx", generated_df)


  Celestial Body  Diameter (km)  Distance from Sun (million km)
0          Earth          12742                           149.6
1           Mars           6779                           227.9
[32mINFO[0m: [1mDataFrame saved to Excel file at: output_data.xlsx[0m


In [10]:
# Display the few-shot result assigned in the previous cell. `df` is not
# reassigned by that cell, so on Restart & Run All it would show an
# unrelated, older frame.
generated_df

Unnamed: 0,Celestial Body,Diameter (km),Distance from Sun (million km)
0,Earth,12742,149.6
1,Mars,6779,227.9


## Generate Data with Attributes

In [11]:
from SynthCore import DataFromAttributedPrompt

LLM = IndoxApi(api_key=INDOX_API_KEY)

# The instruction is a template; the logged prompts show one prompt per
# combination of the attribute values listed below (2 x 2 = 4 here).
args = dict(
    instruction="Generate a {adjective} sentence that is {length}.",
    attributes=dict(
        adjective=["serious", "funny"],
        length=["short", "long"],
    ),
    llm=LLM,
)

dataset = DataFromAttributedPrompt(
    prompt_name="ExamplePrompt",
    args=args,
    outputs={},
)

df = dataset.run()

print(df)

[32mINFO[0m: [1mGenerated 4 prompts from attributes.[0m
[32mINFO[0m: [1mRunning prompt: Generate a serious sentence that is short.[0m
[32mINFO[0m: [1mRunning prompt: Generate a serious sentence that is long.[0m
[32mINFO[0m: [1mRunning prompt: Generate a funny sentence that is short.[0m
[32mINFO[0m: [1mRunning prompt: Generate a funny sentence that is long.[0m
[32mINFO[0m: [1mGenerated DataFrame with 4 records.[0m
                                            response
0                             Time waits for no one.
1  In the quiet stillness of the early morning, a...
2  I told my dog he was adopted, and now he won’t...
3  As the overly ambitious squirrel, donning a ti...


In [None]:
# Bare last expression: the notebook renders `df` as a table. `df` was last
# assigned from `dataset.run()` in the attributed-prompt cell above.
df