In [None]:

# !pip install snowflake-connector-python
# !pip install snowflake-connector-python[pandas]

## Combined code >



In [None]:
## adding all code here 

from snowflake.connector.pandas_tools import write_pandas
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
import snowflake.connector
import pandas as pd
import json
import csv
import os

conn = snowflake.connector.connect(
    user = os.environ["SNOWFLAKE_USER"],
    password = os.environ["SNOWFLAKE_PASSWORD"],
    account = os.environ["SNOWFLAKE_ACCOUNT"],
    warehouse = os.environ["SNOWFLAKE_WAREHOUSE"],
    database = os.environ["SNOWFLAKE_DATABASE"],
    schema = os.environ["SNOWFLAKE_SCHEMA"],
)

cur = conn.cursor()

# invoke model
model = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_4o_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    openai_api_key = os.environ["AZURE_OPENAI_API_KEY"],
)

cur.execute("""
    SELECT table_name 
    FROM information_schema.tables
    WHERE table_schema = 'TEST' AND table_type = 'BASE TABLE'
""")

tables = cur.fetchall()
table_names = [table[0] for table in tables]

# Initialize an empty dictionary to store data from all tables
all_data = {}

# Fetch data from all tables
for table_name in table_names:
    cur.execute(f"SELECT * FROM {table_name} LIMIT 100")  # Limit the rows for simplicity
    data2 = cur.fetchall()
    df = pd.DataFrame(data2, columns=[col[0] for col in cur.description])
    all_data[table_name] = df


cur.execute("""
    select get_ddl('SCHEMA','TEST');
""")

ff = cur.fetchall()

cur.execute("""
    SELECT 
    TABLE_NAME, 
    COLUMN_NAME, 
    DATA_TYPE, 
    IS_NULLABLE, 
    COLUMN_DEFAULT 
FROM 
    INFORMATION_SCHEMA.COLUMNS
WHERE 
    TABLE_SCHEMA = 'TEST'
    order by table_name
;
""")

metadata = cur.fetchall()


tables = {}

# Loop through each column data tuple
for table_name, column_name, data_type, is_nullable, default_value in metadata:
    # Initialize a new table in the dictionary if it doesn't exist
    if table_name not in tables:
        tables[table_name] = []
    
    # Add a new entry for the column in the table
    tables[table_name].append({
        "column_name": column_name,
        "data_type": data_type,
        "is_nullable": is_nullable  # Using None as placeholder value
    })

# Convert the tables dictionary to JSON format
json_data = json.dumps(tables, indent=4)

template = (
    '''You are a data generator tasked with creating synthetic data. 
    Based on the following JSON metadata describing table structure and data types, generate sample data rows for each column. 
    Ensure the data adheres to the specified types, constraints, and formats. 
    Provide 10 rows of sample data in JSON array format, and ensure it is realistic and coherent.
    {json_data}
    '''
)

prompt = PromptTemplate.from_template(template)


prompt_template = """
You are a data generator tasked with creating synthetic data. Based on the following JSON metadata describing table structure and data types, generate sample data rows for each column. 
- Adhere to the specified types, constraints, and formats.
- Provide 10 rows of sample data in JSON array format.
- Ensure the data is realistic and coherent.

Metadata:
{metadata}

Expected Output:
Provide 10 rows of JSON data for each table. use same format as metadata.
Provide the output in pure json format which I can parse as a json data to various platforms.
Generate json serializable data.

This is required format in which we require generated data.
please remove any additional content.
Provide only json data which is in curly braces.
Remove all line which consist ``` and json word.
"""

# Create the prompt
prompt = PromptTemplate(input_variables=["metadata"], template=prompt_template)
formatted_prompt = prompt.format(metadata=json_data)

response = model.invoke(formatted_prompt)

synthetic_data = response.content

f1 = synthetic_data.replace("```json", "").replace("```", "").strip()
# print(f1)

try:
    data1 = json.loads(f1)
except json.JSONDecodeError:
    print("Error: Failed to parse generated JSON data.")
    data1 = []


# Data in JSON format
data = json.loads(f1)

output_dir =  '../data/csv_output'
os.makedirs(output_dir, exist_ok=True)

# Process each table
for table_name, rows in data.items():
    if rows:  # Check if the table has data
        # Define output CSV file path
        output_csv_file = os.path.join(output_dir, f"{table_name}.csv")
        
        # Get column names from the first row
        column_names = rows[0].keys()

        # Write data to CSV file
        with open(output_csv_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=column_names)
            
            # Write header and rows
            writer.writeheader()
            writer.writerows(rows)

        print(f"Table '{table_name}' saved to {output_csv_file}")
    else:
        print(f"Table '{table_name}' is empty. No file created.")

## testing functions >>

In [25]:
from snowflake.connector.pandas_tools import write_pandas
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import PromptTemplate
import snowflake.connector
import pandas as pd
import json
import csv
import os
from pathlib import Path
from typing import Dict, List, Any

def create_snowflake_connection(env_vars: Dict[str, str]) -> snowflake.connector.SnowflakeConnection:
    """
    Create and return a Snowflake connection using environment variables.
    """
    return snowflake.connector.connect(
        user=env_vars["SNOWFLAKE_USER"],
        password=env_vars["SNOWFLAKE_PASSWORD"],
        account=env_vars["SNOWFLAKE_ACCOUNT"],
        warehouse=env_vars["SNOWFLAKE_WAREHOUSE"],
        database=env_vars["SNOWFLAKE_DATABASE"],
        schema=env_vars["SNOWFLAKE_SCHEMA"],
    )

def initialize_azure_model(env_vars: Dict[str, str]) -> AzureChatOpenAI:
    """
    Initialize and return Azure OpenAI model instance.
    """
    return AzureChatOpenAI(
        azure_endpoint=env_vars["AZURE_OPENAI_ENDPOINT"],
        azure_deployment=env_vars["AZURE_OPENAI_4o_DEPLOYMENT_NAME"],
        openai_api_version=env_vars["AZURE_OPENAI_API_VERSION"],
        openai_api_key=env_vars["AZURE_OPENAI_API_KEY"],
    )

def get_table_names(cursor) -> List[str]:
    """
    Fetch all table names from the TEST schema.
    """
    cursor.execute("""
        SELECT table_name 
        FROM information_schema.tables
        WHERE table_schema = 'TEST' AND table_type = 'BASE TABLE'
    """)
    return [table[0] for table in cursor.fetchall()]

def fetch_table_data(cursor, table_names: List[str], limit: int = 100) -> Dict[str, pd.DataFrame]:
    """
    Fetch data from all tables and return as dictionary of DataFrames.
    """
    table_data = {}
    for table_name in table_names:
        cursor.execute(f"SELECT * FROM {table_name} LIMIT {limit}")
        data = cursor.fetchall()
        df = pd.DataFrame(data, columns=[col[0] for col in cursor.description])
        table_data[table_name] = df
    return table_data

def get_table_metadata(cursor) -> Dict[str, List[Dict[str, str]]]:
    """
    Fetch and structure table metadata.
    """
    cursor.execute("""
        SELECT 
            TABLE_NAME, 
            COLUMN_NAME, 
            DATA_TYPE, 
            IS_NULLABLE, 
            COLUMN_DEFAULT 
        FROM 
            INFORMATION_SCHEMA.COLUMNS
        WHERE 
            TABLE_SCHEMA = 'TEST'
        ORDER BY table_name
    """)
    
    metadata = cursor.fetchall()
    tables = {}
    
    for table_name, column_name, data_type, is_nullable, _ in metadata:
        if table_name not in tables:
            tables[table_name] = []
        
        tables[table_name].append({
            "column_name": column_name,
            "data_type": data_type,
            "is_nullable": is_nullable
        })
    
    return tables

def get_synthetic_data_prompt() -> str:
    """
    Return the prompt template for synthetic data generation.
    """
    return """
    You are a data generator tasked with creating synthetic data. Based on the following JSON metadata describing table structure and data types, generate sample data rows for each column. 
    - Adhere to the specified types, constraints, and formats.
    - Provide 10 rows of sample data in JSON array format.
    - Ensure the data is realistic and coherent.

    Metadata:
    {metadata}

    Expected Output:
    Provide 10 rows of JSON data for each table. Use same format as metadata.
    Provide the output in pure json format which I can parse as a json data to various platforms.
    Generate json serializable data.
    """

def generate_synthetic_data(model: AzureChatOpenAI, metadata: Dict) -> Dict:
    """
    Generate synthetic data using the AI model.
    """
    prompt = PromptTemplate(
        input_variables=["metadata"],
        template=get_synthetic_data_prompt()
    )
    
    formatted_prompt = prompt.format(metadata=json.dumps(metadata, indent=4))
    response = model.invoke(formatted_prompt)
    
    # Clean up the response
    cleaned_response = response.content.replace("```json", "").replace("```", "").strip()
    
    try:
        return json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return {}

def save_to_csv(data: Dict[str, List[Dict]], base_dir: str) -> None:
    """
    Save generated data to CSV files in the specified output directory.
    
    Args:
        data: Dictionary containing table data
        base_dir: Base project directory path
    """
    # Construct the output directory path
    ## modify this path if needed >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> just create new ones for "csv_output" if needed

    output_dir = os.path.join(base_dir, "data", "csv_output")
    
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    for table_name, rows in data.items():
        if not rows:
            print(f"Table '{table_name}' is empty. No file created.")
            continue
            
        output_csv_file = os.path.join(output_dir, f"{table_name}.csv")
        column_names = rows[0].keys()
        
        with open(output_csv_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=column_names)
            writer.writeheader()
            writer.writerows(rows)
            
        print(f"Table '{table_name}' saved to {output_csv_file}")

def main():
    # Get environment variables
    env_vars = {
        "SNOWFLAKE_USER": os.environ["SNOWFLAKE_USER"],
        "SNOWFLAKE_PASSWORD": os.environ["SNOWFLAKE_PASSWORD"],
        "SNOWFLAKE_ACCOUNT": os.environ["SNOWFLAKE_ACCOUNT"],
        "SNOWFLAKE_WAREHOUSE": os.environ["SNOWFLAKE_WAREHOUSE"],
        "SNOWFLAKE_DATABASE": os.environ["SNOWFLAKE_DATABASE"],
        "SNOWFLAKE_SCHEMA": os.environ["SNOWFLAKE_SCHEMA"],
        "AZURE_OPENAI_ENDPOINT": os.environ["AZURE_OPENAI_ENDPOINT"],
        "AZURE_OPENAI_4o_DEPLOYMENT_NAME": os.environ["AZURE_OPENAI_4o_DEPLOYMENT_NAME"],
        "AZURE_OPENAI_API_VERSION": os.environ["AZURE_OPENAI_API_VERSION"],
        "AZURE_OPENAI_API_KEY": os.environ["AZURE_OPENAI_API_KEY"],
    }
    
    # Get the project root directory (one level up from src)
    project_root = str(Path(__file__).parent.parent)
    
    # Initialize connections and model
    conn = create_snowflake_connection(env_vars)
    model = initialize_azure_model(env_vars)
    cursor = conn.cursor()
    
    try:
        # Get table names and metadata
        table_names = get_table_names(cursor)
        table_data = fetch_table_data(cursor, table_names)
        metadata = get_table_metadata(cursor)
        
        # Generate and save synthetic data
        synthetic_data = generate_synthetic_data(model, metadata)
        save_to_csv(synthetic_data, project_root)
        
    finally:
        cursor.close()
        conn.close()

if __name__ == "__main__":
    main()

NameError: name '__file__' is not defined

## test 2 for csv path


In [23]:
from snowflake.connector.pandas_tools import write_pandas
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import PromptTemplate
import snowflake.connector
import pandas as pd
import json
import csv
import os
from typing import Dict, List, Any
from pathlib import Path

def create_snowflake_connection(env_vars: Dict[str, str]) -> snowflake.connector.SnowflakeConnection:
    """
    Create and return a Snowflake connection using environment variables.
    """
    return snowflake.connector.connect(
        user=env_vars["SNOWFLAKE_USER"],
        password=env_vars["SNOWFLAKE_PASSWORD"],
        account=env_vars["SNOWFLAKE_ACCOUNT"],
        warehouse=env_vars["SNOWFLAKE_WAREHOUSE"],
        database=env_vars["SNOWFLAKE_DATABASE"],
        schema=env_vars["SNOWFLAKE_SCHEMA"],
    )

def initialize_azure_model(env_vars: Dict[str, str]) -> AzureChatOpenAI:
    """
    Initialize and return Azure OpenAI model instance.
    """
    return AzureChatOpenAI(
        azure_endpoint=env_vars["AZURE_OPENAI_ENDPOINT"],
        azure_deployment=env_vars["AZURE_OPENAI_4o_DEPLOYMENT_NAME"],
        openai_api_version=env_vars["AZURE_OPENAI_API_VERSION"],
        openai_api_key=env_vars["AZURE_OPENAI_API_KEY"],
    )

def get_table_names(cursor) -> List[str]:
    """
    Fetch all table names from the TEST schema.
    """
    cursor.execute("""
        SELECT table_name 
        FROM information_schema.tables
        WHERE table_schema = 'TEST' AND table_type = 'BASE TABLE'
    """)
    return [table[0] for table in cursor.fetchall()]

def fetch_table_data(cursor, table_names: List[str], limit: int = 100) -> Dict[str, pd.DataFrame]:
    """
    Fetch data from all tables and return as dictionary of DataFrames.
    """
    table_data = {}
    for table_name in table_names:
        cursor.execute(f"SELECT * FROM {table_name} LIMIT {limit}")
        data = cursor.fetchall()
        df = pd.DataFrame(data, columns=[col[0] for col in cursor.description])
        table_data[table_name] = df
    return table_data

def get_table_metadata(cursor) -> Dict[str, List[Dict[str, str]]]:
    """
    Fetch and structure table metadata.
    """
    cursor.execute("""
        SELECT 
            TABLE_NAME, 
            COLUMN_NAME, 
            DATA_TYPE, 
            IS_NULLABLE, 
            COLUMN_DEFAULT 
        FROM 
            INFORMATION_SCHEMA.COLUMNS
        WHERE 
            TABLE_SCHEMA = 'TEST'
        ORDER BY table_name
    """)
    
    metadata = cursor.fetchall()
    tables = {}
    
    for table_name, column_name, data_type, is_nullable, _ in metadata:
        if table_name not in tables:
            tables[table_name] = []
        
        tables[table_name].append({
            "column_name": column_name,
            "data_type": data_type,
            "is_nullable": is_nullable
        })
    
    return tables

def get_synthetic_data_prompt() -> str:
    """
    Return the prompt template for synthetic data generation.
    """
    return """
    You are a data generator tasked with creating synthetic data. Based on the following JSON metadata describing table structure and data types, generate sample data rows for each column. 
    - Adhere to the specified types, constraints, and formats.
    - Provide 10 rows of sample data in JSON array format.
    - Ensure the data is realistic and coherent.

    Metadata:
    {metadata}

    Expected Output:
    Provide 10 rows of JSON data for each table. Use same format as metadata.
    Provide the output in pure json format which I can parse as a json data to various platforms.
    Generate json serializable data.
    """

def generate_synthetic_data(model: AzureChatOpenAI, metadata: Dict) -> Dict:
    """
    Generate synthetic data using the AI model.
    """
    prompt = PromptTemplate(
        input_variables=["metadata"],
        template=get_synthetic_data_prompt()
    )
    
    formatted_prompt = prompt.format(metadata=json.dumps(metadata, indent=4))
    response = model.invoke(formatted_prompt)
    
    # Clean up the response
    cleaned_response = response.content.replace("```json", "").replace("```", "").strip()
    
    try:
        return json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return {}
    
def get_project_root() -> Path:
    """
    Get the project root directory (assuming src is one level deep).
    Returns the parent directory of the directory containing this script.
    """
    current_file = Path(__file__).resolve()  # Get the path of the current script
    print(current_file)
    return current_file.parent.parent

def save_to_csv(data: Dict[str, List[Dict]], output_dir_name: str = 'csv_output') -> None:
    """
    Save generated data to CSV files in the project's data directory.
    
    Args:
        data: Dictionary containing the data to save
        output_dir_name: Name of the output directory within the data directory
    """
    # Get project root and construct paths
    project_root = get_project_root()
    data_dir = project_root / 'data'
    output_dir = data_dir / output_dir_name
    
    # Create directories if they don't exist
    output_dir.mkdir(parents=True, exist_ok=True)
    
    for table_name, rows in data.items():
        if not rows:
            print(f"Table '{table_name}' is empty. No file created.")
            continue
            
        output_csv_file = output_dir / f"{table_name}.csv"
        column_names = rows[0].keys()
        
        with open(output_csv_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=column_names)
            writer.writeheader()
            writer.writerows(rows)
            
        print(f"Table '{table_name}' saved to {output_csv_file}")

def main():
    # Get environment variables
    env_vars = {
        "SNOWFLAKE_USER": os.environ["SNOWFLAKE_USER"],
        "SNOWFLAKE_PASSWORD": os.environ["SNOWFLAKE_PASSWORD"],
        "SNOWFLAKE_ACCOUNT": os.environ["SNOWFLAKE_ACCOUNT"],
        "SNOWFLAKE_WAREHOUSE": os.environ["SNOWFLAKE_WAREHOUSE"],
        "SNOWFLAKE_DATABASE": os.environ["SNOWFLAKE_DATABASE"],
        "SNOWFLAKE_SCHEMA": os.environ["SNOWFLAKE_SCHEMA"],
        "AZURE_OPENAI_ENDPOINT": os.environ["AZURE_OPENAI_ENDPOINT"],
        "AZURE_OPENAI_4o_DEPLOYMENT_NAME": os.environ["AZURE_OPENAI_4o_DEPLOYMENT_NAME"],
        "AZURE_OPENAI_API_VERSION": os.environ["AZURE_OPENAI_API_VERSION"],
        "AZURE_OPENAI_API_KEY": os.environ["AZURE_OPENAI_API_KEY"],
    }
    
    # Initialize connections and model
    conn = create_snowflake_connection(env_vars)
    model = initialize_azure_model(env_vars)
    cursor = conn.cursor()
    
    try:
        # Get table names and metadata
        table_names = get_table_names(cursor)
        table_data = fetch_table_data(cursor, table_names)
        metadata = get_table_metadata(cursor)
        
        # Generate and save synthetic data
        synthetic_data = generate_synthetic_data(model, metadata)
        save_to_csv(synthetic_data)  # Now uses project-relative paths
        
    finally:
        cursor.close()
        conn.close()

if __name__ == "__main__":
    main()

Error parsing JSON: Expecting value: line 1 column 1 (char 0)


In [24]:
# import os
# current_dir = os.getcwd()



# print(f"{current_dir}\data\csv_output")



## old notebook

In [23]:
from snowflake.connector.pandas_tools import write_pandas
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
import snowflake.connector
import pandas as pd
import json
import csv
import os

In [2]:
# conn = snowflake.connector.connect(
#     user = 'ashika',
#     password = 'Cervello123#',
#     account = 'bpwmwqd-bk67062',
#     warehouse = 'compute_wh',
#     database = 'RAW',
#     schema = 'test',
# )

In [3]:
cur = conn.cursor()

In [None]:
cur.execute("""
    SELECT table_name 
    FROM information_schema.tables
    WHERE table_schema = 'TEST' AND table_type = 'BASE TABLE'
""")

tables = cur.fetchall()
table_names = [table[0] for table in tables]

# Initialize an empty dictionary to store data from all tables
all_data = {}

# Fetch data from all tables
for table_name in table_names:
    cur.execute(f"SELECT * FROM {table_name} LIMIT 100")  # Limit the rows for simplicity
    data2 = cur.fetchall()
    df = pd.DataFrame(data2, columns=[col[0] for col in cur.description])
    all_data[table_name] = df


# Display the first few rows from all tables (for review)
for table, data2 in all_data.items():
    print(f"\nData from table: {table}")
    print(data2.head())

In [None]:
cur.description

In [None]:
all_data

In [None]:
cur.execute("""
    select get_ddl('SCHEMA','TEST');
""")

ff = cur.fetchall()

ff

In [None]:

cur.execute("""
    SELECT 
    TABLE_NAME, 
    COLUMN_NAME, 
    DATA_TYPE, 
    IS_NULLABLE, 
    COLUMN_DEFAULT 
FROM 
    INFORMATION_SCHEMA.COLUMNS
WHERE 
    TABLE_SCHEMA = 'TEST'
    order by table_name
;
""")

metadata = cur.fetchall()

metadata

In [None]:
tables

In [None]:
tables = {}

# Loop through each column data tuple
for table_name, column_name, data_type, is_nullable, default_value in metadata:
    # Initialize a new table in the dictionary if it doesn't exist
    if table_name not in tables:
        tables[table_name] = []
    
    # Add a new entry for the column in the table
    tables[table_name].append({
        "column_name": column_name,
        "data_type": data_type,
        "is_nullable": is_nullable  # Using None as placeholder value
    })

# Convert the tables dictionary to JSON format
json_data = json.dumps(tables, indent=4)

# Output the JSON data
print(json_data)

In [13]:


model = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_4o_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    openai_api_key = os.environ["AZURE_OPENAI_API_KEY"],
)

In [15]:


template = (
    '''You are a data generator tasked with creating synthetic data. 
    Based on the following JSON metadata describing table structure and data types, generate sample data rows for each column. 
    Ensure the data adheres to the specified types, constraints, and formats. 
    Provide 10 rows of sample data in JSON array format, and ensure it is realistic and coherent.
    {json_data}
    '''
)

prompt = PromptTemplate.from_template(template)

In [None]:


prompt_template = """
You are a data generator tasked with creating synthetic data. Based on the following JSON metadata describing table structure and data types, generate sample data rows for each column. 
- Adhere to the specified types, constraints, and formats.
- Provide 10 rows of sample data in JSON array format.
- Ensure the data is realistic and coherent.

Metadata:
{metadata}

Expected Output:
Provide 10 rows of JSON data for each table. use same format as metadata.
Provide the output in pure json format which I can parse as a json data to various platforms.
Generate json serializable data.

This is required format in which we require generated data.
please remove any additional content.
Provide only json data which is in curly braces.
Remove all line which consist ``` and json word.
"""

# Create the prompt
prompt = PromptTemplate(input_variables=["metadata"], template=prompt_template)
formatted_prompt = prompt.format(metadata=json_data)

# print(formatted_prompt)
response = model(formatted_prompt)

synthetic_data = response.content
# Display the result
print("Generated Synthetic Data:\n", synthetic_data)

# try:
#     data = json.loads(json_data)
# except json.JSONDecodeError:
#     print("Error: Failed to parse generated JSON data.")
#     data = []

# # Save to CSV if data is valid
# if data:
#     output_file = "synthetic_data.csv"
#     column_names = [col["name"] for col in json_data["columns"]]

#     with open(output_file, mode="w", newline="", encoding="utf-8") as file:
#         writer = csv.DictWriter(file, fieldnames=column_names)
#         writer.writeheader()
#         writer.writerows(data)

#     print(f"Data successfully saved to {output_file}")
# else:
#     print("No valid data generated.")

In [None]:
f1 = synthetic_data.replace("```json", "").replace("```", "").strip()
f1

In [None]:
try:
    data1 = json.loads(f1)
except json.JSONDecodeError:
    print("Error: Failed to parse generated JSON data.")
    data1 = []

data1

In [None]:
type(data1)

In [None]:


# Data in JSON format
data = json.loads(f1)

output_dir = "csv_output"
os.makedirs(output_dir, exist_ok=True)

# Process each table
for table_name, rows in data.items():
    if rows:  # Check if the table has data
        # Define output CSV file path
        output_csv_file = os.path.join(output_dir, f"{table_name}.csv")
        
        # Get column names from the first row
        column_names = rows[0].keys()

        # Write data to CSV file
        with open(output_csv_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=column_names)
            
            # Write header and rows
            writer.writeheader()
            writer.writerows(rows)

        print(f"Table '{table_name}' saved to {output_csv_file}")
    else:
        print(f"Table '{table_name}' is empty. No file created.")