In [14]:
import pandas as pd
import os

def generate_sql_from_csv(csv_file, table_name):
    """Build a SQL script (CREATE TABLE plus one multi-row INSERT) from a CSV file.

    Args:
        csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.
        table_name: Name of the table to create; emitted as a double-quoted
            identifier with embedded double quotes doubled.

    Returns:
        str: The CREATE TABLE statement followed by a single INSERT statement
        containing every row. When the CSV has a header but no data rows, only
        the CREATE TABLE statement is returned, because ``INSERT ... VALUES``
        with an empty value list is not valid SQL.

    Column types are derived from the pandas dtypes: ``int64`` -> ``bigint``,
    ``float64`` -> ``float``, datetime dtypes -> ``datetime``, anything else
    -> ``text``.
    """

    def quote_identifier(name):
        # Double embedded double quotes so the identifier stays well-formed.
        return '"' + str(name).replace('"', '""') + '"'

    # Read the CSV file
    df = pd.read_csv(csv_file)
    quoted_table = quote_identifier(table_name)

    # Step 1: Generate CREATE TABLE statement
    column_defs = []
    for col in df.columns:
        if df[col].dtype == 'int64':
            sql_type = 'bigint'
        elif df[col].dtype == 'float64':
            sql_type = 'float'
        elif pd.api.types.is_datetime64_any_dtype(df[col]):
            sql_type = 'datetime'
        else:
            sql_type = 'text'
        column_defs.append(f'  {quote_identifier(col)} {sql_type}')
    create_table_sql = (
        f'CREATE TABLE {quoted_table} (\n' + ",\n".join(column_defs) + "\n);\n\n"
    )

    # Step 2: Generate INSERT INTO statement
    values = []
    # itertuples keeps each column's own dtype. iterrows builds a Series per
    # row, which coerces a mix of int and float columns to float and would
    # render bigint values as "1.0".
    for record in df.itertuples(index=False, name=None):
        row_values = []
        for value in record:
            if pd.isnull(value):  # Missing values become SQL NULL
                row_values.append("NULL")
            elif isinstance(value, str):
                # Escape single quotes in SQL string literals by doubling them
                escaped_value = value.replace("'", "''")
                row_values.append(f"'{escaped_value}'")
            else:  # Non-string values (int, float, etc.) are emitted as-is
                row_values.append(str(value))
        values.append(f"({', '.join(row_values)})")

    # Header-only CSV: there is nothing to insert, so emit only the DDL.
    if not values:
        return create_table_sql

    column_list = ", ".join(quote_identifier(col) for col in df.columns)
    insert_into_sql = f'INSERT INTO {quoted_table} ({column_list})\nVALUES\n'
    # Join all rows with commas and end with a semicolon
    insert_into_sql += ",\n".join(values) + ";\n"

    # Combine CREATE TABLE and INSERT INTO statements
    return create_table_sql + insert_into_sql


# Batch helper: writes one .sql script per CSV file in a directory
def process_multiple_csv_files(input_directory, output_directory):
    """Convert every ``.csv`` file in a directory into its own ``.sql`` script.

    Args:
        input_directory: Directory scanned (non-recursively) for files whose
            name ends with ``.csv``.
        output_directory: Directory that receives one ``<table_name>.sql`` file
            per CSV; created if it does not exist.

    The table name is the CSV file name without its extension. Files are
    processed in sorted name order and a progress line is printed for each.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file_name in sorted(os.listdir(input_directory)):
        if not file_name.endswith(".csv"):
            continue

        csv_file = os.path.join(input_directory, file_name)
        table_name = os.path.splitext(file_name)[0]  # File name without extension
        sql_script = generate_sql_from_csv(csv_file, table_name)

        # Save the SQL script to a corresponding .sql file. Write UTF-8
        # explicitly so non-ASCII CSV text does not depend on the platform's
        # locale encoding.
        output_file = os.path.join(output_directory, f"{table_name}.sql")
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(sql_script)

        print(f"SQL script for '{table_name}' generated and saved to '{output_file}'.")

# Example usage: generate one .sql file per CSV file.
# NOTE(review): both paths are absolute and specific to one machine; point them
# at your own data folders before running this cell elsewhere.
input_directory = "/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data"  # Replace 'data' with the actual folder containing CSV files
output_directory = "/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data_sql"  # Directory to save SQL files
process_multiple_csv_files(input_directory, output_directory)


SQL script for 'part_categories' generated and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data_sql/part_categories.sql'.
SQL script for 'inventories' generated and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data_sql/inventories.sql'.
SQL script for 'parts' generated and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data_sql/parts.sql'.
SQL script for 'inventory_sets' generated and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data_sql/inventory_sets.sql'.
SQL script for 'inventory_parts' generated and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data_sql/inventory_parts.sql'.
SQL script for 'colors' generated and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_chall

In [15]:
import os

def process_multiple_csv_files_into_one(input_directory, output_file):
    """Convert every ``.csv`` file in a directory into one combined SQL script.

    Args:
        input_directory: Directory scanned (non-recursively) for files whose
            name ends with ``.csv``.
        output_file: Path of the combined ``.sql`` file to write; its parent
            directory is created if it does not exist.

    The table name for each CSV is its file name without the extension. Files
    are processed in sorted name order so the combined script is identical
    from run to run. A progress line is printed per table.
    """
    all_sql_scripts = []

    # os.listdir returns entries in arbitrary order; sort for a stable script.
    for file_name in sorted(os.listdir(input_directory)):
        if not file_name.endswith(".csv"):
            continue

        csv_file = os.path.join(input_directory, file_name)
        table_name = os.path.splitext(file_name)[0]  # File name without extension
        all_sql_scripts.append(generate_sql_from_csv(csv_file, table_name))
        print(f"SQL script for '{table_name}' generated.")

    # Make sure the destination folder exists; dirname is '' for a bare file name.
    parent_directory = os.path.dirname(output_file)
    if parent_directory:
        os.makedirs(parent_directory, exist_ok=True)

    # Combine all SQL scripts into one, separated by blank lines. Write UTF-8
    # explicitly so the result does not depend on the platform's locale encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n\n".join(all_sql_scripts))

    print(f"All SQL scripts combined and saved to '{output_file}'.")

# Example usage: generate a single combined .sql script for all CSV files.
# NOTE(review): both paths are absolute and specific to one machine; point them
# at your own data folder and desired output file before running elsewhere.
input_directory = "/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/data"
output_file = "/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/all_tables.sql"

process_multiple_csv_files_into_one(input_directory, output_file)


SQL script for 'part_categories' generated.
SQL script for 'inventories' generated.
SQL script for 'parts' generated.
SQL script for 'inventory_sets' generated.
SQL script for 'inventory_parts' generated.
SQL script for 'colors' generated.
SQL script for 'sets' generated.
SQL script for 'themes' generated.
All SQL scripts combined and saved to '/Users/kevintr/Documents/ds_portfolio/ds_portfolio_github-repo/lego_analysis_challenge-main/all_tables.sql'.
