# ALL-CSV-to-SQL-CREATE-TABLES

This notebook is a template for generating `DROP TABLE IF EXISTS` and
`CREATE TABLE` queries, one pair for each CSV file found in a folder.

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import os

def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled so a name containing `"` cannot end
    the identifier early and produce malformed SQL.
    """
    escaped = str(name).replace('"', '""')
    return f'"{escaped}"'


def _sql_type_for(dtype):
    """Return the SQL column type for a pandas/NumPy dtype (TEXT by default)."""
    types = pd.api.types
    # Classify by dtype family rather than by exact dtype name, so that
    # int32/float32, nullable extension dtypes and tz-aware or non-nanosecond
    # datetimes are handled instead of raising KeyError.
    if types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    if types.is_integer_dtype(dtype):
        return 'INTEGER'
    if types.is_float_dtype(dtype):
        return 'REAL'
    if types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    # object, string, category and anything unrecognised are stored as text.
    return 'TEXT'


# Function to generate the DROP TABLE IF EXISTS and CREATE TABLE queries
def generate_create_table_query(df, table_name):
    """Build the DROP and CREATE TABLE statements for a DataFrame.

    Args:
        df: DataFrame whose column labels and dtypes define the table schema.
        table_name: Name of the table; it is emitted as a quoted identifier.

    Returns:
        A ``(drop_query, create_query)`` tuple of SQL strings, each terminated
        by a semicolon.
    """
    quoted_table = _quote_identifier(table_name)
    drop_query = f'DROP TABLE IF EXISTS {quoted_table};'

    # df.dtypes yields one (label, dtype) pair per column, which also works
    # when column labels are duplicated (df[label] would return a DataFrame).
    column_definitions = ', '.join(
        f'{_quote_identifier(col)} {_sql_type_for(dtype)}'
        for col, dtype in df.dtypes.items()
    )
    create_query = f'CREATE TABLE {quoted_table} ({column_definitions});'

    return drop_query, create_query

# Specify the directory containing the CSV files
csv_directory = '/workspace/InstaCart-Online-Grocery-Basket-Analysis-Dataset'

# List all CSV files in the directory. os.listdir() returns entries in
# arbitrary order, so sort them to emit the queries in a stable,
# reproducible order across runs and machines.
csv_files = sorted(f for f in os.listdir(csv_directory) if f.endswith('.csv'))

# Process each CSV file
for csv_file in csv_files:
    # Read the CSV file into a DataFrame; the whole file is loaded so that
    # pandas infers each column's dtype from every row.
    df = pd.read_csv(os.path.join(csv_directory, csv_file))

    # The table name is the file name without its extension
    table_name = os.path.splitext(csv_file)[0]

    # Generate the queries
    drop_query, create_query = generate_create_table_query(df, table_name)

    # Combine the DROP and CREATE queries
    full_query = f'{drop_query}\n{create_query}'

    # Print the combined SQL queries under a banner naming the table
    print(f"/***** {table_name} *****/\n{full_query}\n")
