# Method 1: Use ipython-sql for SQL Magic in Jupyter

In [None]:
# command to install
!pip install ipython-sql psycopg2 duckdb

# command to upgade install packages
!pip install --upgrade ipython-sql psycopg2 duckdb prettytable

# comand to display version of nstalled packages
!pip show ipython-sql psycopg2 duckdb

In [1]:
%load_ext sql

In [None]:
import prettytable

# Override prettytable's module-level DEFAULT attribute with a style name.
# NOTE(review): presumably the KeyError mentioned below is raised while the
# %sql magic renders a result table — confirm against the installed versions.
prettytable.DEFAULT = "PLAIN_COLUMNS"  # Change the default style to avoid KeyError
# prettytable.DEFAULT = "MSWORD_FRIENDLY"  # or any other valid style

**If the setting above does not work, try this:**

In [None]:
from prettytable import TableStyle
import prettytable

prettytable.DEFAULT = TableStyle.PLAIN_COLUMNS  # Use TableStyle instead of deprecated constants


In [None]:
%sql postgresql://your_username:your_password@your_host:5432/your_database

In [3]:
%sql select* from department;

dept_id,dept_name
D1,IT
D2,HR
D3,Finance
D4,Admin


In [5]:
%sql output_df << select* from department;

In [6]:
output_df

dept_id,dept_name
D1,IT
D2,HR
D3,Finance
D4,Admin


**Single-line SQL Query**

In [7]:
%sql select* from department;

dept_id,dept_name
D1,IT
D2,HR
D3,Finance
D4,Admin


**Multi-line SQL Query**

In [8]:
%%sql
SELECT *
from department;


dept_id,dept_name
D1,IT
D2,HR
D3,Finance
D4,Admin


**Storing SQL Query Results in Python Variables**

In [9]:
result = %sql SELECT COUNT(*) FROM department;
print(result)

+-------+
| count |
+-------+
|   4   |
+-------+


**Convert to a Pandas DataFrame:**

In [10]:
import pandas as pd

result = %sql SELECT * FROM department;
df = result.DataFrame()
df.head()


Unnamed: 0,dept_id,dept_name
0,D1,IT
1,D2,HR
2,D3,Finance
3,D4,Admin


# Method 2: Use DuckDB for In-Memory SQL Queries

In [1]:
!pip install duckdb pandas



You should consider upgrading via the 'e:\notebook\notebook_env\scripts\python.exe -m pip install --upgrade pip' command.


In [1]:
# Query a pandas DataFrame with SQL through an explicit DuckDB connection.
import duckdb
import pandas as pd

# Sample DataFrame
input_df = pd.DataFrame({'i': [1, 2, 3, 4, 5]})

# Connect to DuckDB in memory
con = duckdb.connect()

# Register Pandas DataFrame as a DuckDB table
# (the first argument is the table name the SQL below refers to)
con.register('input_df', input_df)

# Execute SQL query on the DataFrame
# fetchdf() returns the result as a pandas DataFrame with one column, total_i
output_df = con.execute("SELECT SUM(i) as total_i FROM input_df").fetchdf()

print(output_df)


   total_i
0     15.0


**If you want to use %sql syntax, you need ipython-sql with a database connection.**

In [None]:
!pip install ipython-sql duckdb pandas

In [3]:
%load_ext sql

# Create an in-memory DuckDB database
%sql duckdb:///:memory:

# Load input_df into DuckDB
import pandas as pd
import duckdb

input_df = pd.DataFrame({'i': [1, 2, 3, 4, 5]})
duckdb.connect().register('input_df', input_df)

# Now, execute SQL using magic
%sql output_df << SELECT SUM(i) as total_i FROM input_df;

# Display the result
print(output_df)


The sql extension is already loaded. To reload it, use:
  %reload_ext sql


+---------+
| total_i |
+---------+
|    15   |
+---------+


# Method 3: Use psycopg2 and SQLAlchemy with pandas

In [None]:
!pip install psycopg2 sqlalchemy

In [2]:
import psycopg2
import pandas as pd
from sqlalchemy import create_engine

In [None]:
db_params = {
    "host": "your_host",       # Example: "localhost" or "127.0.0.1"
    "database": "your_database",
    "user": "your_username",
    "password": "your_password",
    "port": "your_port"        # Default PostgreSQL port is 5432
}


In [4]:
# Connectivity check: open a psycopg2 connection from db_params and report the outcome.
# NOTE(review): on failure the error is only printed, so this cell does not
# assign `conn`. The SQLAlchemy cell that follows uses `engine` instead, and
# `conn` is not closed anywhere in this section — confirm whether it should be.
try:
    conn = psycopg2.connect(**db_params)
    print("Connected to PostgreSQL database successfully!")
except Exception as e:
    print("Error while connecting to PostgreSQL:", e)


Connected to PostgreSQL database successfully!


In [5]:
from urllib.parse import quote_plus

# Build the SQLAlchemy engine from db_params. The user name and password are
# percent-encoded so that characters such as '@', ':' or '/' in the credentials
# cannot be mistaken for URL delimiters.
engine = create_engine(
    f"postgresql+psycopg2://{quote_plus(db_params['user'])}:{quote_plus(db_params['password'])}"
    f"@{db_params['host']}:{db_params['port']}/{db_params['database']}"
)

# Example: Fetch data from a table
query = "SELECT * FROM department"
df = pd.read_sql(query, engine)

# Display the first few rows
df.head()

Unnamed: 0,dept_id,dept_name
0,D1,IT
1,D2,HR
2,D3,Finance
3,D4,Admin


# Method 4: Use psycopg2 directly with a cursor

In [None]:
import psycopg2

conn = psycopg2.connect(
    dbname="your_database",
    user="your_username",
    password="your_password",
    host="localhost",
    port="5432"
)

cur = conn.cursor()

In [2]:
# Run the query on the cursor `cur` and fetch every result row as a tuple.
cur.execute("SELECT * from department;")
users = cur.fetchall()  # rows of the department table, despite the variable name

for user in users:
    print(user)

# Release the cursor and the connection; both must be re-created before
# this cell can be run again.
cur.close()
conn.close()


('D1', 'IT')
('D2', 'HR')
('D3', 'Finance')
('D4', 'Admin')


# Method 5: Query PostgreSQL from DuckDB with the postgres extension

In [None]:
!pip install duckdb psycopg2

In [2]:
import duckdb

# Connect to DuckDB
con = duckdb.connect()

# Load PostgreSQL extension
# NOTE(review): INSTALL presumably downloads the extension on first use — confirm network access.
con.execute("INSTALL postgres;")
# As the cell's last expression, this call's return value (the connection object) is echoed as output.
con.execute("LOAD postgres;")


<duckdb.duckdb.DuckDBPyConnection at 0x197057de570>

In [None]:
# Define PostgreSQL connection string
postgres_conn = "host=your_host dbname=your_database user=your_username password=your_password port=5432"

# Scan a table from PostgreSQL into DuckDB
df = con.execute(f"SELECT * FROM postgres_scan('{postgres_conn}', 'public', 'your_table_name');").fetchdf()

# Display data
print(df.head())


In [3]:
# Scan a table from PostgreSQL into DuckDB
# Same scan as the template, applied to the department table; it reuses the
# `con` and `postgres_conn` variables defined in earlier cells.
df = con.execute(f"SELECT * FROM postgres_scan('{postgres_conn}', 'public', 'department');").fetchdf()

# Display data
print(df.head())


  dept_id dept_name
0      D1        IT
1      D2        HR
2      D3   Finance
3      D4     Admin


# Creating tables from CSV files

**From multiple CSV files in a folder**

In [None]:
import pandas as pd
import psycopg2
import os

# List of CSV files and their corresponding table names
csv_files = [
    ('customers.csv', 'customers'),
    ('orders.csv', 'orders'),
    ('sellers.csv', 'sellers'),
    ('products.csv', 'products'),
    ('geolocation.csv', 'geolocations'),
    ('order_items.csv', 'order_items'),
    ('payments.csv', 'payments')
]

# The password is read from the PGPASSWORD environment variable, or prompted
# for interactively, so the credential is never stored in the notebook.
from getpass import getpass

# Connect to the PostgreSQL database
conn = psycopg2.connect(
    dbname="ecommerceDB",
    user="postgres",
    password=os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: "),
    host="localhost",
    port="5432"
)
cursor = conn.cursor()

# Folder containing the CSV files
folder_path = 'E:/Datasets/Ecommerce'

# Function to determine SQL data type based on Pandas data type
def get_sql_type(dtype):
    """Map a pandas/NumPy dtype to a PostgreSQL column type name.

    Parameters
    ----------
    dtype
        A dtype (for example ``df[col].dtype``) accepted by the
        ``pd.api.types.is_*_dtype`` checks.

    Returns
    -------
    str
        'INTEGER' or 'BIGINT' for integers, 'FLOAT' for floats, 'BOOLEAN',
        'TIMESTAMP' for datetimes, and 'TEXT' for strings and anything
        unrecognized.
    """
    if pd.api.types.is_integer_dtype(dtype):
        # PostgreSQL INTEGER is a signed 32-bit type, while pandas reads whole
        # numbers as 64-bit by default. Only dtypes that are guaranteed to fit
        # keep INTEGER; wider or unsigned 32-bit dtypes get BIGINT so large
        # values do not fail with "integer out of range".
        width = getattr(dtype, 'itemsize', 8)
        is_unsigned = pd.api.types.is_unsigned_integer_dtype(dtype)
        if width < 4 or (width == 4 and not is_unsigned):
            return 'INTEGER'
        return 'BIGINT'
    elif pd.api.types.is_float_dtype(dtype):
        return 'FLOAT'
    elif pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    else:
        return 'TEXT'  # Default type for string and unrecognized types

# Loop through each CSV file and create tables dynamically.
# try/finally guarantees the cursor and connection are released even when a
# file is missing or a statement fails part-way through.
try:
    for csv_file, table_name in csv_files:
        file_path = os.path.join(folder_path, csv_file)

        # Read CSV file into a Pandas DataFrame
        df = pd.read_csv(file_path)

        # Report missing values per column before they are mapped to SQL NULL.
        # The frame itself is left untouched: replacing NaN with None on the
        # whole frame is pandas-version dependent and can turn numeric columns
        # into object dtype, which would be created as TEXT below.
        print(f"Processing {csv_file}")
        print(f"NaN values before replacement:\n{df.isnull().sum()}\n")

        # Clean column names (replace spaces, special characters)
        df.columns = [col.replace(' ', '_').replace('-', '_').replace('.', '_') for col in df.columns]

        # Generate the CREATE TABLE statement with appropriate data types
        columns = ', '.join([f'"{col}" {get_sql_type(df[col].dtype)}' for col in df.columns])
        create_table_query = f'CREATE TABLE IF NOT EXISTS "{table_name}" ({columns});'
        cursor.execute(create_table_query)

        # The INSERT statement is the same for every row, so build it once per table
        col_names = ', '.join([f'"{col}"' for col in df.columns])
        placeholders = ', '.join(['%s'] * len(df.columns))
        sql = f'INSERT INTO "{table_name}" ({col_names}) VALUES ({placeholders})'

        # itertuples keeps each column's own dtype (iterrows coerces every row
        # to a single dtype); missing values become None, i.e. SQL NULL.
        records = [
            tuple(None if pd.isna(value) else value for value in record)
            for record in df.itertuples(index=False, name=None)
        ]
        cursor.executemany(sql, records)

        # Commit the transaction for the current CSV file
        conn.commit()
finally:
    # Close the connection (uncommitted work from a failed file is discarded)
    cursor.close()
    conn.close()

print("Data successfully loaded into PostgreSQL!")


**From a single CSV file**

In [None]:
import pandas as pd
import psycopg2

# Database connection details.
# The password is read from the PGPASSWORD environment variable, or prompted
# for interactively, so the credential is never stored in the notebook.
import os
from getpass import getpass

conn = psycopg2.connect(
    dbname="ecommerceDB",
    user="postgres",
    password=os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: "),
    host="localhost",
    port="5432"
)
cursor = conn.cursor()

# Specify the CSV file path and table name
csv_file = "E:/Datasets/Ecommerce/customers.csv"  # Change this to your file path
table_name = "customers"  # Change this to your desired table name

# Function to determine SQL data type based on Pandas data type
def get_sql_type(dtype):
    """Map a pandas/NumPy dtype to a PostgreSQL column type name.

    Parameters
    ----------
    dtype
        A dtype (for example ``df[col].dtype``) accepted by the
        ``pd.api.types.is_*_dtype`` checks.

    Returns
    -------
    str
        'INTEGER' or 'BIGINT' for integers, 'FLOAT' for floats, 'BOOLEAN',
        'TIMESTAMP' for datetimes, and 'TEXT' for strings and anything
        unrecognized.
    """
    if pd.api.types.is_integer_dtype(dtype):
        # PostgreSQL INTEGER is a signed 32-bit type, while pandas reads whole
        # numbers as 64-bit by default. Only dtypes that are guaranteed to fit
        # keep INTEGER; wider or unsigned 32-bit dtypes get BIGINT so large
        # values do not fail with "integer out of range".
        width = getattr(dtype, 'itemsize', 8)
        is_unsigned = pd.api.types.is_unsigned_integer_dtype(dtype)
        if width < 4 or (width == 4 and not is_unsigned):
            return 'INTEGER'
        return 'BIGINT'
    elif pd.api.types.is_float_dtype(dtype):
        return 'FLOAT'
    elif pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    else:
        return 'TEXT'  # Default type for string and unrecognized types

# Load the CSV into its table.
# try/finally guarantees the cursor and connection are released even when the
# file is missing or a statement fails part-way through.
try:
    # Read the CSV file into a Pandas DataFrame
    df = pd.read_csv(csv_file)

    # Report missing values per column before they are mapped to SQL NULL.
    # The frame itself is left untouched: replacing NaN with None on the whole
    # frame is pandas-version dependent and can turn numeric columns into
    # object dtype, which would be created as TEXT below.
    print(f"Processing {csv_file}")
    print(f"NaN values before replacement:\n{df.isnull().sum()}\n")

    # Clean column names (replace spaces, special characters)
    df.columns = [col.replace(' ', '_').replace('-', '_').replace('.', '_') for col in df.columns]

    # Generate the CREATE TABLE statement with appropriate data types
    columns = ', '.join([f'"{col}" {get_sql_type(df[col].dtype)}' for col in df.columns])
    create_table_query = f'CREATE TABLE IF NOT EXISTS "{table_name}" ({columns});'
    cursor.execute(create_table_query)

    # The INSERT statement is the same for every row, so build it once
    col_names = ', '.join([f'"{col}"' for col in df.columns])
    placeholders = ', '.join(['%s'] * len(df.columns))
    sql = f'INSERT INTO "{table_name}" ({col_names}) VALUES ({placeholders})'

    # itertuples keeps each column's own dtype (iterrows coerces every row to
    # a single dtype); missing values become None, i.e. SQL NULL.
    records = [
        tuple(None if pd.isna(value) else value for value in record)
        for record in df.itertuples(index=False, name=None)
    ]
    cursor.executemany(sql, records)

    # Commit the transaction
    conn.commit()
finally:
    # Close the connection (uncommitted work from a failed load is discarded)
    cursor.close()
    conn.close()

print(f"Data from {csv_file} successfully loaded into {table_name} table in PostgreSQL!")
