In [None]:
# Uncomment the next line if DuckDB is not installed in this kernel's environment:
# %pip install duckdb

In [None]:
import duckdb

# Open a DuckDB connection without naming a database file.
conn = duckdb.connect()

In [None]:
# Connect to the database file 'mydb.duckdb' with write access (read_only=False).
conn = duckdb.connect(database='mydb.duckdb', read_only=False)

In [None]:
import duckdb

# Statement that materialises flights.csv as a table named `flights`.
create_flights_sql = '''
  CREATE TABLE flights 
    as 
  SELECT 
    * 
  FROM read_csv_auto('flights.csv')    
'''

# Start from a fresh connection and run the statement on it.
conn = duckdb.connect()
conn.execute(create_flights_sql)

In [None]:
import duckdb

# Same CREATE TABLE ... AS SELECT as before, kept in a named string.
create_flights_sql = '''
  CREATE TABLE flights 
    as 
  SELECT 
    * 
  FROM read_csv_auto('flights.csv')
'''

conn = duckdb.connect()
# .df() turns the statement's result into a pandas DataFrame, which the
# notebook then renders as the cell output.
conn.execute(create_flights_sql).df()

In [None]:
# CREATE OR REPLACE rebuilds `flights` even when the table already exists;
# the FROM-first form reads the CSV file directly by its path.
replace_flights_sql = '''
  CREATE OR REPLACE TABLE flights 
    as 
  FROM 'flights.csv'
'''
conn.execute(replace_flights_sql).df()

In [None]:
# Run SHOW TABLES on the current connection and render the result as a DataFrame.
display(conn.execute('SHOW TABLES').df())

In [None]:
# Fetch every row of `flights` into a pandas DataFrame and display it.
display(conn.execute('SELECT * FROM flights').df())

In [None]:
# Create `airports` with an explicit schema (every column VARCHAR) and then
# bulk-load airports.csv into it with COPY.
# CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE raises an
# error when `airports` already exists, and starting from a fresh table also
# stops COPY from appending a second copy of the rows on a re-run.
conn.execute('''
  CREATE OR REPLACE TABLE airports(
    IATA_CODE VARCHAR, AIRPORT VARCHAR, CITY VARCHAR,
    STATE VARCHAR, COUNTRY VARCHAR, LATITUDE VARCHAR,
    LONGITUDE VARCHAR); 
  COPY airports FROM 'airports.csv' (AUTO_DETECT TRUE);
''')

# Show the loaded rows.
display(conn.execute('SELECT * FROM airports').df())

In [None]:
# Count the column entries that information_schema lists for `airports`.
column_count_sql = '''
  SELECT COUNT(*) AS column_count
  FROM information_schema.columns
  WHERE table_name = 'airports';
'''
result = conn.execute(column_count_sql).fetchall()

# The aggregate returns one row with one value: the column count.
column_count = result[0][0]
print(f"Number of columns: {column_count}")

In [None]:
# Drop any existing `airports` table and rebuild it straight from the CSV,
# asking read_csv to treat every column as VARCHAR (all_varchar=true).
recreate_airports_sql = '''
  DROP TABLE IF EXISTS airports;
  CREATE TABLE airports
  AS
  FROM read_csv('airports.csv', all_varchar=true)
'''
conn.execute(recreate_airports_sql)

In [None]:
# Run SHOW TABLES again to see which tables exist after recreating `airports`.
display(conn.execute('SHOW TABLES').df())

In [None]:
# Read airlines.csv with a declared header row and explicit column types
# instead of relying on auto-detection.
airlines_sql = '''
  SELECT 
    * 
  FROM read_csv('airlines.csv',  
                Header = True, 
                Columns = {'IATA_CODE': 'VARCHAR', 'AIRLINE': 'VARCHAR'})
'''
airlines = conn.execute(airlines_sql).df()

airlines

In [None]:
# Same file, this time letting read_csv_auto work out the CSV settings.
airlines_auto_sql = '''
  SELECT 
    * 
  FROM read_csv_auto('airlines.csv')
'''
airlines = conn.execute(airlines_auto_sql).df()
airlines

In [None]:
# Register the `airlines` DataFrame on the connection under the name "airlines"
# so that SQL statements can refer to it by that name.
conn.register("airlines", airlines)

In [None]:
# Run SHOW TABLES after registering the DataFrame and display the result.
display(conn.execute('SHOW TABLES').df())

In [None]:
# Query the name "airlines" through SQL and display the rows as a DataFrame.
display(conn.execute('SELECT * FROM airlines').df())

In [None]:
import pandas as pd

# Load the CSV with pandas (rather than with DuckDB's own CSV reader).
df_airlines = pd.read_csv("airlines.csv")

# Register the DataFrame on the connection under the name "airlines" so it
# can be queried with SQL.
conn.register("airlines", df_airlines)

In [None]:
# Export three columns of the `airports` table to a comma-delimited CSV file
# that includes a header row.
export_location_sql = '''
COPY
  (SELECT IATA_CODE, LATITUDE, LONGITUDE FROM airports)
TO
  'airports_location.csv' WITH (HEADER 1, DELIMITER ',');
'''
conn.execute(export_location_sql)

In [None]:
# Same export, but the source is the airports.csv file itself (no table
# involved) and only the first 10 rows are written.
export_first_ten_sql = '''
COPY
 (SELECT
   IATA_CODE, LATITUDE, LONGITUDE
  FROM 'airports.csv'
  LIMIT 10)
  TO
    'airports_location.csv' WITH (HEADER 1, DELIMITER ',');
'''
conn.execute(export_first_ten_sql)

In [None]:
# Close the DuckDB connection; `conn` must be re-created before it is used again.
conn.close()

In [None]:
!pip install fastparquet

In [None]:
import pandas as pd

# Convert airports.csv to a Parquet file, using fastparquet as the writer.
df_airports = pd.read_csv("airports.csv")
df_airports.to_parquet(path='airports.parquet', engine='fastparquet')

In [None]:
import duckdb

# Build `airports` from at most 100 rows of the Parquet file.
create_airports_sql = '''
  CREATE TABLE airports
    as
  SELECT * FROM read_parquet('airports.parquet')
  LIMIT 100
'''

conn = duckdb.connect()
conn.execute(create_airports_sql)

In [None]:
# Display the rows currently stored in `airports`.
display(conn.execute('SELECT * FROM airports').df())

In [None]:
# Append up to 100 more rows from the Parquet file, taken in descending order
# of the first column. Each run of this cell appends again.
append_airports_sql = '''
  INSERT INTO airports
  SELECT * FROM read_parquet('airports.parquet')
  ORDER BY 1 DESC
  LIMIT 100
'''
conn.execute(append_airports_sql)

# Show the table after the insert.
display(conn.execute('SELECT * FROM airports').df())

In [None]:
# Load the Parquet file into the existing `airports` table with COPY ... FROM.
copy_from_parquet_sql = '''
  COPY airports
  FROM 'airports.parquet' (FORMAT PARQUET);
'''
conn.execute(copy_from_parquet_sql)

In [None]:
# Write the whole `airports` table out as a Parquet file.
export_all_parquet_sql = '''
  COPY airports
  TO
    'airports_all.parquet' (FORMAT PARQUET);
'''
conn.execute(export_all_parquet_sql)

In [None]:
# COPY also accepts a query as its source: export only 100 rows of `airports`.
export_subset_parquet_sql = '''
  COPY
    (SELECT * FROM airports LIMIT 100)
  TO
    'airports_100.parquet' (FORMAT PARQUET);
'''
conn.execute(export_subset_parquet_sql)

In [None]:
import duckdb

conn = duckdb.connect()

# Install, then load, the spatial extension before calling st_read.
for extension_statement in ('INSTALL spatial', 'LOAD spatial'):
    conn.execute(extension_statement)

# Create `airports` from the 'airports' layer (worksheet) of the workbook.
airports_from_excel_sql = '''
  CREATE TABLE airports
  as
  SELECT * FROM st_read('airports_and_airlines.xlsx', layer='airports');
'''
conn.execute(airports_from_excel_sql)

display(conn.execute('SELECT * FROM airports').df())

In [None]:
import os
# OGR_XLSX_HEADERS is a GDAL XLSX-driver option controlling how the first row
# of a sheet is interpreted. Per the GDAL documentation, DISABLE treats the
# first row as data rather than as column names.
os.environ['OGR_XLSX_HEADERS'] = 'DISABLE'

In [None]:
# Per the GDAL XLSX driver documentation, FORCE always treats the first row of
# a sheet as the header (column names).
os.environ['OGR_XLSX_HEADERS'] = 'FORCE'

In [None]:
# Per the GDAL XLSX driver documentation, AUTO lets the driver detect whether
# the first row of a sheet is a header.
os.environ['OGR_XLSX_HEADERS'] = 'AUTO'

In [None]:
# Create `airlines` from the 'airlines' layer (worksheet) of the same workbook.
airlines_from_excel_sql = '''
  CREATE TABLE airlines
  AS
  SELECT * FROM st_read('airports_and_airlines.xlsx', layer='airlines');
'''
conn.execute(airlines_from_excel_sql)

display(conn.execute('SELECT * FROM airlines').df())

In [None]:
conn = duckdb.connect()

# A new connection needs the spatial extension loaded before st_read is used.
for extension_statement in ('INSTALL spatial', 'LOAD spatial'):
    conn.execute(extension_statement)

# Declare the `airlines` schema up front, then fill the table from the
# 'airlines' layer of the workbook.
load_airlines_sql = '''
  CREATE TABLE airlines (
    IATA_CODE STRING,
    AIRLINES STRING
  );
  INSERT INTO airlines
  SELECT * FROM st_read('airports_and_airlines.xlsx', layer='airlines');
'''
conn.execute(load_airlines_sql)

display(conn.execute('SELECT * FROM airlines').df())

In [None]:
# OGR_XLSX_FIELD_TYPES is a GDAL XLSX-driver option; per the GDAL documentation,
# STRING makes every field a string instead of detecting field types.
os.environ['OGR_XLSX_FIELD_TYPES'] = 'STRING' # default is AUTO

In [None]:
# Export `airlines` to an Excel workbook through the GDAL 'xlsx' driver.
export_airlines_xlsx_sql = '''
  COPY airlines
  TO 'airlines.xlsx' WITH (FORMAT GDAL, DRIVER 'xlsx');
'''
conn.execute(export_airlines_xlsx_sql)

In [None]:
pip install mysql-connector-python

In [None]:
import os

import mysql.connector
import duckdb

# MySQL connection information.
# The password can be supplied through the MYSQL_PASSWORD environment variable
# so a real credential does not have to be stored in the notebook; the literal
# below is only the fallback used when that variable is not set.
mysql_host = 'localhost'
mysql_user = 'user1'
mysql_password = os.environ.get('MYSQL_PASSWORD', 'password')
mysql_database = 'My_DB'
mysql_table = 'airlines'

# query that reads the whole source table from MySQL
mysql_query = f'SELECT * FROM {mysql_table}'

# DuckDB table with the same schema as the MySQL table
duckdb_create_table_query = \
'CREATE TABLE airlines (IATA_CODE VARCHAR(2), AIRLINES VARCHAR)'

# create a DuckDB connection
duckdb_conn = duckdb.connect()

# try/finally blocks guarantee that every connection and cursor is closed even
# when one of the steps raises.
try:
    # connect to MySQL
    mysql_conn = mysql.connector.connect(
        host=mysql_host,
        user=mysql_user,
        password=mysql_password,
        database=mysql_database,
    )
    try:
        # create a cursor for MySQL
        mysql_cursor = mysql_conn.cursor()
        try:
            # query data from MySQL
            mysql_cursor.execute(mysql_query)

            # get column names from MySQL result
            mysql_columns = [column[0] for column in mysql_cursor.description]

            # fetch everything now so the cursor has no unread rows when it
            # is closed
            mysql_rows = mysql_cursor.fetchall()
        finally:
            mysql_cursor.close()
    finally:
        mysql_conn.close()

    # create the target table in DuckDB
    duckdb_conn.execute(duckdb_create_table_query)

    # one "?" placeholder per MySQL column
    duckdb_insert_query = \
    f'INSERT INTO airlines VALUES ({", ".join(["?"] * len(mysql_columns))})'

    # insert all rows with a single executemany call instead of one execute
    # per row; skipped for an empty source table because there is nothing to
    # insert
    if mysql_rows:
        duckdb_conn.executemany(duckdb_insert_query, mysql_rows)

    # query the data in DuckDB
    display(duckdb_conn.execute('SELECT * FROM airlines').df())
finally:
    # close the DuckDB connection
    duckdb_conn.close()

In [None]:
import os

import duckdb

# define MySQL connection parameters
# The password can be supplied through the MYSQL_PASSWORD environment variable
# so a real credential does not have to be stored in the notebook; the literal
# below is only the fallback used when that variable is not set.
mysql_host     = 'localhost'
mysql_user     = 'user1'
mysql_password = os.environ.get('MYSQL_PASSWORD', 'password')
mysql_database = 'My_DB'
mysql_table    = 'airlines'
mysql_port     = 3306

# URL form of the connection information.
# NOTE(review): nothing in this cell uses this value (ATTACH below takes the
# key=value form) - confirm whether it is needed elsewhere.
mysql_connection = f'mysql://{mysql_user}:{mysql_password}@{mysql_host}/{mysql_database}'

# ATTACH statement that exposes the MySQL database to DuckDB as `mysqldb`
attach_command = f'''
    ATTACH 'host={mysql_host} 
    user={mysql_user} 
    password={mysql_password} 
    port={mysql_port} 
    database={mysql_database}' 
    AS mysqldb (TYPE MYSQL);
'''

# queries run against the attached database
select_table_query = f'''
    SELECT * FROM {mysql_table}
'''
show_tables_query = '''
    show tables
'''

# create a DuckDB connection
conn = duckdb.connect()

# try/finally guarantees the connection is closed even if installing the
# extension, attaching, or querying fails
try:
    # install and load the MySQL extension
    conn.execute('INSTALL mysql')
    conn.execute('LOAD mysql')

    # attach the MySQL database and make it the default for unqualified names
    conn.execute(attach_command)
    conn.execute('USE mysqldb;')

    # read the MySQL table through DuckDB
    display(conn.execute(select_table_query).df())

    # list the tables visible after USE mysqldb
    display(conn.execute(show_tables_query).df())
finally:
    # close the DuckDB connection
    conn.close()