In [None]:
!python -m pip install duckdb

# Getting Started With DuckDB

In [None]:
import duckdb

# The query selects two literal values, so it needs no table or file.
waterfowl_query = "SELECT 'whistling_duck' AS waterfowl, 'whistle' AS call"
duckdb.sql(waterfowl_query)

## Creating a Database From a Data Source

In [None]:
import duckdb

# Preview query: `presidents_relation` is resolved from the Python variable of
# the same name that is assigned inside the `with` block below.
preview_query = """
    SELECT sequence, last_name, first_name
    FROM presidents_relation
    WHERE sequence <= 2
    """

# The context manager closes the connection even if a statement below raises,
# so a failed run doesn't leave presidents.db open.
with duckdb.connect(database="presidents.db") as conn:
    presidents_relation = conn.read_parquet("presidents.parquet")

    conn.sql(preview_query).show()

    # Drop any table left over from an earlier run so that re-running this
    # cell doesn't fail because "presidents" already exists.
    conn.execute("DROP TABLE IF EXISTS presidents")
    presidents_relation.to_table("presidents")

In [None]:
# Select the rows of the `presidents` table whose last name is Adams.
adams_query = """
        SELECT last_name, first_name
        FROM presidents
        WHERE last_name = 'Adams' 
        """

with duckdb.connect(database="presidents.db") as conn:
    adams_relation = conn.sql(adams_query)
    adams_relation.show()

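**This code won't work.** `presidents_relation` was only a relation on the earlier, now-closed connection and was never saved to `presidents.db`. Only the `presidents` table was saved.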

```python
with duckdb.connect(database="presidents.db") as conn:
    conn.sql("SELECT * FROM presidents_relation")
```

## Correcting Your Data Import Errors

In [None]:
import duckdb

# Read the CSV without any extra options and print its first two rows.
with duckdb.connect(database="presidents.db") as conn:
    presidents_relation = conn.read_csv("presidents.csv")
    first_two_rows = presidents_relation.limit(2)
    print(first_two_rows)

In [None]:
import duckdb

# strftime-style pattern: full month name, day of month, four-digit year.
csv_date_format = "%B %d %Y"

# Read the CSV with an explicit date format and print the column types.
with duckdb.connect(database="presidents.db") as conn:
    presidents_relation = conn.read_csv(
        "presidents.csv",
        date_format=csv_date_format,
    )
    column_types = presidents_relation.dtypes
    print(column_types)

# Querying the Database
## Querying Tables

In [None]:
import duckdb

# Load parties.json into a "parties" table inside presidents.db.
with duckdb.connect(database="presidents.db") as conn:
    # Drop any table left over from an earlier run so that re-running this
    # cell doesn't fail because "parties" already exists.
    conn.execute("DROP TABLE IF EXISTS parties")
    conn.read_json("parties.json").to_table("parties")

In [None]:
# Join the `presidents` and `parties` tables on party_id and keep the Whigs,
# sorted by last name in descending order.
whig_tables_query = """
        SELECT first_name, last_name, party_name
        FROM presidents
        JOIN parties
        ON presidents.party_id = parties.party_id
        WHERE party_name = 'Whig'
        ORDER BY last_name DESC
        """

with duckdb.connect("presidents.db") as conn:
    whig_presidents = conn.sql(whig_tables_query)
    print(whig_presidents)

## Querying Relations

In [None]:
import duckdb

# The SQL below refers to these two relations by their Python variable names,
# so the names `presidents` and `parties` must stay as they are.
presidents = duckdb.read_parquet("presidents.parquet")
parties = duckdb.read_json("parties.json")

whig_relations_query = """
    SELECT first_name, last_name, party_name
    FROM presidents
    JOIN parties
    ON presidents.party_id = parties.party_id
    WHERE party_name = 'Whig'
    ORDER BY last_name DESC
    """

duckdb.sql(whig_relations_query)

In [None]:
import duckdb

# Alias each relation so the join condition can qualify party_id by name.
presidents = duckdb.read_parquet("presidents.parquet").set_alias("presidents")
parties = duckdb.read_json("parties.json").set_alias("parties")

# Join first, then narrow the columns and rows, then sort.
presidents_with_party = presidents.join(
    parties, "presidents.party_id = parties.party_id"
)
whig_presidents = presidents_with_party.select(
    "first_name", "last_name", "party_name"
).filter("party_name = 'Whig'")
whig_presidents.order("last_name DESC")

In [None]:
# The aliases, not the Python variable names, are what the join condition uses.
leaders = duckdb.read_parquet("presidents.parquet").set_alias("usa_presidents")
faction = duckdb.read_json("parties.json").set_alias("political_parties")

# Join first, then narrow the columns and rows, then sort.
leaders_with_faction = leaders.join(
    faction, "usa_presidents.party_id = political_parties.party_id"
)
whig_leaders = leaders_with_faction.select(
    "first_name", "last_name", "party_name"
).filter("party_name = 'Whig'")
whig_leaders.order("last_name DESC")

# Using Concurrency
## Performing Concurrent Reads

In [None]:
from concurrent.futures import ThreadPoolExecutor
import duckdb


def read_data(thread_id):
    """Show the name of the president whose ``sequence`` is 1.

    Opens its own connection to presidents.db, displays the query result,
    and prints a message before and after the read.

    Args:
        thread_id: Identifier used only in the progress messages.
    """
    first_president_query = """
            SELECT first_name, last_name
            FROM presidents
            WHERE sequence = 1
            """
    print(f"Thread {thread_id} starting its read.")
    with duckdb.connect("presidents.db") as conn:
        first_president = conn.sql(first_president_query)
        first_president.show()
    print(f"Thread {thread_id} ending its read.")


def concurrent_read():
    """Run ``read_data`` on three worker threads and wait for them to finish.

    Raises:
        Exception: The first exception (in submission order) raised by a
            worker is re-raised here.
    """
    with ThreadPoolExecutor(max_workers=3) as executor:
        # executor.map() only re-raises a worker's exception when its result
        # is retrieved, so consume the results; otherwise a failed read
        # would go completely unnoticed.
        list(executor.map(read_data, range(3)))


concurrent_read()

## Performing Concurrent Writes

In [None]:
from concurrent.futures import ThreadPoolExecutor
import duckdb


def update_data(thread_id):
    """Rename the president whose ``sequence`` is 1 to ``George (<thread_id>)``.

    Opens its own connection to presidents.db and prints a message before
    and after the update.

    Args:
        thread_id: Identifier embedded in the new first name and in the
            progress messages.
    """
    new_name = f"George ({thread_id})"
    with duckdb.connect("presidents.db") as conn:
        print(f"Thread {thread_id} starting its update.")
        # Bind the value as a prepared-statement parameter instead of
        # interpolating it into the SQL text, so quote characters in the
        # name can never change the statement.
        conn.execute(
            """
            UPDATE presidents
            SET first_name = ?
            WHERE sequence = 1
            """,
            [new_name],
        )
        print(f"Thread {thread_id} ending its update.")


def concurrent_update():
    """Run ``update_data`` on three worker threads and report any failures.

    A worker that raises does not stop the others; its exception is printed
    once all workers have finished.
    """
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [
            executor.submit(update_data, thread_id) for thread_id in range(3)
        ]

    # Leaving the `with` block waits for every worker. An exception raised in
    # a worker is stored on its future and is otherwise never shown, so
    # surface it explicitly.
    for thread_id, future in enumerate(futures):
        error = future.exception()
        if error is not None:
            print(f"Thread {thread_id} failed: {error!r}")


concurrent_update()

In [None]:
# Print the name currently stored for the president whose sequence is 1.
first_president_query = """
            SELECT last_name, first_name
            FROM presidents
            WHERE sequence = 1
            """

with duckdb.connect("presidents.db") as conn:
    first_president = conn.sql(first_president_query)
    print(first_president)

# Integrating DuckDB Within Your Python Environment
## Creating Python Functions for DuckDB to Use

In [None]:
import duckdb


def short_name(first_name: str, last_name: str) -> str:
    """Abbreviate a name to its first initial followed by the last name.

    Args:
        first_name: Given name; only its first character is used.
        last_name: Family name, used unchanged.

    Returns:
        ``"<initial>. <last_name>"``, or just ``last_name`` when
        ``first_name`` is empty.
    """
    # Indexing an empty first name would raise IndexError, so fall back to
    # the last name alone.
    if not first_name:
        return last_name
    return f"{first_name[0]}. {last_name}"

In [None]:
# Call the function directly from Python as a quick check of its output.
short_name(first_name="Abraham", last_name="Lincoln")

**Only run this code if you need to update your existing `short_name()` function**

```python
duckdb.remove_function("short_name")
```

In [None]:
# Register the Python function with DuckDB under the SQL name "short_name".
duckdb.create_function("short_name", short_name)

In [None]:
# The SQL below refers to this relation by its Python variable name.
presidents = duckdb.read_parquet("presidents.parquet")

# The query calls short_name() from SQL and computes term_end - term_start
# for each row.
tenure_query = """ 
    SELECT short_name(first_name::VARCHAR, last_name::VARCHAR) AS name,
    (term_end - term_start) AS "days in office"
    FROM presidents
    """

tenure_relation = duckdb.sql(tenure_query)
tenure_relation.limit(3)

# Using Polars and pandas With DuckDB

In [None]:
!python -m pip install pandas polars pyarrow

In [None]:
import duckdb

# Select the presidents whose sequence is between 2 and 5 (inclusive).
early_presidents_query = """
        SELECT last_name, first_name
        FROM presidents
        WHERE sequence BETWEEN 2 AND 5
        """

with duckdb.connect("presidents.db") as conn:
    # Convert to a pandas DataFrame while the connection is still open.
    early_presidents = conn.sql(early_presidents_query)
    pandas_presidents = early_presidents.df()

pandas_presidents

In [None]:
import duckdb

# Alias each relation so the join condition can qualify party_id by name.
presidents = duckdb.read_parquet("presidents.parquet").set_alias("presidents")
parties = duckdb.read_json("parties.json").set_alias("parties")

# Join, keep the name columns, and sort by last name in descending order.
presidents_with_party = presidents.join(
    parties, "presidents.party_id = parties.party_id"
)
ordered_names = presidents_with_party.select(
    "first_name", "last_name", "party_name"
).order("last_name DESC")

# Convert to a Polars DataFrame and show its first three rows.
ordered_names.pl().head(3)