In [1]:
import pandas as pd
import duckdb

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# insert data
# Both demo tables share one schema, so their setup scripts are rendered from a
# single template instead of two hand-copied SQL blocks that could drift apart.
# Each script drops and recreates its table, inserts the sample rows, and ends
# with a SELECT over the freshly loaded table.
_CUSTOMER_TABLE_SCRIPT = """
drop table if exists {table_name};

create table {table_name} (customer_id int, customer_name varchar);

insert into {table_name} values
{value_rows};
SELECT * FROM {table_name}
"""


def _customer_table_script(table_name, rows):
    """Render the multi-statement setup script for one customer table.

    Args:
        table_name: Name of the table to drop, recreate and select from. It is
            inserted into the SQL verbatim, so pass trusted identifiers only.
        rows: Iterable of ``(customer_id, customer_name)`` pairs to insert.

    Returns:
        The SQL script as a string.

    Raises:
        ValueError: If ``rows`` is empty, because ``insert ... values`` with no
            tuples is not valid SQL.
    """
    value_rows = ",\n".join(
        # Double any single quote so a name such as O'Neil stays a valid literal.
        "  ({}, '{}')".format(customer_id, str(customer_name).replace("'", "''"))
        for customer_id, customer_name in rows
    )
    if not value_rows:
        raise ValueError("rows must contain at least one (customer_id, customer_name) pair")
    return _CUSTOMER_TABLE_SCRIPT.format(table_name=table_name, value_rows=value_rows)


customers_table_creation_query = _customer_table_script(
    "customers", [(1, "Thomas"), (2, "Thierry")]
)

new_customers_table_creation_query = _customer_table_script(
    "new_customers", [(1, "Thomas"), (3, "Marc")]
)

In [3]:
# Run each setup script through DuckDB, in order, and convert what it returns
# into a pandas DataFrame (each script ends with a SELECT over its table).
# NOTE(review): the DataFrames share their names with the DuckDB tables the
# scripts create; later SQL cells presumably resolve `customers` /
# `new_customers` to the tables rather than to these DataFrames - confirm.
customers, new_customers = (
    duckdb.sql(setup_script).df()
    for setup_script in (
        customers_table_creation_query,
        new_customers_table_creation_query,
    )
)

# 1. Union all

## 1a. SQL

In [4]:
# UNION ALL appends the rows of the second SELECT to those of the first and
# keeps duplicates, so a row present in both tables is returned twice.
sql_query = """ 
    SELECT 
        customer_id,
        customer_name
    FROM customers
    UNION ALL 
    SELECT 
        customer_id,
        customer_name
    FROM new_customers
    """

# Left as the cell's last expression so the notebook displays the result.
duckdb.sql(sql_query)

┌─────────────┬───────────────┐
│ customer_id │ customer_name │
│    int32    │    varchar    │
├─────────────┼───────────────┤
│           1 │ Thomas        │
│           2 │ Thierry       │
│           1 │ Thomas        │
│           3 │ Marc          │
└─────────────┴───────────────┘

## 1b. Pandas

In [5]:
# Pandas counterpart of UNION ALL: stack both frames row-wise, keeping
# duplicates, and renumber the index instead of carrying each frame's labels.
frames_to_stack = [customers, new_customers]
pd.concat(frames_to_stack, ignore_index=True)

Unnamed: 0,customer_id,customer_name
0,1,Thomas
1,2,Thierry
2,1,Thomas
3,3,Marc


# 2. Union

## 2a. SQL

In [6]:
# UNION combines both SELECTs and removes duplicate rows, so a row present in
# both tables is returned only once.
# NOTE(review): there is no ORDER BY, so the row order of the result should
# presumably not be relied on - confirm before asserting on it.
sql_query = """ 
    SELECT 
        customer_id,
        customer_name
    FROM customers
    UNION 
    SELECT 
        customer_id,
        customer_name
    FROM new_customers
    """

# Left as the cell's last expression so the notebook displays the result.
duckdb.sql(sql_query)

┌─────────────┬───────────────┐
│ customer_id │ customer_name │
│    int32    │    varchar    │
├─────────────┼───────────────┤
│           1 │ Thomas        │
│           3 │ Marc          │
│           2 │ Thierry       │
└─────────────┴───────────────┘

## 2b. Pandas

In [7]:
# Pandas counterpart of UNION: stack both frames, then discard repeated rows.
# drop_duplicates() keeps each surviving row's label from the concatenated
# frame, so the displayed index can have gaps; chain .reset_index(drop=True)
# if consecutive labels are needed.
all_customers = pd.concat([customers, new_customers], ignore_index=True)
all_customers.drop_duplicates()

Unnamed: 0,customer_id,customer_name
0,1,Thomas
1,2,Thierry
3,3,Marc


# Summary

<img src="assets/union.png" alt="Summary of UNION ALL and UNION in SQL and pandas" width="1000" />
