In [None]:
# Setup (run once if DuckDB is not installed in the kernel's environment):
#   %pip install duckdb==0.7.0

In [None]:
from IPython.display import display
import altair as alt
from altair.utils.data import to_values

def VegaLite(spec):
    """Display a Vega-Lite specification as notebook output.

    The spec is wrapped in a single-entry MIME bundle and passed to
    ``display`` with ``raw=True``, i.e. as an already mimetype-keyed dict
    rather than as an object to be formatted.

    Args:
        spec: The Vega-Lite specification to render.

    Returns:
        None.
    """
    # NOTE(review): the MIME type names Vega-Lite v4, while the spec built in
    # aggregate_and_visualize declares the v5 `$schema` — confirm the notebook
    # frontend renders that combination as intended.
    display({'application/vnd.vegalite.v4+json': spec}, raw=True)

In [1]:
import duckdb
# Relative path to the flows CSV that every query in this notebook reads.
csv = '../../data/toflit18_all_flows.csv'
# Relation over the CSV restricted to the columns used below.
# ALL_VARCHAR=TRUE makes read_csv_auto return every column as text, so `value`
# is converted explicitly; TRY_CAST gives NULL instead of raising when a value
# cannot be converted to FLOAT.
select_all_flows = duckdb.sql(f"""
SELECT  customs_office,
        TRY_CAST(value AS FLOAT) as value,
        partner_grouping, year, export_import,
        best_guess_region_prodxpart,
        product_sitc_simplEN
FROM read_csv_auto('{csv}', ALL_VARCHAR=TRUE)
""")
# Preview of the rows that have a customs office. The SQL text refers to the
# Python variable `select_all_flows` by name, so that name must stay in sync.
duckdb.sql('FROM select_all_flows WHERE customs_office IS NOT NULL')

┌──────────────────────┬──────────┬───┬───────────────┬──────────────────────┬──────────────────────┐
│    customs_office    │  value   │ … │ export_import │ best_guess_region_…  │ product_sitc_simplEN │
│       varchar        │  float   │   │    varchar    │       varchar        │       varchar        │
├──────────────────────┼──────────┼───┼───────────────┼──────────────────────┼──────────────────────┤
│ Dunkerque-Basseville │  14482.5 │ … │ Exports       │ 1                    │ Chemical products    │
│ Dunkerque-Basseville │  81015.5 │ … │ Exports       │ 1                    │ Other foodstuffs a…  │
│ Dunkerque-Basseville │ 200467.5 │ … │ Exports       │ 1                    │ Leather, wood and …  │
│ Dunkerque-Basseville │    180.0 │ … │ Exports       │ 1                    │ Chemical products    │
│ Dunkerque-Basseville │   5700.0 │ … │ Exports       │ 1                    │ Other industrial p…  │
│ Dunkerque-Basseville │    450.0 │ … │ Exports       │ 1                    │ Lea

In [None]:
def aggregate_and_visualize(data, filters, title, limit=20):
    """Sum flow values per customs office and chart the largest totals.

    Runs ``filters`` as a DuckDB query, totals ``value`` for each
    ``customs_office``, keeps the ``limit`` offices with the highest totals
    and renders them as a Vega-Lite bar chart through ``VegaLite``.

    Args:
        data: Not read by this function — the query text in ``filters`` names
            its own source. Kept in the signature so existing calls keep
            working.
        filters: Complete DuckDB query text (including its ``FROM`` clause)
            whose result exposes ``customs_office`` and ``value`` columns.
        title: Title shown above the chart.
        limit: Maximum number of customs offices to chart. Defaults to 20,
            which reproduces the previous hard-coded behaviour.

    Returns:
        None. The chart is emitted as notebook output.
    """
    list_of_regions_values = duckdb.sql(filters)

    # No explicit grouping is given: customs_office is the only non-aggregated
    # expression, so the totals come out per customs office.
    values_per_customs_region = list_of_regions_values.aggregate("""
    customs_office,
    SUM(value) as total_value
    """)
    # The SQL text below refers to the local variable
    # `values_per_customs_region` by name; rename both together or not at all.
    ordered = duckdb.sql("""
    SELECT *
    FROM values_per_customs_region
    ORDER BY total_value DESC
    """)

    top_offices = ordered.limit(limit)

    # One dict per row, keyed by column name, as Vega-Lite inline data expects.
    # Stored under its own name so the `data` parameter is not overwritten.
    columns = top_offices.columns
    records = [dict(zip(columns, row)) for row in top_offices.fetchall()]

    VegaLite({
        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
        "title": title,
        "mark": "bar",
        "data": {
            "values": records
        },
        "encoding": {
            "y": {
                "field": "total_value",
                "type": "quantitative",
                "title": "Valeur"
            },
            "x": {
                "field": "customs_office",
                "type": "nominal",
                # With the default limit this is the original axis title.
                "title": f"Les {limit} plus importants bureaux de ferme",
                "sort": "-y"
            }
        }
    })

# Query text shared by the chart cells: it reads from `select_all_flows` and
# keeps rows that have a customs office and a non-NULL value, with
# best_guess_region_prodxpart = '1', year = '1789' and a partner_grouping other
# than 'France'. Later cells append further `AND ...` conditions to this text,
# so it must keep ending inside the WHERE clause.
# NOTE(review): `!=` is not true for NULL, so rows whose partner_grouping is
# NULL are excluded as well — confirm that is intended.
common_filters="""
    FROM select_all_flows
    WHERE customs_office IS NOT NULL
    AND value IS NOT NULL
    AND best_guess_region_prodxpart = '1'
    AND partner_grouping != 'France'
    AND year = '1789'
"""

In [None]:
# Exports only: the shared filters plus an export_import = 'Exports' condition,
# charted per customs office.
exports_all_but_france = f"""
    {common_filters}
    AND export_import = 'Exports'
    """

aggregate_and_visualize(
    data=select_all_flows,
    filters=exports_all_but_france,
    title='Valeur des exports de tout partenaire hors de la France en 1789',
)

In [None]:
# Exports only, additionally leaving out the 'Plantation foodstuffs' product
# category.
# NOTE(review): the chart title says "sauf les colonies" while the condition
# excludes a product category — confirm this proxy is intended. `!=` is also
# not true for NULL, so rows with a NULL product_sitc_simplEN are dropped too.
exports_all_but_france_and_monde = f"""
    {common_filters}
    AND export_import = 'Exports'
    AND product_sitc_simplEN != 'Plantation foodstuffs'
    """

aggregate_and_visualize(
    data=select_all_flows,
    filters=exports_all_but_france_and_monde,
    title='Valeur des exports de tout partenaire hors de la France en 1789 sauf les colonies',
)

In [None]:
# Imports only: the shared filters plus an export_import = 'Imports' condition,
# charted per customs office.
imports_all_but_france = f"""
    {common_filters}
    AND export_import = 'Imports'
    """

aggregate_and_visualize(
    data=select_all_flows,
    filters=imports_all_but_france,
    title='Valeur des imports de tout partenaire hors de la France en 1789',
)

In [None]:
# Imports only, additionally leaving out the 'Plantation foodstuffs' product
# category.
# NOTE(review): the chart title says "sauf les colonies" while the condition
# excludes a product category — confirm this proxy is intended. `!=` is also
# not true for NULL, so rows with a NULL product_sitc_simplEN are dropped too.
imports_all_but_france_and_monde = f"""
    {common_filters}
    AND export_import = 'Imports'
    AND product_sitc_simplEN != 'Plantation foodstuffs'
    """

aggregate_and_visualize(
    data=select_all_flows,
    filters=imports_all_but_france_and_monde,
    title='Valeur des imports de tout partenaire hors de la France en 1789 sauf les colonies',
)