In [None]:
# pip install duckdb==0.7.0

In [1]:
from IPython.display import display
import altair as alt

def VegaLite(spec):
    """Display a Vega-Lite specification as raw rich output.

    The spec is wrapped in a one-entry MIME bundle keyed by the Vega-Lite v4
    MIME type and handed to IPython's ``display`` with ``raw=True``.

    Parameters
    ----------
    spec : dict
        The Vega-Lite specification to render.
    """
    mime_bundle = {'application/vnd.vegalite.v4+json': spec}
    display(mime_bundle, raw=True)

In [2]:
import duckdb
# Relative path to the flows CSV file.
csv = '../../data/toflit18_all_flows.csv'
# Relation over the columns used by this notebook. The CSV is read with
# ALL_VARCHAR=TRUE, so every column is text; only `value` is converted to a
# number. TRY_CAST is used for that conversion: per DuckDB's documentation it
# yields NULL instead of raising when a value cannot be converted.
select_all_flows = duckdb.sql(f"""
SELECT  customs_office,
        TRY_CAST(value AS FLOAT) as value,
        partner_grouping, year, export_import,
        best_guess_region_prodxpart,
        product_sitc_simplEN,
        partner_simplification
FROM read_csv_auto('{csv}', ALL_VARCHAR=TRUE)
""")
# Preview the rows that have a customs office (last expression of the cell, so
# it is shown as the output). The query refers to the Python variable
# `select_all_flows` by name: renaming the variable means updating every SQL
# string that mentions it.
duckdb.sql('FROM select_all_flows WHERE customs_office IS NOT NULL')

┌──────────────────────┬──────────┬──────────────────────┬───┬──────────────────────┬──────────────────────┐
│    customs_office    │  value   │   partner_grouping   │ … │ product_sitc_simplEN │ partner_simplifica…  │
│       varchar        │  float   │       varchar        │   │       varchar        │       varchar        │
├──────────────────────┼──────────┼──────────────────────┼───┼──────────────────────┼──────────────────────┤
│ Dunkerque-Basseville │  14482.5 │ Divers               │ … │ Chemical products    │ Monde via Dunkerque  │
│ Dunkerque-Basseville │  81015.5 │ Divers               │ … │ Other foodstuffs a…  │ Monde via Dunkerque  │
│ Dunkerque-Basseville │ 200467.5 │ Divers               │ … │ Leather, wood and …  │ Monde via Dunkerque  │
│ Dunkerque-Basseville │    180.0 │ Divers               │ … │ Chemical products    │ Monde via Dunkerque  │
│ Dunkerque-Basseville │   5700.0 │ Divers               │ … │ Other industrial p…  │ Monde via Dunkerque  │
│ Dunkerque-Bassevi

In [3]:
# Shared FROM/WHERE fragment for the queries in the following cells, which
# interpolate it into an f-string and append further `AND ...` conditions.
# `year` and `best_guess_region_prodxpart` are compared against string
# literals because the CSV was loaded with ALL_VARCHAR=TRUE.
base_relation="""
    FROM select_all_flows
    WHERE customs_office IS NOT NULL
    AND value IS NOT NULL
    AND best_guess_region_prodxpart = '1'
    AND partner_grouping != 'France'
    AND year = '1789'
"""

In [4]:
def aggregate_order_data(relation, limit=20):
    """Total the flow values per customs office and return the largest ones.

    Parameters
    ----------
    relation : str
        DuckDB SQL text, evaluated with ``duckdb.sql``. Its result must expose
        the ``customs_office`` and ``value`` columns.
    limit : int, optional
        Maximum number of customs offices to return. Defaults to 20, the
        cut-off that used to be hard-coded.

    Returns
    -------
    list of dict
        One ``{"customs_office": ..., "total_value": ...}`` record per
        customs office, sorted by ``total_value`` in descending order.
    """
    flows = duckdb.sql(relation)

    # No explicit group list is passed: this relies on DuckDB grouping by the
    # non-aggregated column (customs_office).
    totals_per_office = flows.aggregate("""
    customs_office,
    SUM(value) as total_value
    """)

    # Sort and truncate through the relational API. The previous version ran a
    # SQL string that referenced a local Python variable by name, which broke
    # silently-at-a-distance as soon as that variable was renamed.
    top_offices = totals_per_office.order("total_value DESC").limit(limit)

    columns = top_offices.columns
    return [dict(zip(columns, row)) for row in top_offices.fetchall()]

def visualise_single(data, title):
    """Build a bar chart of the total value per customs office.

    Parameters
    ----------
    data : list of dict
        Records carrying the ``customs_office`` and ``total_value`` fields;
        they are embedded in the spec as inline values.
    title : str
        Chart title.

    Returns
    -------
    The object produced by ``alt.Chart().from_dict`` for the Vega-Lite spec:
    customs offices on the x axis, sorted by descending y, and total value on
    the y axis.
    """
    value_axis = {
        "field": "total_value",
        "type": "quantitative",
        "title": "Valeur",
    }
    office_axis = {
        "field": "customs_office",
        "type": "nominal",
        "title": "Les 20 plus importants bureaux de ferme",
        "sort": "-y",
    }
    spec = {
        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
        "title": title,
        "mark": "bar",
        "data": {"values": data},
        "encoding": {"y": value_axis, "x": office_axis},
    }
    return alt.Chart().from_dict(spec)

In [5]:
# Restrict the shared base relation to export flows
filtered_relation = f"""
    {base_relation}
    AND export_import = 'Exports'
    """

# Aggregate the relation by 'customs_office'
data = aggregate_order_data(relation=filtered_relation)
title = 'Valeur des exports de tout partenaire hors de la France en 1789'

# Visualise this relation, reusing `title` instead of repeating the literal
chart = visualise_single(data, title)

# Bare name on the last line: the chart is shown as the cell output
chart

In [None]:
# Export flows again, this time leaving out colonial products and colonies
filtered_relation = f"""
    {base_relation}
    AND export_import = 'Exports'
    AND product_sitc_simplEN != 'Plantation foodstuffs'
    AND partner_grouping NOT IN ('Outre Mer', 'Amérique', 'Afrique', 'Asie')
    AND partner_simplification != 'Monde hors colonies'
    """

# Sum the values per customs office and keep the largest totals
data = aggregate_order_data(filtered_relation)

# Build the bar chart for this selection
chart = visualise_single(
    data=data,
    title='Valeur des exports de tout partenaire hors de la France en 1789 sauf les produits coloniaux et les colonies',
)

# Bare name on the last line: the chart is shown as the cell output
chart

In [None]:
# Restrict the shared base relation to import flows
filtered_relation = f"""
    {base_relation}
    AND export_import = 'Imports'
    """

# Sum the values per customs office and keep the largest totals
data = aggregate_order_data(filtered_relation)

# Build the bar chart for this selection
chart = visualise_single(
    data=data,
    title='Valeur des imports de tout partenaire hors de la France en 1789',
)

# Bare name on the last line: the chart is shown as the cell output
chart

In [None]:
# Import flows again, this time leaving out colonial products and colonies
filtered_relation = f"""
    {base_relation}
    AND export_import = 'Imports'
    AND product_sitc_simplEN != 'Plantation foodstuffs'
    AND partner_grouping NOT IN ('Outre Mer', 'Amérique', 'Afrique', 'Asie')
    AND partner_simplification != 'Monde hors colonies'
    """

# Sum the values per customs office and keep the largest totals
data = aggregate_order_data(filtered_relation)

# Build the bar chart for this selection
chart = visualise_single(
    data=data,
    title='Valeur des imports de tout partenaire hors de la France en 1789 sauf les produits coloniaux et les colonies',
)

# Bare name on the last line: the chart is shown as the cell output
chart