In [1]:
# pip install duckdb==0.7.0

In [2]:
from IPython.display import display
# import pandas as pd
import altair as alt
from altair.utils.data import to_values

def VegaLite(spec):
    bundle = {}
    bundle['application/vnd.vegalite.v4+json'] = spec
    display(bundle, raw=True)

In [3]:
import duckdb
csv = '../../data/toflit18_all_flows.csv'
select_all_flows = duckdb.sql(f"""
SELECT customs_region, TRY_CAST(value AS FLOAT) as value, partner_grouping, year, export_import, best_guess_region_prodxpart
FROM read_csv_auto('{csv}', ALL_VARCHAR=TRUE)
""")
select_all_flows.show()

┌────────────────┬───────────┬──────────────────┬─────────┬───────────────┬─────────────────────────────┐
│ customs_region │   value   │ partner_grouping │  year   │ export_import │ best_guess_region_prodxpart │
│    varchar     │   float   │     varchar      │ varchar │    varchar    │           varchar           │
├────────────────┼───────────┼──────────────────┼─────────┼───────────────┼─────────────────────────────┤
│ NULL           │  248210.0 │ Divers           │ 1792    │ Exports       │ 0                           │
│ NULL           │      NULL │ Divers           │ 1792    │ Exports       │ 0                           │
│ NULL           │      NULL │ Divers           │ 1792    │ Exports       │ 0                           │
│ NULL           │      NULL │ Outre-mers       │ 1792    │ Exports       │ 0                           │
│ NULL           │      NULL │ Asie             │ 1792    │ Exports       │ 0                           │
│ NULL           │      NULL │ Divers         

In [4]:
list_of_regions_values = duckdb.sql("""
FROM select_all_flows
WHERE customs_region IS NOT NULL
AND value IS NOT NULL
AND partner_grouping != 'France'
AND partner_grouping != '????'
AND year = '1789'
AND export_import = 'Exports'
AND best_guess_region_prodxpart = '1'
""")
list_of_regions_values.show()

┌────────────────┬─────────┬───────────────────────┬─────────┬───────────────┬─────────────────────────────┐
│ customs_region │  value  │   partner_grouping    │  year   │ export_import │ best_guess_region_prodxpart │
│    varchar     │  float  │        varchar        │ varchar │    varchar    │           varchar           │
├────────────────┼─────────┼───────────────────────┼─────────┼───────────────┼─────────────────────────────┤
│ Amiens         │ 34500.0 │ Portugal              │ 1789    │ Exports       │ 1                           │
│ Amiens         │  5150.0 │ Portugal              │ 1789    │ Exports       │ 1                           │
│ Amiens         │   900.0 │ Portugal              │ 1789    │ Exports       │ 1                           │
│ Amiens         │    63.0 │ Nord                  │ 1789    │ Exports       │ 1                           │
│ Amiens         │  1485.0 │ Angleterre            │ 1789    │ Exports       │ 1                           │
│ Amiens         │ 

In [5]:
values_per_customs_region = list_of_regions_values.aggregate("""
customs_region,
SUM(value) as total_value
""")

values_per_customs_region.show()

┌─────────────────────┬────────────────────┐
│   customs_region    │    total_value     │
│       varchar       │       double       │
├─────────────────────┼────────────────────┤
│ Amiens              │ 3610793.6906585693 │
│ Auch                │  2145359.301551819 │
│ Bayonne             │ 16653122.974777222 │
│ Besançon            │ 16182680.241600037 │
│ Bordeaux            │ 131582196.87841034 │
│ Caen                │ 502370.94706726074 │
│ Charleville         │ 2265570.1178207397 │
│ Châlons             │  6407065.964874268 │
│ Directions de terre │           607756.0 │
│ Flandre             │         10951606.0 │
│    ·                │              ·     │
│    ·                │              ·     │
│    ·                │              ·     │
│ Marseille           │  82122535.14059448 │
│ Montpellier         │  7423987.931632996 │
│ Nantes              │ 43446843.090530396 │
│ Narbonne            │ 1167178.5250778198 │
│ Rouen               │         42264843.0 │
│ Saint-Ma

In [6]:
values = values_per_customs_region.fetchall()
columns = values_per_customs_region.columns

In [7]:
data = [ dict(zip(columns, row)) for row in values]

In [None]:
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Valeur des exports depuis les plus importantes directions de ferme",
    "mark": "bar",
    "data": {
        "values": data
    },
    "encoding": {
        "y": {
            "field": "total_value",
            "type": "quantitative", 
            "title": "value"
        },
        "x": {
            "field": "customs_region",
            "type": "nominal",
            "title": "customs region",
            "sort": "-y"
        }
    }
})