# Récupérer les données

In [None]:
import duckdb

# Relative path to the dataset.
csv = '../../data/toflit18_all_flows.csv'

# Read the CSV and expose it as a DuckDB relation (table).
# Every column is read as text (ALL_VARCHAR=TRUE), so `value` is cast
# explicitly. It is cast to DOUBLE rather than FLOAT: FLOAT is single
# precision in DuckDB (about 7 significant digits) and would round large
# trade values before they are summed in the aggregation step.
toflit18_flows = duckdb.sql(f"""
SELECT  customs_region,
        CAST(value AS DOUBLE) as value,
        year,
        export_import,
        best_guess_region_prodxpart,
        partner_simplification,
        origin_province,
        partner_grouping,
        product_orthographic,
        product_sitc_simplEN
FROM read_csv_auto('{csv}', ALL_VARCHAR=TRUE)
""")

In [None]:
# Apply the baseline filters: keep the 1789 'Exports' rows whose partner is
# Marseille, recorded by a customs region other than Marseille, flagged
# best_guess_region_prodxpart = '1', and having both a product category and
# a province of origin.
# The query contains no placeholders, so a plain string is used.
base_relation = duckdb.sql("""
    FROM toflit18_flows
    WHERE year = '1789'
    AND best_guess_region_prodxpart = '1'
    AND partner_simplification = 'Marseille'
    AND customs_region != 'Marseille'
    AND export_import = 'Exports'
    AND product_sitc_simplEN IS NOT NULL
    AND origin_province IS NOT NULL
""")

# Preview the filtered relation.
base_relation.show()

# Agréger par produit, origine, et direction de ferme exportatrice

Effectuer un groupement des exports vers Marseille selon trois dimensions :
- le type du produit
- l'origine du produit
- la direction de ferme qui a exporté le produit vers Marseille

In [None]:
# Aggregate the filtered flows: one row per (origin province, customs region,
# SITC product category), carrying the summed value (sum_value) and the count
# of non-NULL values (nb_flows).
# NOTE(review): the variable is named "imports_..." although base_relation only
# keeps rows where export_import = 'Exports' — confirm the naming is intended.
imports_grouped_by_origine_province_type = duckdb.sql("""
SELECT origin_province, customs_region, SUM(value) as sum_value, COUNT(value) as nb_flows, product_sitc_simplEN
FROM base_relation
GROUP BY (origin_province, customs_region, product_sitc_simplEN);
""")

# Preview the aggregated relation.
imports_grouped_by_origine_province_type.show()

In [None]:
# Turn the aggregated relation into a list of dictionaries, one per row,
# keyed by column name (used as the inline data values of the chart).
columns = imports_grouped_by_origine_province_type.columns
values = imports_grouped_by_origine_province_type.fetchall()

data = [
    {column: cell for column, cell in zip(columns, record)}
    for record in values
]

# Visualiser les agrégations

In [None]:
import altair as alt

# Build the chart from a raw Vega-Lite v5 specification.
# `from_dict` is a classmethod, so it is called on the class directly instead
# of on a throwaway `alt.Chart()` instance.
chart = alt.Chart.from_dict({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Type de produit exporté vers Marseille en 1789 par son origine et par la direction de ferme exportatrice",
    "width": 800,
    "mark": {
        # Tooltip shows every field of the hovered datum.
        "tooltip": {
            "content": "data"
        },
        "type": "bar"
    },
    "data": {
        "values": data
    },
    "encoding": {
        # One facet row per exporting customs region, ordered by descending
        # total of sum_value.
        "row": {
            "field": "customs_region",
            "header": {"labelOrient": "top"},
            "sort": {
                "op": "sum",
                "field": "sum_value",
                "order": "descending"
            }
        },
        "x": {
            "field": "sum_value",
            "type": "quantitative",
            "title": "Valeur totale"
        },
        # Provinces of origin on the y axis, sorted by descending x value.
        "y": {
            "field": "origin_province",
            "type": "nominal",
            "title": "Origine du produit",
            "axis": {
                # Allow long province names before the label is truncated.
                "labelLimit": 500
            },
            "sort": "-x"
        },
        # One colour per SITC product category, using a custom palette.
        "color": {
            "field": "product_sitc_simplEN",
            "type": "nominal",
            "title": "Type du produit exporté",
            "scale": {
                "range": [
                    "#a7ecff",
                    "#4605ba",
                    "#eaff76",
                    "#9b0071",
                    "#398700",
                    "#db0048",
                    "#02ece4",
                    "#af4600",
                    "#004b86",
                    "#ffc68a",
                    "#002e36",
                    "#c1b6ff",
                    "#282600"
                ]
            }
        }
    },
    # Each facet row gets its own y scale, so it only lists its own provinces.
    "resolve": {"scale": {"y": "independent"}}
})

# Display the chart as the cell output.
chart