## Demande de viz complémentaire : 
#### en réponse au commentaire du 20/05 de Guillaume Daudin (doc Etape 3, Axe 1)
"il faudrait refaire la viz [Proportion type de produits exportés par direction pour 1789] en excluant les exportations vers les ports francs? Et peut-être pour 1750 ? Et en classant par La Rochelle ?"

#### Méthode adoptée : 
- reprise du code de réalisation de la viz lors du sprint
- on enlève dans les données les flux avec pour attribut 'partner_grouping' = 'France' (ports francs)
- on réorganise le jeu de données pour que les produits les plus exportés par la DFLR apparaissent en haut du diagramme => pour l'instant je n'y arrive pas

In [None]:
from poitousprint import Toflit
from vega import VegaLite
import pandas as pd
from operator import itemgetter # to sort lists

# Single client instance shared by the cells below, which query it through get_flows().
toflit_client = Toflit()

In [None]:
def prepare_flow(flow):
    """Return a normalised copy of a raw flow record.

    The input mapping is left untouched. On the copy:

    * ``export_import`` equal to ``'import'`` is rewritten to ``'Imports'``;
    * ``customs_region_simpl`` is added: ``'National'`` for an empty or
      ``'National'`` customs region, ``'La Rochelle'`` for La Rochelle,
      ``'Autre direction'`` for anything else;
    * ``value`` is converted to ``float`` (``0`` when it is the empty string).
    """
    prepared = flow.copy()

    if prepared['export_import'] in ('Imports', 'import'):
        prepared['export_import'] = 'Imports'

    region = prepared['customs_region']
    if region in ('', 'National'):
        simplified = 'National'
    elif region == 'La Rochelle':
        simplified = 'La Rochelle'
    else:
        simplified = 'Autre direction'
    prepared['customs_region_simpl'] = simplified

    raw_value = prepared['value']
    prepared['value'] = 0 if raw_value == '' else float(raw_value)
    return prepared



# Raw flows for the two years under study, both requested with best_guess_region_prodxpart='1'.
flows_1789 = toflit_client.get_flows(year=1789, best_guess_region_prodxpart='1')
flows_1750 = toflit_client.get_flows(year=1750, best_guess_region_prodxpart='1')

# Sum of the flow values per simplified direction, computed on the 1789 flows only.
# clean_flow() reads these two dicts as denominators.
total_exports_per_direction = {}
total_imports_per_direction = {}

for f in flows_1789:
    flow = prepare_flow(f)
    # 'Imports' feed the import totals; any other export_import value is counted as an export.
    totals = total_imports_per_direction if flow['export_import'] == 'Imports' else total_exports_per_direction
    direction = flow['customs_region_simpl']
    totals[direction] = totals.get(direction, 0) + flow['value']
        
def clean_flow(flow):
    """Return a copy of a prepared flow with its share of the direction total.

    The flow must already carry ``customs_region_simpl`` and a numeric
    ``value`` (see ``prepare_flow``). The copy gains
    ``value_rel_per_direction``: the flow value divided by the total of its
    direction, read from the module-level ``total_imports_per_direction`` or
    ``total_exports_per_direction`` dict.

    Raises ``KeyError`` when the flow's direction has no entry in the totals.
    """
    f = flow.copy()
    # Same split as the loop that fills the totals: 'Imports' on one side,
    # every other export_import value counted as an export.
    abs_map = total_imports_per_direction if f['export_import'] == 'Imports' else total_exports_per_direction
    total = abs_map[f['customs_region_simpl']]
    # A direction whose flows all have a zero/empty value has a total of 0;
    # report a null share instead of raising ZeroDivisionError.
    f['value_rel_per_direction'] = f['value'] / total if total else 0.0
    return f

In [None]:
# Preview of the 1789 exports that do not go to the free ports (partner_grouping 'France').
# When the cells run in order, flows_1789 still holds raw rows here, so prepare_flow()
# must run before clean_flow(): it adds 'customs_region_simpl' and makes 'value' numeric.
# prepare_flow() leaves an already prepared flow unchanged, so re-running this cell later is safe.
print(pd.DataFrame([
    clean_flow(prepare_flow(f))
    for f in flows_1789
    if f['export_import'] == 'Exports' and f['partner_grouping'] != 'France'
]))

In [None]:
# Replace the raw 1789 rows by their prepared version enriched with the relative share.
flows_1789 = list(map(clean_flow, map(prepare_flow, flows_1789)))
print(flows_1789[:10])

# Same treatment for 1750, then ordered by increasing value.
# NOTE(review): clean_flow() divides by totals summed over the 1789 flows, so the
# 'value_rel_per_direction' stored on the 1750 flows is not a share of 1750 trade.
flows_1750 = [clean_flow(prepare_flow(f)) for f in flows_1750]
flows_1750.sort(key=itemgetter('value'))

In [None]:
def aggregate_flows_by_product(flows):
    """Sum flow values per product, split between La Rochelle and the rest.

    Returns a dict keyed by ``product_revolutionempire``. Each entry holds
    ``product`` (the key itself), ``exports_la_rochelle`` (sum of the values
    of flows whose ``customs_region_simpl`` is ``'La Rochelle'``) and
    ``exports_autres_directions`` (sum of the values of every other flow).
    The function sums whatever flows it receives; it does not filter them.
    """
    # First pass: one zeroed entry per product, in order of first appearance.
    products = [flow['product_revolutionempire'] for flow in flows]
    aggregated = {
        product: {
            'product': product,
            'exports_la_rochelle': 0,
            'exports_autres_directions': 0,
        }
        for product in products
    }

    # Second pass: add each flow value to the matching side of its product entry.
    for flow in flows:
        if flow['customs_region_simpl'] == 'La Rochelle':
            side = 'exports_la_rochelle'
        else:
            side = 'exports_autres_directions'
        aggregated[flow['product_revolutionempire']][side] += flow['value']

    return aggregated

In [None]:
# flows_1789 contains imports as well as exports, including exports to the free ports
# (partner_grouping 'France'). The aggregated fields are export figures, so keep only
# the flows that the charts keep too.
exports_1789_hors_ports_francs = [
    flow
    for flow in flows_1789
    if flow['export_import'] == 'Exports' and flow['partner_grouping'] != 'France'
]
product_exports_values_per_direction = aggregate_flows_by_product(exports_1789_hors_ports_francs)

# Denominators: total exported value for La Rochelle and for all the other directions.
total_exports_la_rochelle = sum(
    values['exports_la_rochelle'] for values in product_exports_values_per_direction.values()
)
total_exports_autres_directions = sum(
    values['exports_autres_directions'] for values in product_exports_values_per_direction.values()
)

# Share of each product in the exports of its side; 0 when that side has no exports at all.
for values in product_exports_values_per_direction.values():
    values['exports_rel_la_rochelle'] = (
        values['exports_la_rochelle'] / total_exports_la_rochelle
        if total_exports_la_rochelle else 0
    )
    values['exports_rel_autres_directions'] = (
        values['exports_autres_directions'] / total_exports_autres_directions
        if total_exports_autres_directions else 0
    )

In [None]:
# Product entries ordered by decreasing share in La Rochelle's figures.
sorted_product_exports_values_per_direction = list(product_exports_values_per_direction.values())
sorted_product_exports_values_per_direction.sort(
    key=lambda entry: entry['exports_rel_la_rochelle'],
    reverse=True,
)
print(sorted_product_exports_values_per_direction)

In [None]:
# 1789 exports, free ports (partner_grouping 'France') left out.
exports_1789_frame = pd.DataFrame([
    clean_flow(f)
    for f in flows_1789
    if f['export_import'] == 'Exports' and f['partner_grouping'] != 'France'
])

# The x and color channels encode the same field with the same settings.
direction_channel = {
    "field": "customs_region_simpl",
    "type": "nominal",
    "title": "Direction",
    "sort": "-size",
}

# One circle per (direction, product); its size is the summed share of the product
# in the direction's total.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des types de produits exportés par direction pour 1789",
    "mark": "circle",
    "encoding": {
        "x": dict(direction_channel),
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "sort": "-size",
        },
        "color": dict(direction_channel),
        "size": {
            "type": "quantitative",
            "field": "value_rel_per_direction",
            "title": "Part de la somme des valeurs par rapport au total par direction",
            "aggregate": "sum",
        },
    },
}, exports_1789_frame)

In [None]:
# clean_flow() divides by module-level totals that are summed over the 1789 flows, so it
# cannot give the share of a 1750 flow. Build the 1750 denominators here, with the same
# rule as for 1789: every flow that is not an import counts towards its direction total.
total_exports_1750_per_direction = {}
for f in flows_1750:
    if f['export_import'] != 'Imports':
        direction = f['customs_region_simpl']
        total_exports_1750_per_direction[direction] = (
            total_exports_1750_per_direction.get(direction, 0) + f['value']
        )

# 1750 exports, free ports (partner_grouping 'France') left out, with the share recomputed
# against the 1750 totals (0 when a direction total is null).
exports_1750_rows = []
for f in flows_1750:
    if f['export_import'] == 'Exports' and f['partner_grouping'] != 'France':
        total = total_exports_1750_per_direction[f['customs_region_simpl']]
        exports_1750_rows.append(
            dict(f, value_rel_per_direction=f['value'] / total if total else 0.0)
        )

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des types de produits exportés par direction pour 1750",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "customs_region_simpl",
            "type": "nominal",
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "sort": "-size"
        },
        "color": {
            "field": "customs_region_simpl",
            "type": "nominal",
            "title": "Direction",
            # NOTE(review): this list holds product names while the channel encodes
            # directions, so none of its values occurs in the field. It was presumably
            # meant for the "y" channel - confirm before moving it.
            "sort": ["Étoffes de laine", "Toiles de chanvre et de lin", "Sucre", "Vins de Bordeaux","Étoffes de soie", "Café", "Mercerie", "Indigo", "Ouvrages divers et mélangés", "Étoffes diverses", "Dorure", "Toiles diverses", "Toiles de coton", "Dentelle", "Soieries diverses", "Eaux-de-vie et liqueurs"]
        },
        "size": {
            "type": "quantitative",
            "field": "value_rel_per_direction",
            "title": "Part de la somme des valeurs par rapport au total par direction",
            "aggregate": "sum"
        }
    }
}, pd.DataFrame(exports_1750_rows))

In [None]:
# clean_flow() divides by module-level totals that are summed over the 1789 flows, so it
# cannot give the share of a 1750 flow. Build the 1750 denominators here, with the same
# rule as for 1789: every flow that is not an import counts towards its direction total.
total_exports_1750_per_direction = {}
for f in flows_1750:
    if f['export_import'] != 'Imports':
        direction = f['customs_region_simpl']
        total_exports_1750_per_direction[direction] = (
            total_exports_1750_per_direction.get(direction, 0) + f['value']
        )

# 1750 exports, free ports (partner_grouping 'France') left out, with the share recomputed
# against the 1750 totals (0 when a direction total is null).
exports_1750_rows = []
for f in flows_1750:
    if f['export_import'] == 'Exports' and f['partner_grouping'] != 'France':
        total = total_exports_1750_per_direction[f['customs_region_simpl']]
        exports_1750_rows.append(
            dict(f, value_rel_per_direction=f['value'] / total if total else 0.0)
        )

# One panel per direction; inside a panel, products (y) against trade partners (x).
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des types de produits exportés par direction pour 1750",
    "mark": "circle",
    "encoding": {
        "column": {
            "field": "customs_region_simpl"
        },
        "x": {
            "field": "partner_grouping",
            "type": "nominal",
            "title": "Partenaire de commerce",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "sort": "-size"
        },
        "color": {
            "field": "customs_region_simpl",
            # The dict literal used to define "title" twice; Python keeps the last value,
            # so "Direction" is the legend title that was actually rendered.
            "type": "nominal",
            "title": "Direction",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "field": "value_rel_per_direction",
            "aggregate": "sum"
        }
    }
}, pd.DataFrame(exports_1750_rows))

In [None]:
# Long-format rows for the grouped bar chart: two rows per product (La Rochelle first,
# then the other directions), products kept in the order of the sorted list.
final_vega_data = [
    {
        "product": values['product'],
        "direction des Fermes": direction_label,
        "value_rel_per_direction": values[share_key],
    }
    for values in sorted_product_exports_values_per_direction
    for direction_label, share_key in (
        ('La Rochelle', 'exports_rel_la_rochelle'),
        ('Autre direction', 'exports_rel_autres_directions'),
    )
]
print(final_vega_data)

In [None]:
# Tabular view of the rows that feed the grouped bar chart.
final_vega_frame = pd.DataFrame(final_vega_data)
print(final_vega_frame)

### Tentative de transformation en grouped-bar chart

soit je mets dans un json bien nesté

[
{"product_revolutionempire":"Eaux-de-vie et liqueurs","customs_region_simpl":"La Rochelle","value_rel_per_direction":0.2},
{"product_revolutionempire":"Eaux-de-vie et liqueurs","customs_region_simpl":"Autre direction","value_rel_per_direction":0.03}
]


soit je fais avec pd.DataFrame



In [None]:
# final_vega_data is ordered by decreasing La Rochelle share, but Vega-Lite sorts a
# discrete facet field in ascending order by default, which throws that order away.
# Passing the product names as an explicit sort array keeps the products in data order
# (dict.fromkeys drops the duplicates while preserving first appearance).
product_order = list(dict.fromkeys(row["product"] for row in final_vega_data))

# Grouped bars: one facet column per product, one bar per "direction des Fermes".
VegaLite({
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "title": "Proportion des types de produits exportés par direction pour 1789",
  "width": {"step": 12},
  "mark": "bar",
  "encoding": {
    "column": {
      "field": "product", "type": "ordinal", "spacing": 10,
      "sort": product_order
    },
    "y": {
      "aggregate": "sum", "field": "value_rel_per_direction",
      "title": "Part de la somme des valeurs par rapport au total par direction",
      "axis": {"grid": False}
    },
    "x": {
      "field": "direction des Fermes",
      "axis": {"title": ""}
    },
    "color": {
      "field": "direction des Fermes",
      "scale": {"range": ["#675193", "#ca8861"]}
    }
  },
  "config": {
    "view": {"stroke": "transparent"},
    "axis": {"domainWidth": 1}
  }
}, pd.DataFrame(final_vega_data))

### Exemple Vega-lite original : accessible à https://vega.github.io/vega-lite/examples/bar_grouped.html

In [None]:
# Reference grouped bar chart from the Vega-Lite gallery, kept as a model for the chart above.
VegaLite({
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  # The data URL must return the JSON file itself; a github.com/.../blob/... address
  # returns an HTML page and the chart stays empty.
  "data": { "url": "https://vega.github.io/vega-datasets/data/population.json"},
  "transform": [
    {"filter": "datum.year == 2000"},
    {"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"}
  ],
  "width": {"step": 12},
  "mark": "bar",
  "encoding": {
    "column": {
      "field": "age", "type": "ordinal", "spacing": 10
    },
    "y": {
      "aggregate": "sum", "field": "people",
      "title": "population",
      "axis": {"grid": False}
    },
    "x": {
      "field": "gender",
      "axis": {"title": ""}
    },
    "color": {
      "field": "gender",
      "scale": {"range": ["#675193", "#ca8861"]}
    }
  },
  "config": {
    "view": {"stroke": "transparent"},
    "axis": {"domainWidth": 1}
  }
})