# Analyse préliminaire et mise en contexte des données de Toflit18 pour le datasprint

Note préliminaire :

* cette analyse préliminaire ne prend pas en compte les quantités/valeurs des flux : elle se contente de compter le nombre de flux renseignés

# Analyse diachronique des flux commerciaux de la direction des fermes de La Rochelle

In [None]:
from poitousprint import Toflit
from vega import VegaLite
import pandas as pd

# Single client instance; the data-loading cells below all call its get_flows().
toflit_client = Toflit()

In [None]:
# Flows requested for customs_region 'La Rochelle' with start_year=1700 and end_year=1800.
# NOTE(review): whether the year bounds are inclusive depends on get_flows() — confirm.
flows_diachro = toflit_client.get_flows(start_year=1700, end_year=1800, customs_region='La Rochelle')

In [None]:
# Bar chart over flows_diachro: x = 'year', y = number of rows,
# bars colored by the 'export_import' field.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre de flux documentés pour la direction des fermes de La Rochelle dans le temps",
    "width": 800,
    "mark": "bar",
    "encoding": {
        "x": {
            "field": "year",
            "type": "quantitative", 
            "title": "année",
            "axis": {
                # presumably chosen so years are printed without a thousands separator — TODO confirm
                "format": "c"
            }
        },
        "y": {
            "type": "quantitative",
            "aggregate": "count",
            "title": "nombre de flux"
        },
        "color": {
            "field": "export_import",
            "type": "nominal",
            "title": "Type de flux"
        }
    }
}, pd.DataFrame(flows_diachro))

In [None]:
# Same per-year row count over flows_diachro as a bar chart,
# this time colored by the 'partner_grouping' field.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre de flux documentés pour la direction des fermes, par partenaire commercial",
    "width": 800,
    "mark": "bar",
    "encoding": {
        "x": {
            "field": "year",
            "type": "quantitative", 
            "title": "année",
            "axis": {
                # presumably chosen so years are printed without a thousands separator — TODO confirm
                "format": "c"
            }
        },
        "y": {
            "type": "quantitative",
            "aggregate": "count",
            "title": "nombre de flux"
        },
        "color": {
            "field": "partner_grouping",
            "type": "nominal",
            "title": "Partenaire (classification 'grouping')"
        }
    }
}, pd.DataFrame(flows_diachro))

# Les flux de la direction des fermes de La Rochelle par rapport aux autres directions en 1789

In [None]:
# Flows requested for year 1789; no customs_region filter is passed here.
flows_1789= toflit_client.get_flows(year=1789)

In [None]:
def resolve_direction(flow):
    """Return a relabelled copy of ``flow`` for the per-direction chart.

    An empty 'customs_region' becomes 'National' and the 'import' spelling of
    'export_import' becomes 'Imports'; any other value is kept as is. The
    relabelling is done on ``flow.copy()``, so the mapping passed in is not
    modified.
    """
    relabelled = flow.copy()
    region = relabelled['customs_region']
    flow_type = relabelled['export_import']
    relabelled['customs_region'] = 'National' if region == '' else region
    relabelled['export_import'] = 'Imports' if flow_type == 'import' else flow_type
    return relabelled
# Bar chart over the relabelled 1789 flows: one bar per 'customs_region'
# (y axis), bar length = number of rows, colored by 'export_import'.
# resolve_direction() is applied to every flow before building the DataFrame.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre de flux par direction en 1789",
    "width": 800,
    "mark": "bar",
    "encoding": {
        "y": {
            "field": "customs_region",
            "type": "nominal", 
            "title": "Bureau des fermes",
            # order the bars by descending x value (the row count)
            "sort": "-x"
        },
        "x": {
            "type": "quantitative",
            "aggregate": "count",
            "title": "nombre de flux"
        },
        "color": {
            "field": "export_import",
            "type": "nominal",
            "title": "Type de flux"
        }
    }
}, pd.DataFrame([resolve_direction(f) for f in flows_1789]))

In [None]:
def resolve_direction(flow):
    """Return a copy of ``flow`` with 'customs_region' reduced to three buckets.

    - ``''`` becomes ``'National'``;
    - ``'La Rochelle'`` is kept;
    - any other region becomes ``'Autres directions'``.

    The function is idempotent: a flow that already carries one of the three
    bucket labels is returned unchanged. Without this, resolving a flow twice
    turned 'National' into 'Autres directions', because 'National' is neither
    ``''`` nor ``'La Rochelle'``.

    The input mapping is not modified; the relabelling is done on
    ``flow.copy()``.
    """
    resolved = flow.copy()
    region = resolved['customs_region']
    if region == '':
        resolved['customs_region'] = 'National'
    elif region not in ('National', 'La Rochelle', 'Autres directions'):
        resolved['customs_region'] = 'Autres directions'
    return resolved
def compute_products_percentage_by_direction(flows):
    """Compute, per direction bucket, the share of flows of each product type.

    Every flow is first passed through ``resolve_direction`` so that its
    'customs_region' is one of 'National', 'La Rochelle' or 'Autres directions'.
    Flows are then counted per bucket and per 'product_revolutionempire' value.

    Returns a list of ``{"product", "direction", "percentage"}`` dicts, where
    ``percentage`` is the product's count divided by the number of flows in the
    same bucket, times 100. Buckets are emitted in the order listed above and
    products in the order they are first encountered.
    """
    directions = ('National', 'La Rochelle', 'Autres directions')
    resolved_flows = [resolve_direction(flow) for flow in flows]

    counts = {direction: {} for direction in directions}
    totals = dict.fromkeys(directions, 0)
    for flow in resolved_flows:
        product = flow['product_revolutionempire']
        direction = flow['customs_region']
        per_direction = counts[direction]
        per_direction[product] = per_direction.get(product, 0) + 1
        # the denominator is the number of flows falling in this bucket
        totals[direction] += 1

    return [
        {"product": product, "direction": direction, "percentage": count / totals[direction] * 100}
        for direction, per_direction in counts.items()
        for product, count in per_direction.items()
    ]

# Pass the raw flows: compute_products_percentage_by_direction() already runs
# resolve_direction() on each of them. Resolving them here as well sent the
# 'National' label through resolve_direction() a second time, where it is
# neither '' nor 'La Rochelle' and so ended up as 'Autres directions'.
products_by_directions_all = compute_products_percentage_by_direction(flows_1789)
# Circle chart: x = direction bucket, y = product type, circle size = percentage
# of that product within the bucket, colored by direction.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des types de produits par direction pour 1789",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "direction",
            "type": "nominal", 
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product",
            "sort": "-size"
        },
        "color": {
            "type": "nominal",
            "field": "direction",
            "title": "Origine du flux"
        },
        "size": {
            "type": "quantitative",
            "field": "percentage",
            "title": "Pourcentage de chaque type de produit dans les flux"
        }
    }
}, pd.DataFrame(products_by_directions_all))

In [None]:
# Keep only import flows (both spellings found in the data) and pass them
# unresolved: compute_products_percentage_by_direction() already runs
# resolve_direction() on each flow, and resolving twice re-labelled 'National'
# as 'Autres directions'.
products_by_directions_imports = compute_products_percentage_by_direction(
    [f for f in flows_1789 if f['export_import'] in ('import', 'Imports')]
)

# Circle chart: x = direction bucket, y = product type, circle size = percentage
# of that product within the bucket's import flows, colored by direction.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des types de produits par direction pour 1789 (imports seulement)",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "direction",
            "type": "nominal", 
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product",
            "sort": "-size"
        },
        "color": {
            "type": "nominal",
            "field": "direction",
            "title": "Origine du flux"
        },
        "size": {
            "type": "quantitative",
            "field": "percentage",
            "title": "Pourcentage de chaque type de produit dans les flux"
        }
    }
}, pd.DataFrame(products_by_directions_imports))

In [None]:
# Product shares per direction bucket, restricted to flows whose
# 'export_import' equals 'Exports'. The flows are passed unresolved;
# compute_products_percentage_by_direction() applies resolve_direction() itself.
products_by_directions_exports = compute_products_percentage_by_direction([f for f in flows_1789 if f['export_import'] == 'Exports'])
# Circle chart: x = direction bucket, y = product type, circle size = percentage,
# colored by direction.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des types de produits par direction pour 1789 (exports seulement)",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "direction",
            "type": "nominal", 
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product",
            "sort": "-size"
        },
        "color": {
            "type": "nominal",
            "field": "direction",
            "title": "Origine du flux"
        },
        "size": {
            "type": "quantitative",
            "field": "percentage",
            "title": "Pourcentage de chaque type de produit dans les flux"
        }
    }
}, pd.DataFrame(products_by_directions_exports))

In [None]:
def resolve_direction(flow):
    """Return a copy of ``flow`` with 'customs_region' reduced to three buckets.

    - ``''`` becomes ``'National'``;
    - ``'La Rochelle'`` is kept;
    - any other region becomes ``'Autres directions'``.

    The function is idempotent: a flow that already carries one of the three
    bucket labels is returned unchanged. Without this, resolving a flow twice
    turned 'National' into 'Autres directions', because 'National' is neither
    ``''`` nor ``'La Rochelle'``.

    The input mapping is not modified; the relabelling is done on
    ``flow.copy()``.
    """
    resolved = flow.copy()
    region = resolved['customs_region']
    if region == '':
        resolved['customs_region'] = 'National'
    elif region not in ('National', 'La Rochelle', 'Autres directions'):
        resolved['customs_region'] = 'Autres directions'
    return resolved
def compute_partners_percentage_by_direction(flows):
    """Compute, per direction bucket, the share of flows going to each partner.

    Every flow is first passed through ``resolve_direction`` so that its
    'customs_region' is one of 'National', 'La Rochelle' or 'Autres directions'.
    Flows are then counted per bucket and per 'partner_simplification' value.

    Returns a list of ``{"partner", "direction", "percentage"}`` dicts, where
    ``percentage`` is the partner's count divided by the number of flows in the
    same bucket, times 100. Buckets are emitted in the order listed above and
    partners in the order they are first encountered.
    """
    directions = ('National', 'La Rochelle', 'Autres directions')
    resolved_flows = [resolve_direction(flow) for flow in flows]

    counts = {direction: {} for direction in directions}
    totals = dict.fromkeys(directions, 0)
    for flow in resolved_flows:
        partner = flow['partner_simplification']
        direction = flow['customs_region']
        per_direction = counts[direction]
        per_direction[partner] = per_direction.get(partner, 0) + 1
        # the denominator is the number of flows falling in this bucket
        totals[direction] += 1

    return [
        {"partner": partner, "direction": direction, "percentage": count / totals[direction] * 100}
        for direction, per_direction in counts.items()
        for partner, count in per_direction.items()
    ]

# Pass the raw flows: compute_partners_percentage_by_direction() already runs
# resolve_direction() on each of them. Resolving them here as well sent the
# 'National' label through resolve_direction() a second time, where it is
# neither '' nor 'La Rochelle' and so ended up as 'Autres directions'.
partners_by_direction_all = compute_partners_percentage_by_direction(flows_1789)
# Circle chart: x = direction bucket, y = partner, circle size = percentage of
# that partner within the bucket, colored by direction.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des partenaires par direction pour 1789",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "direction",
            "type": "nominal", 
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "partner",
            "sort": "-size"
        },
        "color": {
            "type": "nominal",
            "field": "direction",
            "title": "Origine du flux"
        },
        "size": {
            "type": "quantitative",
            "field": "percentage",
            "title": "Pourcentage de chaque partenaire commercial dans les flux"
        }
    }
}, pd.DataFrame(partners_by_direction_all))



In [None]:
# Circle chart: x = direction bucket, y = partner, circle size = percentage of
# that partner within the bucket's import flows, colored by direction.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des partenaires par direction pour 1789 (imports seulement)",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "direction",
            "type": "nominal", 
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "partner",
            "sort": "-size"
        },
        "color": {
            "type": "nominal",
            "field": "direction",
            "title": "Origine du flux"
        },
        "size": {
            "type": "quantitative",
            "field": "percentage",
            "title": "Pourcentage de chaque partenaire commercial dans les flux"
        }
    }
# Import flows (both spellings found in the data) are passed unresolved:
# compute_partners_percentage_by_direction() already runs resolve_direction()
# on each flow, and resolving twice re-labelled 'National' as 'Autres directions'.
}, pd.DataFrame(compute_partners_percentage_by_direction(
    [f for f in flows_1789 if f['export_import'] in ('import', 'Imports')]
)))


In [None]:
# Circle chart: x = direction bucket, y = partner, circle size = percentage of
# that partner within the bucket's export flows, colored by direction.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Proportion des partenaires par direction pour 1789 (export seulement)",
    "mark": "circle",
    "encoding": {
        "x": {
            "field": "direction",
            "type": "nominal", 
            "title": "Direction",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "partner",
            "sort": "-size"
        },
        "color": {
            "type": "nominal",
            "field": "direction",
            "title": "Origine du flux"
        },
        "size": {
            "type": "quantitative",
            "field": "percentage",
            "title": "Pourcentage de chaque partenaire commercial dans les flux"
        }
    }
# Export flows are passed unresolved: compute_partners_percentage_by_direction()
# already runs resolve_direction() on each flow, and resolving twice re-labelled
# 'National' as 'Autres directions'.
}, pd.DataFrame(compute_partners_percentage_by_direction(
    [f for f in flows_1789 if f['export_import'] == 'Exports']
)))


# Premier aperçu des données de la direction de La Rochelle en 1789

In [None]:
# Flows requested for customs_region 'La Rochelle' and year 1789.
la_rochelle = toflit_client.get_flows(customs_region='La Rochelle', year=1789)
for f in la_rochelle:
    # Give flows without an office an explicit label so they show up as their
    # own category in the per-office charts.
    if f['customs_office'] == '':
        f['customs_office'] = 'Pas de bureau spécifié'
    # The 1789 per-direction cells treat 'import' and 'Imports' as the same
    # flow type, while the charts built from la_rochelle filter on 'Imports'
    # only. Normalize the spelling here so those filters do not drop rows.
    # NOTE(review): confirm the 'import' spelling actually occurs in this subset.
    if f['export_import'] == 'import':
        f['export_import'] = 'Imports'

In [None]:
# Bar chart over la_rochelle: one bar per 'customs_office' (y axis),
# bar length = number of rows, colored by 'export_import'.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre de flux par bureau des fermes",
    "mark": "bar",
    "encoding": {
        "x": {
            "type": "quantitative",
            "aggregate": "count"
        },
        "y": {
            "type": "nominal",
            "field": "customs_office",
            # order the bars by descending x value (the row count)
            "sort": "-x"
        },
        "color": {
            "type": "nominal",
            "field": "export_import",
            "title": "Type de flux"
        }
    }
}, pd.DataFrame(la_rochelle))

In [None]:
# Circle chart over the la_rochelle rows whose 'export_import' is 'Exports':
# x = 'customs_office', y = 'product_revolutionempire', circle size = number
# of rows for that (office, product) pair, colored by office.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Produits exports par bureau (classification révolution et empire)",
    "mark": "circle",
    "encoding": {
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "title": "production (classification 'révolution et empire')",
            "sort": "-size"
        },
        "x": {
            "type": "nominal",
            "field": "customs_office",
            "title": "bureau des fermes",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "title": "nombre d'exports",
            "aggregate": "count"
        },
        "color": {
            "type": "nominal",
            "field": "customs_office",
            "title": "bureau des fermes"
        }
    }
}, pd.DataFrame([f for f in la_rochelle if f['export_import'] == 'Exports']))

In [None]:
# Circle chart over the la_rochelle rows whose 'export_import' is 'Imports':
# x = 'customs_office', y = 'product_revolutionempire', circle size = number
# of rows for that (office, product) pair, colored by office.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Produits importés par bureau (classification révolution et empire)",
    "mark": "circle",
    "encoding": {
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "title": "production (classification 'révolution et empire')",
            "sort": "-size"
        },
        "x": {
            "type": "nominal",
            "field": "customs_office",
            "title": "bureau des fermes",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "title": "nombre d'imports",
            "aggregate": "count"
        },
        "color": {
            "type": "nominal",
            "field": "customs_office",
            "title": "bureau des fermes"
        }
    }
}, pd.DataFrame([f for f in la_rochelle if f['export_import'] == 'Imports']))

In [None]:
# Bar chart over la_rochelle: one bar per 'partner_simplification' (y axis),
# bar length = number of rows, colored by 'export_import'.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Partenaires commerciaux de la région (simplification)",
    "mark": "bar",
    "encoding": {
        "x": {
            "type": "quantitative",
            "title": "nombre de flux",
            "aggregate": "count"
        },
        "y": {
            "type": "nominal",
            "field": "partner_simplification",
            "title": "partenaire (classification simplification)",
            # order the bars by descending x value (the row count)
            "sort": "-x"
        },
        "color": {
            "type": "nominal",
            "field": "export_import"
        }
    }
}, pd.DataFrame(la_rochelle))

In [None]:
# Circle chart over the la_rochelle rows whose 'export_import' is 'Exports':
# x = 'partner_simplification', y = 'customs_office', circle size = number of
# rows for that (partner, office) pair.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Partenaires privilégiés pour les exports par bureau de ferme",
    "mark": "circle",
    "encoding": {
        "x": {
            "type": "nominal",
            "field": "partner_simplification",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "customs_office",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "title": "nombre de flux d'exports",
            "aggregate": "count"
        }
    }
}, pd.DataFrame([f for f in la_rochelle if f['export_import'] == 'Exports']))

In [None]:
# Circle chart over the la_rochelle rows whose 'export_import' is 'Imports':
# x = 'partner_simplification', y = 'customs_office', circle size = number of
# rows for that (partner, office) pair.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Partenaires privilégiés pour les imports par bureau de ferme",
    "mark": "circle",
    "encoding": {
        "x": {
            "type": "nominal",
            "field": "partner_simplification",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "customs_office",
            "title": "Bureau des fermes",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "title": "nombre de flux d'imports",
            "aggregate": "count"
        }
    }
}, pd.DataFrame([f for f in la_rochelle if f['export_import'] == 'Imports']))

In [None]:
# Circle chart over the la_rochelle rows whose 'export_import' is 'Exports':
# x = 'partner_simplification', y = 'product_revolutionempire', circle size =
# number of rows for that (partner, product) pair, colored by partner.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Types de produits exportés par partenaire",
    "mark": "circle",
    "encoding": {
        "x": {
            "type": "nominal",
            "field": "partner_simplification",
            "title": "partenaire (classification 'simplification')",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "title": "type de produit (classification 'révolution et empire')",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "title": "nombre de flux d'exports",
            "aggregate": "count"
        },
        "color": {
            "type": "nominal",
            "field": "partner_simplification",
            "title": "partenaire (classification 'simplification')"
        }
    }
}, pd.DataFrame([f for f in la_rochelle if f['export_import'] == 'Exports']))

In [None]:
# Circle chart over the la_rochelle rows whose 'export_import' is 'Imports':
# x = 'partner_simplification', y = 'product_revolutionempire', circle size =
# number of rows for that (partner, product) pair, colored by partner.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Types de produits importés par partenaire",
    "mark": "circle",
    "encoding": {
        "x": {
            "type": "nominal",
            "field": "partner_simplification",
            "title": "partenaire (classification 'simplification')",
            "sort": "-size"
        },
        "y": {
            "type": "nominal",
            "field": "product_revolutionempire",
            "title": "type de produit (classification 'révolution et empire')",
            "sort": "-size"
        },
        "size": {
            "type": "quantitative",
            "title": "nombre de flux d'import",
            "aggregate": "count"
        },
        "color": {
            "type": "nominal",
            "field": "partner_simplification",
            "title": "Partenaire de commerce"
        }
    }
}, pd.DataFrame([f for f in la_rochelle if f['export_import'] == 'Imports']))