In [None]:
import csv
import pandas as pd
import numpy as np
import pprint
import requests
from IPython.display import display

def VegaLite(spec):
    """Display a Vega-Lite specification as rich notebook output.

    The specification is wrapped in a one-entry MIME bundle and passed to
    IPython's ``display`` with ``raw=True``.

    NOTE(review): the bundle uses the Vega-Lite v4 MIME type while the charts in
    this notebook declare the v5 ``$schema`` -- confirm the front-end in use
    renders them as intended.

    Args:
        spec: the Vega-Lite specification, as a dict.
    """
    display({'application/vnd.vegalite.v4+json': spec}, raw=True)


In [None]:
# Fetch the estimated tonnage for each ship class from the published spreadsheet.
TONNAGE_SPREADSHEET_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTYdeIwpzaVpY_KS91cXiHxb309iYBS4JN_1_hW-_oyeysuwcIpC2VJ5fWeZJl4tA/pub?output=csv"
download = requests.get(TONNAGE_SPREADSHEET_URL)
# Stop here on an HTTP error rather than parsing an error page as CSV, which
# would only fail later with a KeyError on the expected column names.
download.raise_for_status()
# The empty ship class is pre-registered with a tonnage of 0.
tonnages_estimate = {"": 0}
for row in csv.DictReader(download.content.decode("utf-8").splitlines()):
    # "No data" cells and empty cells both count as 0.
    tonnages_estimate[row["ship_class"]] = int(row["tonnage_estime_en_tx"].replace("No data", "0") or 0)

Ce module consiste à décrire diachroniquement les voyages arrivant à Marseille.

### 1. Filtrage des flows

- Données de "flows" Navigo corpus
- Uniquement la "Santé"

In [None]:
# First pass over the flows file: collect the flows of the Marseille health
# registry whose destination_function is "O", and remember, for each source
# document, the travel rank of that flow.
flows_to_Marseille = []
rank_Marseille = {}
with open('../../data/navigo_all_flows.csv', newline='') as flows_file:
    for flow in csv.DictReader(flows_file):
        if flow['destination_function'] != "O":
            continue
        if flow["source_suite"] != "la Santé registre de patentes de Marseille":
            continue
        # A further filter on the toponym is kept disabled:
        #   flow['toponyme_fr'] == 'Marseille'
        flows_to_Marseille.append(flow)
        # If a document has several matching flows, the last one read wins.
        rank_Marseille[flow["source_doc_id"]] = flow["travel_rank"]

- Prendre note des flows d'arrivées à Marseille
- Isoler les flows qui précèdent les arrivées à Marseille

In [None]:
# Second pass over the flows file: for every source document that has an arrival
# recorded in rank_Marseille, keep the flows whose travel rank is at most the
# rank of that arrival.
ranks_smaller_than_Marseille = []
# Number of registry flows skipped because destination_uhgs_id is 'A9999997'.
counter_uhgs_99999 = 0
with open('../../data/navigo_all_flows.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        if row["source_suite"] == "la Santé registre de patentes de Marseille":
            if row["destination_uhgs_id"] == 'A9999997':
                counter_uhgs_99999 += 1
                continue
            if row["source_doc_id"] in rank_Marseille:
                max_rank = rank_Marseille[row["source_doc_id"]]
                try:
                    # Ranks come out of the CSV as strings. Compare them as
                    # numbers: as strings, "10" <= "9" is true and "2" <= "10"
                    # is false, which mis-sorts travels of ten steps or more.
                    is_up_to_arrival = int(row["travel_rank"]) <= int(max_rank)
                except ValueError:
                    # Non-numeric rank: fall back to the plain string ordering.
                    is_up_to_arrival = row["travel_rank"] <= max_rank
                if is_up_to_arrival:
                    ranks_smaller_than_Marseille.append(row)
# Last expression of the cell: displays the number of skipped flows.
counter_uhgs_99999

### 2. Reconstitution des voyages

- Reconstituer des voyages complets à partir des flows séquentiels
- Attribution de caractéristiques aux voyages :
    - distance parcourue
    - nombre d'étapes
    - date de départ
    - date d'arrivée
    - pavillon
    - classe de bateau

In [None]:
from collections import defaultdict

# One record per source document, created on first access. "keep" switches to
# False as soon as one flow of the document cannot be used (see the loop below).
travels = defaultdict(lambda: {"total_miles": 0, "total_steps": 0, "keep": True})
null_distance = 0


# Departure class of French ports, looked up by province.
province_to_class = {
    'Normandie': 'Ponant',
    'Aunis': 'Ponant',
    'Guyenne': 'Ponant',
    'Languedoc': 'méditerrannée occidentale',
    'Provence': 'méditerrannée occidentale',
    'Flandre': 'Ponant',
    'Picardie': 'Ponant',
    'Corse': 'méditerrannée occidentale',
    'Bretagne': 'Ponant',
    'Roussillon': 'méditerrannée occidentale'
}
# Departure class of non-French ports, looked up by state. States that are
# commented out get no class at all.
# NOTE(review): 'Brême' is classed as western Mediterranean while 'Hambourg' is
# 'Ponant' -- confirm this is intended.
state_to_class = {
    'Etats pontificaux': 'méditerrannée occidentale',
    'Hambourg': 'Ponant',
    #"Etats-Unis d'Amérique": '',
    'Monaco': 'méditerrannée occidentale',
    'Provinces-Unies': 'Ponant',
    'Prusse': 'Ponant',
    'Grande-Bretagne': 'Ponant',
    'République romaine': 'méditerrannée occidentale',
    'Duché de Massa et Carrare': 'méditerrannée occidentale',
    'Royaume de Naples': 'méditerrannée occidentale',
    'Malte': 'méditerrannée occidentale',
    'République\xa0ligurienne': 'méditerrannée occidentale',
    'Pologne': 'Ponant',
    'République de Gênes': 'méditerrannée occidentale',
    'Autriche': 'Ponant',
    'République de Raguse': 'méditerrannée occidentale',
    'Toscane': 'méditerrannée occidentale',
    'Danemark': 'Ponant',
    'République de Venise': 'méditerrannée occidentale',
    'Maroc': 'méditerrannée occidentale',
    'Espagne': 'méditerrannée occidentale',
    'Empire ottoman': 'empire ottoman',
    'Royaume de Piémont-Sardaigne': 'méditerrannée occidentale',
    'Suède': 'Ponant',
    'Portugal': 'Ponant',
    'Empire russe': 'Ponant',
    # 'France',
    'Brême': 'méditerrannée occidentale',
    'République de Lucques': 'méditerrannée occidentale'
}

for flow in ranks_smaller_than_Marseille:
    voyage_id = flow["source_doc_id"]
    voyage = travels[voyage_id]
    miles = flow["distance_dep_dest_miles"]

    # A flow is usable only with a non-empty, non-'0' distance and a departure
    # date, and only while the travel has not been discarded already.
    usable = miles and miles != '0' and voyage["keep"] and flow["departure_out_date"]
    if not usable:
        voyage["keep"] = False
        continue

    voyage["total_miles"] += int(miles)
    voyage["total_steps"] += 1
    rank = flow["travel_rank"]

    # First flow of the travel: record the ship tonnage and the departure.
    if rank == "1":
        voyage["tonnage"] = tonnages_estimate.get(flow["ship_class_standardized"], 0)
        voyage["departure_date"] = flow["departure_out_date"]
        voyage["departure"] = flow["departure"]
        state = flow["departure_state_fr"]
        voyage["departure_state"] = state

        # French ports are classed by province, all other ports by state.
        if state == "France":
            class_key, class_table = flow["departure_province"], province_to_class
        else:
            class_key, class_table = state, state_to_class
        if class_key in class_table:
            voyage["departure_class"] = class_table[class_key]

    # Flow whose rank is the one stored for this document in rank_Marseille:
    # record the arrival.
    if rank == rank_Marseille[voyage_id]:
        arrival = flow["indate_fixed"]
        voyage["arrival_date"] = arrival
        voyage["pavillon"] = flow["ship_flag_standardized_fr"]
        voyage["classe_bateau"] = flow["ship_class_standardized"]
        year = arrival[:4]
        # A year that does not end in 9 gets its last digit replaced by 9.
        voyage["year"] = year if year[-1] == "9" else year[:3]+"9"
        # War/peace is decided on the raw arrival year, not on the value stored
        # under "year".
        voyage["wartimes"] = "guerre" if year in ("1759", "1779", "1799") else "paix"

In [None]:
# Sanity check: print every kept travel whose "year" is an empty string.
for voyage in travels.values():
    has_blank_year = voyage.get("year") == ''
    if has_blank_year and voyage["keep"] == True:
        print(voyage)

### 3. Suppression des voyages invalides et calcul de la vitesse

- Suppression des voyages dont la date de départ est invalide, c'est-à-dire qu'elle contient une imprécision notée '<' ou '>'

In [None]:
from datetime import datetime

# Travels that pass every check, keyed like `travels`, with duration and speed added.
good_travels = {}
# Exceptions describing the travels that were skipped because a date was
# missing or could not be parsed.
error_list = []

for k, v in travels.items():
    if not v["keep"]:
        continue

    # A kept travel may never have seen its first flow; record it and skip it
    # instead of failing on the missing key.
    departure_date = v.get("departure_date")
    if departure_date is None:
        error_list.append(KeyError(f"{k}: no departure_date"))
        continue
    # '<' or '>' in the departure date marks an imprecise date: drop the travel.
    if '<' in departure_date or '>' in departure_date:
        continue
    arrival_date = v.get("arrival_date")
    if arrival_date is None:
        error_list.append(KeyError(f"{k}: no arrival_date"))
        continue

    try:
        # Both dates are parsed under the same guard so that a malformed arrival
        # date is recorded like a malformed departure date.
        end_time = datetime.strptime(arrival_date, "%Y-%m-%d")
        start_time = datetime.strptime(departure_date[:10], "%Y=%m=%d")
    except ValueError as e:
        error_list.append(e)
        continue

    travel = v.copy()
    travel["duration"] = (end_time - start_time).days
    # Same-day travels count as one day, which also avoids a division by zero.
    if travel["duration"] == 0:
        travel["duration"] = 1
    # NOTE(review): an arrival earlier than the departure gives a negative
    # duration and speed, which passes the `< 300` filter below -- confirm.
    travel["speed"] = v["total_miles"] / travel["duration"]
    # Despite its name, "decade" holds the first four characters of the arrival date.
    travel["decade"] = arrival_date[:4]
    travel.pop("keep")
    good_travels[k] = travel

travels_list = list(good_travels.values())

# Keep the travels that have a departure class and a speed under 300.
travels_clean = [t for t in travels_list \
                    if "departure_class" in t \
                    and t["speed"] < 300
                   ]

travels_in_peace = [t for t in travels_clean if t["wartimes"] == "paix"]
travels_in_war = [t for t in travels_clean if t["wartimes"] == "guerre"]

### 4. Création des visualisations

In [None]:
# Scatter plot of speed against tonnage for the peace-time travels, one row per
# departure class, coloured by the number of steps.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Styles de navigation durant les années de paix",
    "mark": {"tooltip": {"content": "data"}, "type": "point"},
    "data": {"values": travels_in_peace},
    "encoding": {
        "row": {"field": "departure_class"},
        "x": {"field": "speed", "type": "quantitative", "title": "vitesse"},
        "y": {"field": "tonnage", "type": "quantitative", "title": "tonnage"},
        "color": {
            "field": "total_steps",
            "title": "nombre d'étapes",
            "type": "quantitative",
            "scale": {"range": ["lightblue", "yellow"]},
        },
    },
    # Kept disabled: "resolve": {"axis": {"x": "independent", "y": "independent"}}
})

In [None]:
# Scatter plot of speed against tonnage for the war-time travels, one row per
# departure class, coloured by the number of steps.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Styles de navigation durant les années de guerre",
    "mark": {"tooltip": {"content": "data"}, "type": "point"},
    "data": {"values": travels_in_war},
    "encoding": {
        "row": {"field": "departure_class"},
        "x": {"field": "speed", "type": "quantitative", "title": "vitesse"},
        "y": {"field": "tonnage", "type": "quantitative", "title": "tonnage"},
        "color": {
            "field": "total_steps",
            "title": "nombre d'étapes",
            "type": "quantitative",
            "scale": {"range": ["lightblue", "yellow"]},
        },
    },
    # Kept disabled: "resolve": {"axis": {"x": "independent", "y": "independent"}}
})

In [None]:
# Bar chart counting the peace-time travels per tonnage value, one row per
# departure class, each row with its own y scale.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Distribution des tonnages par provenance",
    "mark": {"tooltip": {"content": "data"}, "type": "bar"},
    "data": {"values": travels_in_peace},
    "encoding": {
        "row": {"field": "departure_class"},
        "x": {"field": "tonnage", "type": "nominal", "title": "tonnage"},
        "y": {"aggregate": "count", "type": "quantitative", "title": "nombre de voyages"},
    },
    # Only y is resolved independently; an "x": "independent" entry is kept disabled.
    "resolve": {"scale": {"y": "independent"}},
})

In [None]:
# Bar chart counting the peace-time travels per number of steps, one row per
# departure class, each row with its own y scale.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Distribution des nombres d'étapes par provenance les années de paix",
    "mark": {"tooltip": {"content": "data"}, "type": "bar"},
    "data": {"values": travels_in_peace},
    "encoding": {
        "row": {"field": "departure_class"},
        "x": {"field": "total_steps", "type": "nominal", "title": "nombre d'étapes"},
        "y": {"aggregate": "count", "type": "quantitative", "title": "nombre de voyages"},
    },
    # Only y is resolved independently; an "x": "independent" entry is kept disabled.
    "resolve": {"scale": {"y": "independent"}},
})

In [None]:
# Histogram of the binned speed of the peace-time travels, one row per departure
# class, each row with its own y scale.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Distribution des vitesses par provenance",
    "mark": {"tooltip": {"content": "data"}, "type": "bar"},
    "data": {"values": travels_in_peace},
    "encoding": {
        "row": {"field": "departure_class"},
        "x": {"field": "speed", "type": "quantitative", "title": "vitesse", "bin": True},
        "y": {"aggregate": "count", "type": "quantitative", "title": "nombre de voyages"},
    },
    # Only y is resolved independently; an "x": "independent" entry is kept disabled.
    "resolve": {"scale": {"y": "independent"}},
})

In [None]:
# Histogram of the binned total distance over all cleaned travels, faceted by
# departure class (rows) and war/peace (columns), with independent y scales.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Distribution des distances par provenance",
    "data": {"values": travels_clean},
    "facet": {
        "row": {"field": "departure_class"},
        "column": {"field": "wartimes"},
    },
    "spec": {
        "mark": {"tooltip": {"content": "data"}, "type": "bar"},
        "encoding": {
            "x": {
                "field": "total_miles",
                "type": "quantitative",
                "title": "distance totale parcourue",
                "bin": True,
            },
            "y": {"aggregate": "count", "type": "quantitative", "title": "nombre de voyages"},
        },
    },
    # Only y is resolved independently; an "x": "independent" entry is kept disabled.
    "resolve": {"scale": {"y": "independent"}},
})

In [None]:
# Bubble chart of the peace-time travels: number of steps against tonnage, circle
# size giving the number of travels, one row per departure class with its own
# size scale.
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Distribution des nombres d'étapes et tonnages par provenance les années de paix",
    "mark": {"tooltip": {"content": "data"}, "type": "circle"},
    "data": {"values": travels_in_peace},
    "encoding": {
        "row": {"field": "departure_class"},
        "y": {
            "field": "total_steps",
            "type": "ordinal",
            "title": "nombre d'étapes",
            "sort": "descending",
        },
        "x": {"field": "tonnage", "type": "nominal", "title": "tonnage"},
        "size": {"aggregate": "count", "title": "nombre de voyages"},
    },
    # Only size is resolved independently; an "x": "independent" entry is kept disabled.
    "resolve": {"scale": {"size": "independent"}},
})