In [None]:
import csv
import pandas as pd
import numpy as np
import pprint

Ce module consiste à décrire diachroniquement les voyages arrivant à Marseille.

### 1. Filtrage des flows

- Données de "flows" Navigo corpus
- Uniquement la "Santé"

In [None]:
# Collect the flows of the Marseille "Santé" register whose destination_function
# is "O" (presumably the flows arriving at Marseille -- TODO confirm), and
# remember, for each source document, the travel_rank of that flow.
flows_to_Marseille = []
rank_Marseille = {}
with open('../../data/navigo_all_flows.csv', newline='') as flows_file:
    for flow in csv.DictReader(flows_file):
        if flow['destination_function'] != "O":
            continue
        if flow["source_suite"] != "la Santé registre de patentes de Marseille":
            continue
        flows_to_Marseille.append(flow)
        # travel_rank is kept as the raw CSV string; if a document has several
        # matching flows, the last one read wins.
        rank_Marseille[flow["source_doc_id"]] = flow["travel_rank"]


- Prendre note des flows d'arrivées à Marseille
- Isoler les flows qui précèdent les arrivées à Marseille

In [None]:
# Keep, for every document that has a recorded Marseille rank, the flows whose
# rank is lower than or equal to that rank (i.e. the steps up to and including
# the flow recorded in rank_Marseille).
ranks_smaller_than_Marseille = []
# Number of "Santé" flows skipped because their destination id is 'A9999997'.
counter_uhgs_99999 = 0
with open('../../data/navigo_all_flows.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        if row["source_suite"] != "la Santé registre de patentes de Marseille":
            continue
        if row["destination_uhgs_id"] == 'A9999997':
            counter_uhgs_99999 += 1
            continue
        if row["source_doc_id"] in rank_Marseille:
            max_rank = rank_Marseille[row["source_doc_id"]]
            # Ranks come out of the CSV as strings: compare them as integers,
            # otherwise "10" <= "2" would be True (lexicographic ordering).
            # assumes travel_rank is always an integer string -- TODO confirm
            if int(row["travel_rank"]) <= int(max_rank):
                ranks_smaller_than_Marseille.append(row)
counter_uhgs_99999

### 2. Reconstitution des voyages

- Reconstituer des voyages complets à partir des flows séquentiels
- Attribution de caractéristiques aux voyages :
    - distance parcourue
    - nombre d'étapes
    - date de départ
    - date d'arrivée
    - pavillon
    - classe de bateau

In [None]:
from collections import defaultdict

# One accumulator per source document: cumulated distance, number of steps and
# a "keep" flag that is cleared as soon as one flow of the travel is unusable.
travels = defaultdict(lambda: {"total_miles": 0, "total_steps": 0, "keep": True})
null_distance = 0

for flow in ranks_smaller_than_Marseille:
    voyage_id = flow["source_doc_id"]
    voyage = travels[voyage_id]
    miles = flow["distance_dep_dest_miles"]

    # A flow is usable only if it has a non-zero distance, a departure date,
    # and belongs to a travel that has not already been rejected.
    usable = miles and miles != '0' and voyage["keep"] and flow["departure_out_date"]
    if not usable:
        voyage["keep"] = False
        continue

    voyage["total_miles"] += int(miles)
    voyage["total_steps"] += 1

    # The first step gives the departure date of the whole travel.
    if flow["travel_rank"] == "1":
        voyage["departure_date"] = flow["departure_out_date"]

    # The step recorded in rank_Marseille gives the arrival characteristics.
    if flow["travel_rank"] == rank_Marseille[voyage_id]:
        arrival = flow["indate_fixed"]
        voyage["arrival_date"] = arrival
        voyage["pavillon"] = flow["ship_flag_standardized_fr"]
        voyage["classe_bateau"] = flow["ship_class_standardized"]
        voyage["year"] = arrival[:4]

### 3. Suppression des voyages invalides

- Suppression des voyages dont la date est invalide : contient une imprécision sous forme de '<' ou '>'
- Suppression des voyages en 1787, pour bien comparer guerre et paix

In [None]:
from datetime import datetime

# Travels that pass every validity check, keyed by source document id, with
# "duration" (days) and "speed" (miles per day) added.
good_travels = {}
# ValueError instances raised while parsing departure/arrival dates.
error_list = []
# Ids of kept travels for which no departure or arrival flow was recorded.
incomplete_travel_ids = []

for k, v in travels.items():
    if not v["keep"]:
        continue

    departure = v.get("departure_date")
    arrival = v.get("arrival_date")
    if departure is None or arrival is None:
        # These keys are only set when the rank-1 flow / the arrival flow was
        # seen; without them the travel cannot be dated, so skip it instead of
        # failing with a KeyError.
        incomplete_travel_ids.append(k)
        continue

    # Drop imprecise departure dates ('<' or '>') and the year 1787.
    if '<' in departure or '>' in departure or v["year"] == '1787':
        continue

    try:
        end_time = datetime.strptime(arrival, "%Y-%m-%d")
        # NOTE(review): departure dates are parsed with '=' separators, which
        # presumably is how the source encodes exact dates -- confirm.
        start_time = datetime.strptime(departure[:10], "%Y=%m=%d")
    except ValueError as e:
        error_list.append(e)
        continue

    travel = v.copy()
    travel["duration"] = (end_time - start_time).days
    # Same-day travels count as one day to avoid a division by zero.
    if travel["duration"] == 0:
        travel["duration"] = 1
    travel["speed"] = v["total_miles"] / travel["duration"]
    travel.pop("keep")
    good_travels[k] = travel


- Création du dataframe de voyages

In [None]:
# One row per travel. Transposing a mixed-type frame leaves every column with
# the object dtype, so infer_objects() restores numeric dtypes for the numeric
# columns before they are aggregated.
df_travels = pd.DataFrame(good_travels).transpose().infer_objects()
# Discard travels whose computed duration is negative.
df_travels = df_travels[df_travels.duration > 0]

In [None]:
# Number of retained travels per year, sorted by year.
df_travels.year.value_counts().sort_index()

### 4. Création des visualisations

In [None]:
# Yearly summary: mean speed, mean number of steps, mean distance, number of
# travels and mean duration.
viz_df = (
    df_travels
    .groupby(['year'])
    .agg(
        mean_speed=('speed', 'mean'),
        mean_number_steps=('total_steps', 'mean'),
        mean_distance=('total_miles', 'mean'),
        count_travel=('departure_date', 'size'),
        mean_duration=('duration', 'mean'),
    )
    .reset_index()
)

# Label each year as war ("guerre") or peace ("paix").
is_war_year = viz_df.year.isin(["1759", "1779", "1799"])
viz_df["war"] = is_war_year.map({True: "guerre", False: "paix"})

- Passage de dataframe Pandas à des listes "VegaLite-ready"
- Configuration de Vegalite

In [None]:
# One plain dict per row of viz_df, the shape expected by Vega-Lite's
# "data.values"; to_dict(orient="records") replaces the manual iterrows loop.
list_viz = viz_df.to_dict(orient="records")


In [None]:
from IPython.display import display

def VegaLite(spec):
    """Display a Vega-Lite specification as rich output.

    The specification is wrapped in a MIME bundle under the Vega-Lite v4 MIME
    type and handed to IPython's ``display`` with ``raw=True``.

    NOTE(review): the specs in this notebook declare the v5 ``$schema`` while
    the MIME type is v4 -- confirm which version the front-end should use.

    Parameters
    ----------
    spec : dict
        The Vega-Lite specification to render.
    """
    mime_bundle = {'application/vnd.vegalite.v4+json': spec}
    display(mime_bundle, raw=True)

In [None]:
# Bar chart: mean speed per year, coloured by the "war" label.
mean_speed_encoding = {
    "x": {"field": "year", "type": "nominal", "title": "year"},
    "y": {"field": "mean_speed", "type": "quantitative", "title": "mean speed"},
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Vitesse moyenne pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": mean_speed_encoding,
})

In [None]:
# Bar chart: mean number of steps per year, coloured by the "war" label.
mean_steps_encoding = {
    "x": {"field": "year", "type": "nominal", "title": "year"},
    "y": {"field": "mean_number_steps", "type": "quantitative", "title": "mean number of steps"},
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre moyen d'étapes pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": mean_steps_encoding,
})

In [None]:
# Bar chart: mean travel duration (days) per year, coloured by the "war" label.
mean_duration_encoding = {
    "x": {"field": "year", "type": "nominal", "title": "année"},
    "y": {"field": "mean_duration", "type": "quantitative", "title": "Durée moyenne, jours"},
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Durée moyenne des trajets pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": mean_duration_encoding,
})

In [None]:
# Bar chart: mean travel distance (miles) per year, coloured by the "war" label.
mean_distance_encoding = {
    "x": {"field": "year", "type": "nominal", "title": "année"},
    "y": {"field": "mean_distance", "type": "quantitative", "title": "Distance moyenne, miles"},
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Longueur moyenne des trajets en miles, pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": mean_distance_encoding,
})