In [None]:
from classif_archaisme import (
    ughs_id_to_province, 
    LEVANT_PROVINCES,
    BIRTHPLACE_TO_PROVINCE,
    FLAG_TO_GROUPING
)
import csv
import pandas as pd
import numpy as np

In [None]:
def aggregate_flag_for_flow(flow, ughs_id_to_province_map, isolate_levant=True):
    """Return the flag grouping label for one flow record.

    Parameters
    ----------
    flow : mapping
        A flow record. The keys 'ship_flag_standardized_fr',
        'birthplace_uhgs_id' and 'birthplace' are read.
    ughs_id_to_province_map : mapping
        Lookup from a birthplace UHGS id to a province name.
    isolate_levant : bool, default True
        When true, a 'français' flag is split into 'France du Levant' or
        'France du Ponant' according to the birthplace province, and a
        'monégasque' flag is reported as 'France du Levant'.

    Returns
    -------
    str
        The grouping label; 'Inconnu' when the flag is empty or has no entry
        in FLAG_TO_GROUPING.
    """
    # Read from the `flow` argument: the function must not depend on a
    # variable named `row` existing in the caller's (notebook) namespace.
    flag = flow['ship_flag_standardized_fr']
    birthplace_uhgs_id = flow['birthplace_uhgs_id']
    birthplace_city = flow['birthplace']

    if flag == '':
        return 'Inconnu'

    if flag == 'français' and isolate_levant:
        # Resolve the province from the UHGS id first ...
        if birthplace_uhgs_id in ughs_id_to_province_map:
            province = ughs_id_to_province_map[birthplace_uhgs_id]
        else:
            province = 'Inconnu'
        # ... and fall back on the birthplace name when the id is unknown.
        if province == 'Inconnu' and birthplace_city in BIRTHPLACE_TO_PROVINCE:
            province = BIRTHPLACE_TO_PROVINCE[birthplace_city]
        # Any province outside LEVANT_PROVINCES (including an unresolved
        # one) is counted as Ponant.
        if province in LEVANT_PROVINCES:
            return 'France du Levant'
        return 'France du Ponant'

    if flag in FLAG_TO_GROUPING:
        if flag == 'monégasque' and isolate_levant:
            return 'France du Levant'
        return FLAG_TO_GROUPING[flag]

    return 'Inconnu'

In [None]:
# Tally, per flag grouping, the flows of the Marseille health registry whose
# destination_function is "O".
stats = {}
with open('../../data/navigo_all_flows.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        selected = (
            row['destination_function'] == "O"
            and row["source_suite"] == "la Santé registre de patentes de Marseille"
        )
        # An extra filter on row['toponyme_fr'] == 'Marseille' was left
        # disabled here.
        if not selected:
            continue
        flag_class = aggregate_flag_for_flow(row, ughs_id_to_province)
        stats[flag_class] = stats.get(flag_class, 0) + 1

# Print the groupings from the least to the most frequent.
for flag_class, count in sorted(stats.items(), key=lambda item: item[1]):
    print(flag_class, str(count))

In [None]:
# Show the flag -> grouping lookup imported from classif_archaisme, for reference.
print(FLAG_TO_GROUPING)

In [None]:
# Select, for every document of the Marseille health registry, the flows whose
# travel_rank is not greater than the rank of the flow flagged with
# destination_function "O".
MARSEILLE_HEALTH_REGISTRY = "la Santé registre de patentes de Marseille"


def _parse_rank(raw_rank):
    """Return a travel_rank as an int, or None when it is not an integer string."""
    try:
        return int(raw_rank)
    except (TypeError, ValueError):
        return None


# Read the CSV once and keep only the registry rows; both selections below
# work on that subset.
with open('../../data/navigo_all_flows.csv', newline='') as csvfile:
    registry_rows = [
        row for row in csv.DictReader(csvfile)
        if row["source_suite"] == MARSEILLE_HEALTH_REGISTRY
    ]

flows_to_Marseille = []
rank_Marseille = {}
for row in registry_rows:
    if row['destination_function'] == "O":
        flows_to_Marseille.append(row)
        # Kept as the raw string: later cells compare it to row["travel_rank"]
        # with string equality.
        rank_Marseille[row["source_doc_id"]] = row["travel_rank"]

ranks_smaller_than_Marseille = []
for row in registry_rows:
    if row["destination_uhgs_id"] == 'A9999997':
        continue
    max_rank = _parse_rank(rank_Marseille.get(row["source_doc_id"]))
    rank = _parse_rank(row["travel_rank"])
    # Documents without a recorded rank, or rows whose rank is not an integer,
    # cannot be ordered and are left out.
    if max_rank is None or rank is None:
        continue
    # csv.DictReader yields strings: compare numerically, otherwise "10" <= "2"
    # holds and "9" <= "10" does not.
    if rank <= max_rank:
        ranks_smaller_than_Marseille.append(row)

In [None]:
from collections import defaultdict


def _blank_travel():
    """Build the initial accumulator for one source document."""
    return {"total_miles": 0, "total_steps": 0, "keep": True}


# One accumulator per source_doc_id, created on first access.
travels = defaultdict(_blank_travel)
null_distance = 0

for row in ranks_smaller_than_Marseille:
    doc_id = row["source_doc_id"]
    travel = travels[doc_id]
    distance = row["distance_dep_dest_miles"]

    # A step without a usable distance or departure date disqualifies the whole
    # travel; once disqualified, every later step lands here as well.
    if not (distance and distance != '0' and travel["keep"] and row["departure_out_date"]):
        travel["keep"] = False
        continue

    travel["total_miles"] += int(distance)
    travel["total_steps"] += 1

    step_rank = row["travel_rank"]
    if step_rank == "1":
        travel["departure_date"] = row["departure_out_date"]
    if step_rank == rank_Marseille[doc_id]:
        # This is the step whose rank was recorded in rank_Marseille: take the
        # descriptive fields of the travel from it.
        arrival = row["indate_fixed"]
        travel["arrival_date"] = arrival
        travel["pavillon"] = row["ship_flag_standardized_fr"]
        travel["classe_bateau"] = row["ship_class_standardized"]
        travel["year"] = arrival[:4]
        travel["flag_class"] = aggregate_flag_for_flow(row, ughs_id_to_province, isolate_levant=True)

In [None]:
# Peek at the first 11 accumulated travel records.
for t in list(travels.values())[:11]:
    print(t)

In [None]:
from datetime import datetime

# Keep the travels that can be timed and derive their duration (days) and
# speed (miles per day). Records that cannot be used are reported in
# error_list instead of stopping the cell.
good_travels = {}
error_list = []

# Fields that are only set when specific steps were seen during accumulation.
_REQUIRED_TRAVEL_FIELDS = ("departure_date", "arrival_date", "year")

for k, v in travels.items():
    if not v["keep"]:
        continue

    # A kept travel may still lack these fields (they are filled only from the
    # rank-1 step and from the step recorded in rank_Marseille).
    missing = [field for field in _REQUIRED_TRAVEL_FIELDS if not v.get(field)]
    if missing:
        error_list.append(ValueError("travel {}: missing {}".format(k, ", ".join(missing))))
        continue

    # Departure dates containing '<' or '>' are skipped (presumably markers of
    # imprecise dates - confirm against the data documentation). The year 1787
    # is excluded; the reason is not stated in this notebook.
    departure = v["departure_date"]
    if '<' in departure or '>' in departure or v["year"] == '1787':
        continue

    try:
        end_time = datetime.strptime(v["arrival_date"], "%Y-%m-%d")
        # Departure dates are parsed with '=' separators, as written by the
        # original author.
        start_time = datetime.strptime(departure[:10], "%Y=%m=%d")
    except ValueError as e:
        error_list.append(e)
        continue

    travel = v.copy()
    travel["duration"] = (end_time - start_time).days
    # Same-day travels count as one day so the speed stays finite.
    if travel["duration"] == 0:
        travel["duration"] = 1
    travel["speed"] = v["total_miles"] / travel["duration"]
    travel.pop("keep")
    good_travels[k] = travel


In [None]:
# One row per travel (index = source_doc_id), one column per field.
# from_dict(orient="index") infers a dtype per column, whereas
# DataFrame(dict).transpose() turns every column into object dtype.
# Travels with a negative duration are dropped.
df_travels = (
    pd.DataFrame.from_dict(good_travels, orient="index")
    .loc[lambda frame: frame["duration"] > 0]
)

In [None]:
# Mean speed over every kept travel, then the mean speed of each flag grouping
# as a {flag_class: mean} dict.
general_speed = df_travels['speed'].mean()
flag_class_speed = df_travels.groupby('flag_class')['speed'].mean().to_dict()

In [None]:
# Per (year, flag grouping) summary used by the charts below, with the
# deviation of each mean speed from its grouping's mean and from the general
# mean.
viz_df = (
    df_travels
    .groupby(['year', 'flag_class'])
    .agg(
        mean_speed=('speed', 'mean'),
        mean_number_steps=('total_steps', 'mean'),
        mean_distance=('total_miles', 'mean'),
        count_travel=('departure_date', 'size'),
        mean_duration=('duration', 'mean'),
    )
    .reset_index()
    .assign(
        # Years labelled "guerre"; every other year is labelled "paix".
        war=lambda frame: frame["year"]
        .isin(["1759", "1779", "1799"])
        .map({True: "guerre", False: "paix"}),
        flagclass_mean_speed=lambda frame: frame["flag_class"].map(flag_class_speed),
        deviation_to_flag_mean=lambda frame: frame["mean_speed"] - frame["flagclass_mean_speed"],
        deviation_to_general_mean=lambda frame: frame["mean_speed"] - general_speed,
    )
)

In [None]:
# Vega-Lite inline data: one plain dict per row of viz_df.
list_viz = [dict(record) for _, record in viz_df.iterrows()]

In [None]:
from IPython.display import display

def VegaLite(spec):
    """Display a Vega-Lite spec through IPython as a raw MIME bundle.

    The spec is sent under the 'application/vnd.vegalite.v4+json' MIME type.
    """
    display({'application/vnd.vegalite.v4+json': spec}, raw=True)

In [None]:
# Bar chart, one facet per flag grouping: yearly mean speed minus the mean
# speed of that grouping, coloured by the war/peace label.
chart_encoding = {
    "facet": {"field": "flag_class", "columns": 3},
    "x": {"field": "year", "type": "nominal", "title": "year"},
    "y": {
        "field": "deviation_to_flag_mean",
        "type": "quantitative",
        "title": "deviation to mean - miles p. day",
    },
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Écart à la vitesse moyenne du pavillon pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": chart_encoding,
})

In [None]:
# Bar chart, one facet per flag grouping: yearly mean speed minus the mean
# speed over all travels, coloured by the war/peace label.
chart_encoding = {
    "facet": {"field": "flag_class", "columns": 3},
    "x": {"field": "year", "type": "nominal", "title": "year"},
    "y": {
        "field": "deviation_to_general_mean",
        "type": "quantitative",
        "title": "deviation to mean - miles p. day",
    },
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Écart de la vitesse à la moyenne générale pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": chart_encoding,
})

In [None]:
# Bar chart, one facet per flag grouping: yearly mean speed, coloured by the
# war/peace label.
chart_encoding = {
    "facet": {"field": "flag_class", "columns": 3},
    "x": {"field": "year", "type": "nominal", "title": "year"},
    "y": {
        "field": "mean_speed",
        "type": "quantitative",
        "title": "vitesse moyenne",
    },
    "color": {"field": "war", "type": "nominal", "title": "guerre"},
}
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Vitesse moyenne pour chaque décennie",
    "mark": "bar",
    "data": {"values": list_viz},
    "encoding": chart_encoding,
})