In [None]:
import csv
import pandas as pd
import numpy as np
import pprint

### 1. Filtrage des flows

In [None]:
# Select the flows of the source suite "la Santé registre de patentes de Marseille"
# whose destination_function is "O", and remember for each source document the
# travel_rank of that flow (kept as the raw CSV string).
# NOTE(review): an additional filter on toponyme_fr == 'Marseille' was left disabled
# in the original code.
flows_to_Marseille = []
rank_Marseille = {}
with open('../../data/navigo_all_flows.csv', newline='') as flows_file:
    for flow in csv.DictReader(flows_file):
        if flow['destination_function'] != "O":
            continue
        if flow["source_suite"] != "la Santé registre de patentes de Marseille":
            continue
        flows_to_Marseille.append(flow)
        # If a document has several matching flows, the last one read wins.
        rank_Marseille[flow["source_doc_id"]] = flow["travel_rank"]


In [None]:
# Collect every flow of the Marseille register whose travel_rank is lower than or
# equal to the rank stored in rank_Marseille for the same source document.
ranks_smaller_than_Marseille = []
# Number of flows skipped because their destination UHGS id is 'A9999997'.
counter_uhgs_99999 = 0
with open('../../data/navigo_all_flows.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        if row["source_suite"] != "la Santé registre de patentes de Marseille":
            continue
        if row["destination_uhgs_id"] == 'A9999997':
            counter_uhgs_99999 += 1
            continue
        max_rank = rank_Marseille.get(row["source_doc_id"])
        if max_rank is None:
            # This document has no recorded Marseille rank.
            continue
        # csv.DictReader yields strings: ranks must be compared as integers,
        # otherwise the comparison is lexicographic ("10" <= "9" is True).
        try:
            is_before_or_at_marseille = int(row["travel_rank"]) <= int(max_rank)
        except (TypeError, ValueError):
            # A rank that is not an integer cannot be ordered; leave the flow out.
            continue
        if is_before_or_at_marseille:
            ranks_smaller_than_Marseille.append(row)
# Display how many flows were skipped because of the 'A9999997' destination.
counter_uhgs_99999

### 2. Reconstitution des voyages

In [None]:
from collections import defaultdict
# Aggregate the selected flows into one record per source document.
# Each record accumulates the total distance and the number of steps; it is flagged
# keep=False as soon as one of its flows has no usable distance or no departure date,
# and every later flow of that document is then ignored.
travels = defaultdict(lambda: {"total_miles": 0, "total_steps": 0, "keep": True})
null_distance = 0

for flow in ranks_smaller_than_Marseille:
    source_doc = flow["source_doc_id"]
    record = travels[source_doc]
    miles = flow["distance_dep_dest_miles"]

    usable = miles and miles != '0' and record["keep"] and flow["departure_out_date"]
    if not usable:
        record["keep"] = False
        continue

    record["total_miles"] += int(miles)
    record["total_steps"] += 1

    # The first flow of the travel provides the departure date.
    if flow["travel_rank"] == "1":
        record["departure_date"] = flow["departure_out_date"]

    # The flow whose rank matches the recorded Marseille rank provides the arrival
    # date and the ship description.
    if flow["travel_rank"] == rank_Marseille[source_doc]:
        record["arrival_date"] = flow["indate_fixed"]
        record["pavillon"] = flow["ship_flag_standardized_fr"]
        record["classe_bateau"] = flow["ship_class_standardized"]
        arrival_year = flow["indate_fixed"][:4]
        # The last digit of the year is forced to 9 (e.g. "1787" -> "1789"); the
        # result is the grouping label used by the later pivot tables.
        if arrival_year[-1] == "9":
            record["year"] = arrival_year
        else:
            record["year"] = arrival_year[:3] + "9"

### 3. Suppression des voyages invalides

In [None]:
from datetime import datetime

# Keep only the travels that are complete and whose dates can be parsed, and derive
# their duration (in days) and speed (total_miles / duration).
good_travels = {}
error_list = []  # exceptions describing the travels that had to be discarded

for k, v in travels.items():
    if not v["keep"]:
        continue

    departure_date = v.get("departure_date")
    arrival_date = v.get("arrival_date")
    # These keys are only set when the rank-1 flow / the Marseille-rank flow was seen.
    # A travel can still be flagged keep=True without them; record it and move on
    # instead of raising KeyError.
    if not departure_date or not arrival_date:
        error_list.append(KeyError(f"{k}: missing departure_date or arrival_date"))
        continue

    # Departure dates containing '<' or '>' are skipped.
    if '<' in departure_date or '>' in departure_date:
        continue

    try:
        end_time = datetime.strptime(arrival_date, "%Y-%m-%d")
        # Departure dates use '=' as separator; only the first 10 characters are parsed.
        start_time = datetime.strptime(departure_date[:10], "%Y=%m=%d")
    except ValueError as e:
        error_list.append(e)
        continue

    travel = v.copy()
    travel["duration"] = (end_time - start_time).days
    # Same-day travels count as one day so the speed division never hits zero.
    if travel["duration"] == 0:
        travel["duration"] = 1
    travel["speed"] = v["total_miles"] / travel["duration"]
    # NOTE: despite its name, "decade" holds the first 4 characters of the arrival date.
    travel["decade"] = arrival_date[:4]
    travel.pop("keep")
    good_travels[k] = travel


In [None]:
# One row per travel (keys of good_travels become the index), one column per field.
# from_dict(orient="index") builds the frame column by column, so numeric fields keep a
# numeric dtype; transposing a mixed-type frame would turn every column into `object`.
df_travels = pd.DataFrame.from_dict(good_travels, orient="index")
# Durations equal to 0 were already replaced by 1, so this only drops negative
# durations (arrival date earlier than departure date).
df_travels = df_travels[df_travels.duration > 0]

In [None]:
# Save the travels table, then display it: a bare expression is only rendered when
# it is the last statement of the cell.
df_travels.to_csv('df_travels.csv')
df_travels

### 4. Création des matrices de résultats

In [None]:
# Mean speed for each flag (pavillon) and each decade (column 'year').
# The aggregation is passed by name: giving np.mean to pandas is deprecated in recent
# releases (FutureWarning), and the string form computes the same group-wise mean.
mean_speed = pd.pivot_table(
    df_travels,
    values='speed',
    index=['pavillon'],
    columns=['year'],
    aggfunc='mean',
).sort_index()
mean_speed.to_csv("travel_mean_speed_per_flag_and_year.csv")

In [None]:
# Total number of travels for each flag and each decade.
count = (
    pd.pivot_table(
        df_travels,
        index=['pavillon'],
        columns=['year'],
        values='speed',
        aggfunc=np.ma.count,
    )
    .sort_index()
)
count.to_csv("travel_count_per_flag_and_year.csv")

In [None]:
# Mean number of flows (steps) per travel for each flag and each decade.
# The aggregation is passed by name: giving np.mean to pandas is deprecated in recent
# releases (FutureWarning), and the string form computes the same group-wise mean.
mean_flow_count = pd.pivot_table(
    df_travels,
    values='total_steps',
    index=['pavillon'],
    columns=['year'],
    aggfunc='mean',
).sort_index()
mean_flow_count.to_csv("mean_flow_count_per_flag_and_year.csv")

In [None]:
# Summary per (decade label, flag) used by the charts: mean speed, mean number of
# steps, mean distance, number of travels and mean duration.
viz_df = (
    df_travels
    .groupby(['year', 'pavillon'])
    .agg(
        mean_speed=('speed', 'mean'),
        mean_number_steps=('total_steps', 'mean'),
        mean_total_miles=('total_miles', 'mean'),
        # 'size' counts the rows of each group, i.e. the number of travels.
        count_travel=('departure_date', 'size'),
        mean_duration=('duration', 'mean'),
    )
    .reset_index()
)

viz_df.to_csv('viz_travel.csv')

In [None]:
# Number of travels per flag and per ship class (all years together).
# margins=True adds the 'All' totals; after transposing, ship classes are the rows,
# sorted by decreasing 'All' total, and missing combinations are filled with 0.0.
travel_shipclass = (
    pd.pivot_table(
        df_travels,
        index=['pavillon'],
        columns=['classe_bateau'],
        values='total_miles',
        aggfunc=np.ma.count,
        margins=True,
    )
    .transpose()
    .sort_values('All', ascending=False)
    .fillna(0.0)
)
travel_shipclass.to_csv("travels_count_per_shipclass_perflag.csv")

In [None]:
# Summary per (flag, ship class) used by the charts: mean speed, mean number of
# steps, mean distance, number of travels and mean duration.
viz_travel_ship = (
    df_travels
    .groupby(['pavillon', 'classe_bateau'])
    .agg(
        mean_speed=('speed', 'mean'),
        mean_number_steps=('total_steps', 'mean'),
        mean_total_miles=('total_miles', 'mean'),
        # 'size' counts the rows of each group, i.e. the number of travels.
        count_travel=('departure_date', 'size'),
        mean_duration=('duration', 'mean'),
    )
    .reset_index()
)


In [None]:
# Display the per-flag / per-ship-class summary table.
viz_travel_ship

### 5. Création des visualisations

In [None]:
# Turn each row of viz_df into a plain dict so it can be embedded as inline
# "values" in the Vega-Lite specifications.
list_viz = [dict(record) for _, record in viz_df.iterrows()]


In [None]:
# Turn each row of viz_travel_ship into a plain dict so it can be embedded as inline
# "values" in the Vega-Lite specifications.
list_viz_travel_ship = [dict(record) for _, record in viz_travel_ship.iterrows()]

In [None]:
from IPython.display import display

def VegaLite(spec):
    """Display a Vega-Lite specification as rich output in the notebook.

    `spec` is published unchanged under the 'application/vnd.vegalite.v4+json'
    MIME type (raw=True: the bundle is passed as-is).

    NOTE(review): the specs in this notebook declare the v5 schema while the MIME
    type used here is the v4 one — confirm the front end renders them as intended.
    """
    display({'application/vnd.vegalite.v4+json': spec}, raw=True)

In [None]:
# Heatmap: mean speed for each flag and each decade.

mean_speed_heatmap_encoding = {
    "x": {
        "field": "year",
        "type": "nominal",
        "title": "year",
        "axis": {"orient": "top"},
    },
    "y": {"field": "pavillon", "type": "nominal", "title": "pavillon"},
    "color": {
        "field": "mean_speed",
        "type": "quantitative",
        "title": "Vitesse moyenne",
    },
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Vitesse moyenne pour chaque pavillon et chaque décennie",
    # Rectangular cells; the tooltip shows every field of the hovered datum.
    "mark": {"tooltip": {"content": "data"}, "type": "rect"},
    "data": {"values": list_viz},
    "encoding": mean_speed_heatmap_encoding,
})


In [None]:
# Line chart: mean speed for each flag and each decade (one line per flag).

mean_speed_line_encoding = {
    "x": {"field": "year", "type": "temporal", "title": "year"},
    "y": {"field": "mean_speed", "type": "quantitative", "title": "mean speed"},
    "color": {"field": "pavillon", "type": "nominal", "title": "pavillon"},
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Vitesse moyenne pour chaque pavillon et chaque décennie",
    "mark": "line",
    "data": {"values": list_viz},
    "encoding": mean_speed_line_encoding,
})


In [None]:
# Heatmap: total number of travels for each flag and each decade.

travel_count_heatmap_encoding = {
    "x": {
        "field": "year",
        "type": "nominal",
        "title": "year",
        "axis": {"orient": "top"},
    },
    "y": {"field": "pavillon", "type": "nominal", "title": "pavillon"},
    "color": {
        "field": "count_travel",
        "type": "quantitative",
        "title": "nombre de trajets",
    },
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre de trajets total pour chaque pavillon et chaque décennie",
    # Rectangular cells; the tooltip shows every field of the hovered datum.
    "mark": {"tooltip": {"content": "data"}, "type": "rect"},
    "data": {"values": list_viz},
    "encoding": travel_count_heatmap_encoding,
})


In [None]:
# Heatmap: mean number of flows (steps) per travel for each flag and each decade.

mean_steps_heatmap_encoding = {
    "x": {
        "field": "year",
        "type": "nominal",
        "title": "year",
        "axis": {"orient": "top"},
    },
    "y": {"field": "pavillon", "type": "nominal", "title": "pavillon"},
    "color": {
        "field": "mean_number_steps",
        "type": "quantitative",
        "title": "nombre moyens d'étapes",
    },
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre moyen de flows par trajets pour chaque pavillon et chaque décennie",
    # Rectangular cells; the tooltip shows every field of the hovered datum.
    "mark": {"tooltip": {"content": "data"}, "type": "rect"},
    "data": {"values": list_viz},
    "encoding": mean_steps_heatmap_encoding,
})


In [None]:
# Heatmap: mean number of miles per travel for each flag and each decade.
# The colour encodes `mean_total_miles` (a mean over travels), so the chart title and
# the legend say "moyen" rather than "total".
VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre moyen de miles pour chaque pavillon et chaque décennie",
    "mark": {
        "tooltip": {
         "content": "data"
        },
        "type": "rect"
    },
    "data": {
        "values": list_viz
    },
    "encoding": {
        "x": {
            "field": "year",
            "type": "nominal", 
            "title": "year",
            "axis": {
                "orient": "top"
            }
        },
        "y": {
            "field": "pavillon",
            "type": "nominal",
            "title": "pavillon"
        },
        "color": {
            "field": "mean_total_miles",
            "type": "quantitative",
            "title": "nombre moyen de miles"
        }
    }
})


In [None]:
# Heatmap: mean travel duration for each flag and each decade.

mean_duration_heatmap_encoding = {
    "x": {
        "field": "year",
        "type": "nominal",
        "title": "year",
        "axis": {"orient": "top"},
    },
    "y": {"field": "pavillon", "type": "nominal", "title": "pavillon"},
    "color": {
        "field": "mean_duration",
        "type": "quantitative",
        "title": "temps de trajet moyen",
    },
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Moyenne des temps de trajets pour chaque pavillon et chaque décennie",
    # Rectangular cells; the tooltip shows every field of the hovered datum.
    "mark": {"tooltip": {"content": "data"}, "type": "rect"},
    "data": {"values": list_viz},
    "encoding": mean_duration_heatmap_encoding,
})

In [None]:
# Heatmap: number of travels per flag and per ship class.
# Both axes are sorted by the summed number of travels; the colour uses a log scale.

shipclass_count_heatmap_encoding = {
    "x": {
        "field": "classe_bateau",
        "type": "nominal",
        "title": "type de bateau",
        "axis": {"orient": "top"},
        "sort": {"field": "count_travel", "op": "sum"},
    },
    "y": {
        "field": "pavillon",
        "type": "nominal",
        "title": "pavillon",
        "sort": {"field": "count_travel", "op": "sum"},
    },
    "color": {
        "field": "count_travel",
        "type": "quantitative",
        "title": "nb de trajet, échelle log",
        "scale": {"type": "log"},
    },
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Nombre de trajet par pavillon et par types de bateau",
    # Rectangular cells; the tooltip shows every field of the hovered datum.
    "mark": {"tooltip": {"content": "data"}, "type": "rect"},
    "data": {"values": list_viz_travel_ship},
    "encoding": shipclass_count_heatmap_encoding,
})


In [None]:
# Heatmap: mean travel duration per flag and per ship class.
# Both axes are sorted by the summed number of travels.

shipclass_duration_heatmap_encoding = {
    "x": {
        "field": "classe_bateau",
        "type": "nominal",
        "title": "type de bateau",
        "sort": {"field": "count_travel", "op": "sum"},
        "axis": {"orient": "top"},
    },
    "y": {
        "field": "pavillon",
        "type": "nominal",
        "title": "pavillon",
        "sort": {"field": "count_travel", "op": "sum"},
    },
    "color": {
        "field": "mean_duration",
        "type": "quantitative",
        "title": "durée moyenne d'un trajet",
    },
}

VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    "title": "Durée moyenne par pavillon et par types de bateau",
    # Rectangular cells; the tooltip shows every field of the hovered datum.
    "mark": {"tooltip": {"content": "data"}, "type": "rect"},
    "data": {"values": list_viz_travel_ship},
    "encoding": shipclass_duration_heatmap_encoding,
})
