# Check the validity of the french routes

We have calculated the distance and duration between all the special arrondisments and other commune in france. As we appended the new routes to the exsiting routes which are partitioned. We want to check the validity of the new dataset.

In [1]:
from sedona.spark import *
from pyspark.sql import SparkSession, DataFrame
from pathlib import Path
from pyspark.sql.functions import trim, split, expr, col

In [2]:
# build a sedona session offline
project_root_dir = Path.cwd().parent.parent

In [3]:
jar_folder = Path(f"{project_root_dir}/jars/sedona-35-212-172")
jar_list = [str(jar) for jar in jar_folder.iterdir() if jar.is_file()]
jar_path = ",".join(jar_list)

# build a sedona session (sedona = 1.7.2) offline
spark = SparkSession.builder \
    .appName("check_extra_routes") \
    .master("local[*]") \
    .config("spark.jars", jar_path) \
    .getOrCreate()

In [4]:
# create a sedona context
sedona = SedonaContext.create(spark)

In [5]:
# get the spark context
sc = sedona.sparkContext

# use utf as default encoding
sc.setSystemProperty("sedona.global.charset", "utf8")

In [6]:
data_path = "C:/Users/PLIU/Downloads/duration_with_arrondisment_prod_final"

### Paris

| Arrondissement	 | Code INSEE | Code Postal |
|-----------------|------------|-------------|
| Paris 1er	      | 75101      | 75001       |
| Paris 2e	       | 75102      | 75002       |
| Paris 3e	       | 75103      | 75003       |
| Paris 4e	       | 75104      | 75004       |
| Paris 5e	       | 75105      | 75005       |
| Paris 6e	       | 75106      | 75006       |
| Paris 7e	       | 75107      | 75007       |
| Paris 8e	       | 75108      | 75008       |
| Paris 9e	       | 75109      | 75009       |
| Paris 10e	      | 75110      | 75010       |
| Paris 11e	      | 75111      | 75011       |
| Paris 12e	      | 75112      | 75012       |
| Paris 13e	      | 75113      | 75013       |
| Paris 14e	      | 75114      | 75014       |
| Paris 15e	      | 75115      | 75015       |
| Paris 16e	      | 75116      | 75016       |
| Paris 17e	      | 75117      | 75017       |
| Paris 18e	      | 75118      | 75018       |
| Paris 19e	      | 75119      | 75019       |
| Paris 20e	      | 75120      | 75020       |
| Paris ALL       | 75056      | None        |

### Lyon

| Arrondissement	 | Code INSEE | Code Postal |
|-----------------|------------|-------------|
| Lyon 1er	       | 69381      | 69001       |
| Lyon 2e	        | 69382      | 69002       |
| Lyon 3e	        | 69383      | 69003       |
| Lyon 4e	        | 69384      | 69004       |
| Lyon 5e	        | 69385      | 69005       |
| Lyon 6e	        | 69386      | 69006       |
| Lyon 7e	        | 69387      | 69007       |
| Lyon 8e	        | 69388      | 69008       |
| Lyon 9e	        | 69389      | 69009       |
| Lyon ALL	       | 69123      | None        |

### Marseille

| Arrondissement	 | Code INSEE | Code Postal |
|-----------------|------------|-------------|
| Marseille 1er	  | 13201      | 75001       |
| Marseille 2e	   | 13202      | 75002       |
| Marseille 3e	   | 13203      | 75003       |
| Marseille 4e	   | 13204      | 75004       |
| Marseille 5e	   | 13205      | 75005       |
| Marseille 6e	   | 13206      | 75006       |
| Marseille 7e	   | 13207      | 75007       |
| Marseille 8e	   | 13208      | 75008       |
| Marseille 9e	   | 13209      | 75009       |
| Marseille 10e	  | 13210      | 75010       |
| Marseille 11e	  | 13211      | 75011       |
| Marseille 12e	  | 13212      | 75012       |
| Marseille 13e	  | 13213      | 75013       |
| Marseille 14e	  | 13214      | 75014       |
| Marseille 15e	  | 13215      | 75015       |
| Marseille 16e	  | 13216      | 75016       |
| Marseille ALL   | 13055      | None        |

In [10]:
paris_insee_code = [75101,75102,75103,75104,75105,75106,75107,75108,75109,75110,75111,75112,75113,75114,75115,75116,75117,75118,75119,75120,]
lyon_insee_code=[69381,69382,69383,69384,69385,69386,69387,69388,69389,]
mars_insee_code = [13201,13202,13203,13204,13205,13206,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216,]

paris_code_list = ["75101","75102","75103","75104","75105","75106","75107","75108","75109","75110","75111","75112","75113","75114","75115","75116","75117","75118","75119","75120"]
marseil_code_list = ["13201","13202","13203","13204","13205","13206","13207","13208","13209","13210","13211","13212","13213","13214","13215","13216"]
lyon_code_list = ["69381","69382","69383","69384","69385","69386","69387","69388","69389"]

# the target commune is montrouge
target_commune_code = "92049"

In [8]:
all_routes_df = spark.read.parquet(data_path)

In [13]:
all_routes_df.show(5)

+----------+------------+----------+---------------+-----------------+------------+
|source_nom|    dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+------------+----------+---------------+-----------------+------------+
|   LYON 08|Pie-d'Orezza|     2B222|       725246.8|           628.83|       69388|
|   LYON 08|        Lano|     2B137|       712468.5|           613.96|       69388|
|   LYON 08|      Cambia|     2B051|       717864.4|           621.05|       69388|
|   LYON 08|       Érone|     2B106|       712936.2|           614.56|       69388|
|   LYON 08|      Oletta|     2B185|       704262.2|            612.8|       69388|
+----------+------------+----------+---------------+-----------------+------------+
only showing top 5 rows



In [18]:
from typing import List


def get_special_routes(src_code_list:List[str], target_code:str):
    result_df = None

    for src_code in src_code_list:
        filtered = all_routes_df.filter(
            (col("source_insee") == src_code) &
            (col("dest_insee") == target_code)
        )

        if result_df is None:
            result_df = filtered
        else:
            result_df = result_df.unionByName(filtered)

    if result_df is not None:
        result_df.show()
    else:
        print("No matching rows found.")

In [19]:
# check routes for paris arrondisment
get_special_routes(paris_code_list, target_commune_code)

+----------+---------+----------+---------------+-----------------+------------+
|source_nom| dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+---------+----------+---------------+-----------------+------------+
|  PARIS 01|Montrouge|     92049|         7195.6|            17.61|       75101|
|  PARIS 02|Montrouge|     92049|         7611.2|            18.75|       75102|
|  PARIS 03|Montrouge|     92049|        11844.6|            20.77|       75103|
|  PARIS 04|Montrouge|     92049|        10271.8|            16.76|       75104|
|  PARIS 05|Montrouge|     92049|         5592.4|            13.72|       75105|
|  PARIS 06|Montrouge|     92049|         5248.4|            12.13|       75106|
|  PARIS 07|Montrouge|     92049|         5513.5|            12.34|       75107|
|  PARIS 08|Montrouge|     92049|        13592.3|            19.09|       75108|
|  PARIS 09|Montrouge|     92049|         9490.8|            23.07|       75109|
|  PARIS 10|Montrouge|     9

In [20]:
# check routes for lyon arrondisment
get_special_routes(lyon_code_list, target_commune_code)

+----------+---------+----------+---------------+-----------------+------------+
|source_nom| dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+---------+----------+---------------+-----------------+------------+
|   LYON 01|Montrouge|     92049|       459253.0|           287.89|       69381|
|   LYON 02|Montrouge|     92049|       459387.4|           283.37|       69382|
|   LYON 03|Montrouge|     92049|       462307.5|           287.73|       69383|
|   LYON 04|Montrouge|     92049|       458502.0|           286.82|       69384|
|   LYON 05|Montrouge|     92049|       457219.8|           282.62|       69385|
|   LYON 06|Montrouge|     92049|       463604.5|           287.93|       69386|
|   LYON 07|Montrouge|     92049|       460814.4|           286.45|       69387|
|   LYON 08|Montrouge|     92049|       463493.6|           291.04|       69388|
|   LYON 09|Montrouge|     92049|       457084.5|           280.85|       69389|
+----------+---------+------

In [21]:
# check routes for marseil arrondisment
get_special_routes(marseil_code_list, target_commune_code)

+------------+---------+----------+---------------+-----------------+------------+
|  source_nom| dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+------------+---------+----------+---------------+-----------------+------------+
|MARSEILLE 01|Montrouge|     92049|       769561.0|           477.01|       13201|
|MARSEILLE 02|Montrouge|     92049|       766000.5|           475.66|       13202|
|MARSEILLE 03|Montrouge|     92049|       768121.2|           476.77|       13203|
|MARSEILLE 04|Montrouge|     92049|       771405.1|           479.77|       13204|
|MARSEILLE 05|Montrouge|     92049|       771085.7|           480.01|       13205|
|MARSEILLE 06|Montrouge|     92049|       770770.0|           479.65|       13206|
|MARSEILLE 07|Montrouge|     92049|       772369.3|           482.27|       13207|
|MARSEILLE 08|Montrouge|     92049|       782207.3|           500.19|       13208|
|MARSEILLE 09|Montrouge|     92049|       783103.8|           492.72|       13209|
|MAR

## now I need to calculate the reverse route

In [22]:
malakoff = "92046"
montrouge = "92049"
bagneux = "92007"

# to paris 1st
get_special_routes([malakoff, montrouge, bagneux], "75101")

+----------+--------+----------+---------------+-----------------+------------+
|source_nom|dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+--------+----------+---------------+-----------------+------------+
|  Malakoff|PARIS 01|     75101|         8299.4|            19.35|       92046|
| Montrouge|PARIS 01|     75101|         6811.3|            16.13|       92049|
|   Bagneux|PARIS 01|     75101|         8986.0|            19.08|       92007|
+----------+--------+----------+---------------+-----------------+------------+



In [23]:
# to lyon 1st
get_special_routes([malakoff, montrouge, bagneux], "69381")

+----------+--------+----------+---------------+-----------------+------------+
|source_nom|dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+--------+----------+---------------+-----------------+------------+
|  Malakoff| LYON 01|     69381|       460418.9|           289.81|       92046|
| Montrouge| LYON 01|     69381|       458868.8|           287.46|       92049|
|   Bagneux| LYON 01|     69381|       456806.4|           289.22|       92007|
+----------+--------+----------+---------------+-----------------+------------+



In [24]:
# to marseil 1st
get_special_routes([malakoff, montrouge, bagneux], "13201")

+----------+------------+----------+---------------+-----------------+------------+
|source_nom|    dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+------------+----------+---------------+-----------------+------------+
|  Malakoff|MARSEILLE 01|     13201|       771177.7|           481.28|       92046|
| Montrouge|MARSEILLE 01|     13201|       769627.6|           478.93|       92049|
|   Bagneux|MARSEILLE 01|     13201|       767565.2|           480.69|       92007|
+----------+------------+----------+---------------+-----------------+------------+



In [25]:
# with a wrong code
get_special_routes([malakoff, montrouge, bagneux], "toto")

+----------+--------+----------+---------------+-----------------+------------+
|source_nom|dest_nom|dest_insee|distance(meter)|duration(minutes)|source_insee|
+----------+--------+----------+---------------+-----------------+------------+
+----------+--------+----------+---------------+-----------------+------------+

