In [16]:
import pandas as pd
import xml.etree.ElementTree as et
import xml.dom.minidom as md

In [2]:
# Path of the dataset file as seen from Google Colab
filename = './drive/MyDrive/base_ratp.csv'

# Load the semicolon-separated CSV file into a DataFrame
df = pd.read_csv(filename, sep=';')

In [4]:
# Order the rows by service, then direction, then route, then stop sequence
df = df.sort_values(
    [
        'service_id',
        'direction_id',
        'route_id',
        'stop_sequence',
    ]
)

In [5]:
# Build the list of metro line names (distinct values, in order of first appearance)
line_list = [*df['service_short_name'].unique()]
print(line_list)

['2', '1', '3', '3B', '4', '5', '6', '7', '7B', '8', '9', '10', '11', '12', '13', '14']


In [6]:
# Dictionary mapping each line name to the sub-DataFrame holding only that line's rows
df_by_line = {}

for line in line_list:
    df_by_line[line] = df.loc[df['service_short_name'] == line]

In [7]:
def nb_stations(dataframe):
    """Return the number of distinct ``station_id`` values in ``dataframe``."""
    # dropna=False: a missing id is counted as one value, exactly as unique() would
    return dataframe['station_id'].nunique(dropna=False)

In [8]:
def is_station_straight(dataframe):
    """Return True when the line is considered to have no branch.

    The test is purely a row count: the frame must contain as many rows as
    (distinct ``route_id`` values) x (distinct ``station_id`` values), which
    is what one gets when every route stops once at every station.
    """
    route_count = dataframe['route_id'].nunique(dropna=False)
    station_count = dataframe['station_id'].nunique(dropna=False)
    return route_count * station_count == len(dataframe)

In [9]:
def _connections(all_lines_df, station_id, line_name):
    """Return the names of the other lines that have a row for ``station_id``.

    ``all_lines_df`` is the frame searched for the station; ``line_name`` is
    excluded from the result. The names are sorted (by their string form) so
    the result does not depend on set iteration order.
    """
    same_station = all_lines_df['station_id'] == station_id
    served_by = set(all_lines_df.loc[same_station, 'service_short_name'])
    # discard() instead of remove(): no error if the line itself is not listed
    served_by.discard(line_name)
    return sorted(served_by, key=str)


def get_stations(dataframe, all_lines_df=None):
    """Return the stations of one line, grouped by section, with their connections.

    Every station is described by a ``(station_name, connections)`` tuple where
    ``connections`` is the sorted list of the other line names found for the
    same ``station_id`` in ``all_lines_df``. Stations are visited in the row
    order of ``dataframe``, keeping the first row of each ``station_id``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows of a single line. The columns ``service_short_name``,
        ``station_id``, ``station_name`` and ``route_id`` are read. The line
        name is taken from the first row, so the frame must not be empty.
    all_lines_df : pandas.DataFrame, optional
        Frame used to look up connections. When omitted, the notebook-level
        ``df`` is used.

    Returns
    -------
    list
        When ``is_station_straight(dataframe)`` is true: one flat list of stations.
    tuple of 4 lists
        When the line has 2 distinct ``route_id`` values:
        ``(first_list, bottom_bifur, top_bifur, last_list)``.
    tuple of 3 lists
        When the line has 4 distinct ``route_id`` values:
        ``(main_list, bottom_bifur, top_bifur)``.
    None
        For any other number of routes.
    """
    if all_lines_df is None:
        # Fall back on the dataset loaded at notebook level
        all_lines_df = df

    line_name = dataframe.iloc[0].service_short_name
    df_line = dataframe.drop_duplicates(subset='station_id')

    def describe(station):
        # (name, connections) tuple for one row of df_line
        return (
            station['station_name'],
            _connections(all_lines_df, station['station_id'], line_name),
        )

    # Case where the line has no branch
    if is_station_straight(dataframe):
        return [describe(station) for _, station in df_line.iterrows()]

    # Case where the line has a branch
    nb_routes = len(dataframe.drop_duplicates(subset='route_id'))
    # Number of rows per station on this line, computed once instead of per station
    visits = dataframe['station_id'].value_counts().to_dict()

    # Internal branch that joins back (2 termini)
    if nb_routes == 2:
        first_list = []
        bottom_bifur = []
        top_bifur = []
        last_list = []

        first_done = False   # becomes True at the first station not present twice
        bottom_done = False  # becomes True once a station present twice follows it

        for _, station in df_line.iterrows():
            item = describe(station)

            if visits.get(station['station_id'], 0) == 2:
                # Station with two rows on the line: part of a shared section
                if first_done:
                    bottom_done = True
                    last_list.append(item)
                else:
                    first_list.append(item)
            else:
                first_done = True
                if bottom_done:
                    top_bifur.append(item)
                else:
                    bottom_bifur.append(item)

        # top_bifur is returned in reverse of the order in which it was collected
        return (first_list, bottom_bifur, top_bifur[::-1], last_list)

    # Branch that does not join back (3 termini)
    if nb_routes == 4:
        # Route of the first row: its branch stations go to bottom_bifur
        main_route = dataframe.iloc[0]['route_id']

        main_list = []
        bottom_bifur = []
        top_bifur = []

        for _, station in df_line.iterrows():
            item = describe(station)

            if visits.get(station['station_id'], 0) == 4:
                # Station with four rows on the line: shared section
                main_list.append(item)
            elif station['route_id'] == main_route:
                bottom_bifur.append(item)
            else:
                top_bifur.append(item)

        return (main_list, bottom_bifur, top_bifur)

    # Any other number of routes is not handled
    return None

