In [81]:
import pandas as pd
import xml.etree.ElementTree as et
import xml.dom.minidom as md

In [82]:
# Load the source CSV; fields are separated by semicolons.
df = pd.read_csv('base_ratp.csv', sep=';')

In [83]:
# Sort the dataset by service, then direction, then route, then stop sequence.
df = df.sort_values(
    by=[
        'service_id',
        'direction_id',
        'route_id',
        'stop_sequence',
    ]
)

In [84]:
# Build the list of distinct line names, kept in their order of first appearance
# in the sorted dataset.
line_list = list(pd.unique(df['service_short_name']))
print(line_list)

['2', '1', '3', '3B', '4', '5', '6', '7', '7B', '8', '9', '10', '11', '12', '13', '14']


In [85]:
# Map each line name to the sub-dataframe holding only that line's rows.
df_by_line = {
    line_name: df[df['service_short_name'] == line_name]
    for line_name in line_list
}

In [86]:
def stations_ref(stations, element):
    """Append one ``<station>`` child to ``element`` for every entry of ``stations``.

    Args:
        stations: iterable of indexable entries; item 0 is the station id and
            item 1 is a collection of line names (may be empty).
        element: parent XML element that receives the new children.

    Each ``<station>`` gets an ``<id>`` child holding ``str(id)``. When item 1 is
    non-empty, a ``<changes>`` child is added with one ``<changeline>`` per name.
    Returns ``None``; ``element`` is modified in place.
    """
    for entry in stations:
        station_id = entry[0]
        connections = entry[1]

        station_node = et.SubElement(element, "station")
        id_node = et.SubElement(station_node, "id")
        id_node.text = str(station_id)

        # No connecting lines: emit the bare <station><id/></station> form.
        if not connections:
            continue

        changes_node = et.SubElement(station_node, 'changes')
        for connection in connections:
            changeline_node = et.SubElement(changes_node, 'changeline')
            changeline_node.text = connection


In [87]:
def longitude_to_x(longitude):
    """Convert a longitude into an X coordinate, returned as a string.

    The mapping is linear: longitude 2.228314 maps to 100 and longitude 2.464319
    maps to (about) 900. The result is truncated with ``int()`` before being
    turned into text.
    """
    lon_min, lon_max = 2.228314, 2.464319
    x_min, x_max = 100, 900

    units_per_degree = (x_max - x_min) / (lon_max - lon_min)
    x = x_min + units_per_degree * (longitude - lon_min)
    return str(int(x))

def latitude_to_y(latitude):
    """Convert a latitude into a Y coordinate, returned as a string.

    The latitude is first mapped linearly (48.768769 -> 100, 48.946111 -> about
    900) and then mirrored around 500, so larger latitudes give smaller Y values.
    The result is truncated with ``int()`` before being turned into text.
    """
    lat_min, lat_max = 48.768769, 48.946111
    y_min, y_max = 100, 900

    units_per_degree = (y_max - y_min) / (lat_max - lat_min)
    unflipped = y_min + units_per_degree * (latitude - lat_min)

    # Mirror around the 500 mid-line.
    flipped = unflipped - ((unflipped - 500) * 2)
    return str(int(flipped))

In [88]:
def station_data(sta, element):
    """Append a ``<station>`` description of ``sta`` to ``element``.

    Args:
        sta: mapping-like row providing ``station_id``, ``station_lon``,
            ``station_lat``, ``station_name`` and ``station_desc``.
        element: parent XML element that receives the new ``<station>`` child.

    The ``<station>`` carries ``id``, ``x`` and ``y`` attributes (the coordinates
    come from ``longitude_to_x`` / ``latitude_to_y``) and two children, ``<name>``
    and ``<desc>``, whose text is taken from the row as-is.
    """
    attributes = {
        'id': str(sta['station_id']),
        'x': longitude_to_x(sta['station_lon']),
        'y': latitude_to_y(sta['station_lat']),
    }
    station_node = et.SubElement(element, "station", attributes)

    name_node = et.SubElement(station_node, 'name')
    name_node.text = sta['station_name']

    desc_node = et.SubElement(station_node, 'desc')
    desc_node.text = sta['station_desc']

In [89]:
def _connecting_lines(all_stops, station_id, line_name):
    """Return the names of the other lines that serve ``station_id``.

    Names are returned in their order of first appearance in ``all_stops`` so the
    generated XML is identical from one run to the next (iterating a ``set`` of
    strings is not stable across interpreter runs).
    """
    served = all_stops.loc[
        all_stops['station_id'] == station_id, 'service_short_name'
    ].unique()
    return [name for name in served if name != line_name]


def line_to_XML(dataframe, parent=None, all_stops=None):
    """Append the ``<line>`` element describing one line to the XML tree.

    Args:
        dataframe: rows of a single line (columns used: ``service_short_name``,
            ``station_id``, ``route_id``). Rows are processed in the order given,
            and the branch detection below depends on that order.
        parent: XML element that receives the ``<line>`` child. Defaults to the
            notebook-level ``lines`` element.
        all_stops: full dataset used to find the other lines serving each
            station. Defaults to the notebook-level ``df``.

    Generated structure, depending on the line's shape:
        * every station appears on every route: a single ``<mpath>``;
        * two distinct routes (handled as a branch that splits and rejoins):
          ``<lpath>``, then ``<bifur>`` holding two ``<subpath>`` (the second one
          in reversed order), then ``<rpath>``;
        * four distinct routes (handled as a branch that does not rejoin):
          ``<fpath>``, then ``<bifur>`` holding two ``<subpath>``.
    Any other route count leaves the ``<line>`` element empty and emits a warning.

    Returns:
        None. The tree under ``parent`` is modified in place.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    parent = lines if parent is None else parent
    all_stops = df if all_stops is None else all_stops

    line_name = dataframe.iloc[0]['service_short_name']
    unique_stops = dataframe.drop_duplicates(subset='station_id')
    nb_routes = len(dataframe['route_id'].unique())
    nb_stations = len(dataframe['station_id'].unique())

    line_element = et.SubElement(parent, "line", name=line_name)

    def entry(row):
        # (station id, other lines serving it): the shape stations_ref expects.
        station_id = row['station_id']
        return (station_id, _connecting_lines(all_stops, station_id, line_name))

    def occurrences(row):
        # Number of rows of this line that reference the station.
        return int((dataframe['station_id'] == row['station_id']).sum())

    def add_path(container, tag, stations):
        stations_ref(stations, et.SubElement(container, tag))

    # No branch: every station is present once per route.
    if len(dataframe) == nb_routes * nb_stations:
        add_path(line_element, "mpath",
                 [entry(row) for _, row in unique_stops.iterrows()])
        return

    # Branch that splits and rejoins (two distinct routes).
    if nb_routes == 2:
        first_list, bottom_bifur, top_bifur, last_list = [], [], [], []
        first_done = False   # set once the first branch-only station is met
        bottom_done = False  # set once a shared station follows the first branch

        for _, row in unique_stops.iterrows():
            item = entry(row)
            if occurrences(row) == 2:
                # Station shared by both routes: before or after the branch.
                if first_done:
                    bottom_done = True
                    last_list.append(item)
                else:
                    first_list.append(item)
            else:
                # Station present on a single route: belongs to one of the branches.
                first_done = True
                if bottom_done:
                    top_bifur.append(item)
                else:
                    bottom_bifur.append(item)

        add_path(line_element, "lpath", first_list)
        bifur = et.SubElement(line_element, "bifur")
        add_path(bifur, "subpath", bottom_bifur)
        add_path(bifur, "subpath", list(reversed(top_bifur)))
        add_path(line_element, "rpath", last_list)
        return

    # Branch that does not rejoin (four distinct routes).
    if nb_routes == 4:
        main_route = dataframe.iloc[0]['route_id']
        main_list, bottom_bifur, top_bifur = [], [], []

        for _, row in unique_stops.iterrows():
            item = entry(row)
            if occurrences(row) == 4:
                main_list.append(item)
            elif row['route_id'] == main_route:
                bottom_bifur.append(item)
            else:
                top_bifur.append(item)

        add_path(line_element, "fpath", main_list)
        bifur = et.SubElement(line_element, "bifur")
        add_path(bifur, "subpath", bottom_bifur)
        add_path(bifur, "subpath", top_bifur)
        return

    # Unsupported shape: keep the empty <line> element (as before) but say so.
    import warnings
    warnings.warn(
        "line {!r}: unsupported layout with {} distinct routes; "
        "<line> element left empty".format(line_name, nb_routes),
        stacklevel=2,
    )


In [90]:
# Build the XML document and write it to disk.
root = et.Element("root")
data = et.SubElement(root, "data")

# One <station> entry per distinct station_id, in ascending station_id order.
df_data = df.drop_duplicates(subset='station_id').sort_values(by=['station_id'])
for row_label, station_row in df_data.iterrows():
    station_data(station_row, data)

# `lines` has to exist at notebook level before the loop below:
# line_to_XML attaches each <line> element to it.
lines = et.SubElement(root, "lines")

for line in line_list:
    line_to_XML(df_by_line[line])

# toprettyxml() returns text whose XML declaration names no encoding, so readers
# assume UTF-8. Write the file as UTF-8 explicitly instead of relying on the
# platform default encoding, which would corrupt non-ASCII station names.
pretty_xml = md.parseString(et.tostring(root)).toprettyxml(indent="   ")
with open("base_ratp.xml", "w", encoding="utf-8") as f:
    f.write(pretty_xml)