In [1]:
import pandas as pd

In [2]:
# Location of the CSV file in Google Colab
filename = './drive/MyDrive/base_ratp.csv'

# Load the semicolon-separated CSV file into a dataframe
df_raw = pd.read_csv(filename, sep=';')

In [3]:
# Show the dtype of every column of the raw dataframe
df_raw.dtypes

station_id              int64
station_name           object
station_desc           object
station_lat           float64
station_lon           float64
stop_sequence           int64
route_id                int64
service_id              int64
direction_id            int64
service_short_name     object
long_name_first        object
long_name_last         object
dtype: object

In [4]:
# Preview the first 10 raw rows whose service_short_name is '7B'
df_raw[df_raw['service_short_name'] == '7B'].head(10)

Unnamed: 0,station_id,station_name,station_desc,station_lat,station_lon,stop_sequence,route_id,service_id,direction_id,service_short_name,long_name_first,long_name_last
838,2940,Pré-Saint-Gervais,Serrurier 52 boulevard - 75119,48.88016,2.398587,1,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
839,2312,Danube,Rhin et Danube 9 place de - 75119,48.881951,2.393261,2,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
840,2124,Botzaris,80 rue Botzaris - 75119,48.879607,2.389416,3,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
841,2152,Buttes-Chaumont,Botzaris 28 rue - 75119,48.877804,2.381211,4,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
842,2116,Bolivar,Simon Bolivar 125 avenue - 75119,48.88079,2.374155,5,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
843,2484,Jaurès,Face au 2 place de Stalingrad - 75119,48.882769,2.369946,6,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
844,2580,Louis Blanc,234 rue du Faubourg Saint-Martin - 75110,48.8809,2.365107,7,656103,2280337,0,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
845,2580,Louis Blanc,234 rue du Faubourg Saint-Martin - 75110,48.8809,2.365107,1,656104,2280337,1,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
846,2484,Jaurès,Face au 2 place de Stalingrad - 75119,48.882769,2.369946,2,656104,2280337,1,7B,PRE-SAINT-GERVAIS,LOUIS BLANC
847,2116,Bolivar,Simon Bolivar 125 avenue - 75119,48.88079,2.374155,3,656104,2280337,1,7B,PRE-SAINT-GERVAIS,LOUIS BLANC


In [5]:
# Sort the raw rows, then keep only the columns of interest
sort_keys = ['service_id', 'direction_id', 'route_id', 'stop_sequence']
kept_columns = [
    'station_id',
    'station_name',
    'stop_sequence',
    'route_id',
    'direction_id',
    'service_short_name',
    'long_name_first',
]
df_sorted = df_raw.sort_values(by=sort_keys)
df = df_sorted[kept_columns]

In [6]:
# Build a dictionary whose keys are the line names and whose values are the matching dataframes
line_list = ['1', '2', '3', '3B', '4', '5', '6', '7', '7B', '8', '9', '10', '11', '12', '13', '14']
df_by_line = {}

for line in line_list:
    # DataFrame.drop returns a new frame (it is not in-place by default), so the
    # result has to be stored; otherwise 'service_short_name' is never removed.
    df_by_line[line] = df[df.service_short_name == line].drop(columns=['service_short_name'])

In [7]:
def nb_stations(dataframe):
    """Return the number of distinct ``station_id`` values in ``dataframe``."""
    station_ids = dataframe['station_id']
    # dropna=False keeps the count identical to len(station_ids.unique())
    return station_ids.nunique(dropna=False)

In [8]:
def is_station_straight(dataframe):
    """Return True when the line has no branch.

    The test used is: the number of rows equals the number of distinct
    ``route_id`` values multiplied by the number of distinct ``station_id``
    values.
    """
    # dropna=False keeps both counts identical to len(column.unique())
    route_count = dataframe['route_id'].nunique(dropna=False)
    station_count = dataframe['station_id'].nunique(dropna=False)
    expected_rows = route_count * station_count
    return len(dataframe) == expected_rows

In [9]:
# Print, for every line name, the result of the no-branch check
for line in line_list:
    straight = is_station_straight(df_by_line[line])
    print(line, straight)

1 True
2 True
3 True
3B True
4 True
5 True
6 True
7 False
7B False
8 True
9 True
10 False
11 True
12 True
13 False
14 True


In [10]:
def get_stations(dataframe):
    """Return the stations of one line, in order of first appearance in ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows of a single line. Columns read: ``station_id``, ``station_name``,
        ``direction_id``, ``stop_sequence`` and ``route_id``.

    Returns
    -------
    list of (station_name, direction_id)
        When ``is_station_straight(dataframe)`` is true (no branch).
    tuple of four lists
        When the frame holds exactly two distinct ``route_id`` values:
        ``(first_list, bottom_bifur, reversed top_bifur, last_list)``. Every
        item is ``(row_count, station_name, stop_sequence, route_id)`` where
        ``row_count`` is the number of rows of ``dataframe`` for that station.
    None
        For any other number of routes (not implemented yet).
    """
    # One row per station, keeping the first occurrence of each station_id
    df_line = dataframe.drop_duplicates(subset='station_id')

    # Case where the line has no branch
    if is_station_straight(dataframe):
        return list(zip(df_line['station_name'], df_line['direction_id']))

    # Case where the line has a branch
    nb_routes = dataframe['route_id'].nunique(dropna=False)

    # Case where the line has an internal branch (2 terminus)
    if nb_routes == 2:
        # Count the rows of every station once, instead of re-filtering the
        # whole frame for each station inside the loop.
        rows_per_station = dataframe['station_id'].value_counts().to_dict()

        first_list = []
        bottom_bifur = []
        top_bifur = []
        last_list = []

        first_done = False
        bottom_done = False

        columns = ['station_id', 'station_name', 'stop_sequence', 'route_id']
        for station_id, name, stop_sequence, route_id in df_line[columns].itertuples(index=False, name=None):
            nb_rows = int(rows_per_station.get(station_id, 0))
            entry = (nb_rows, name, stop_sequence, route_id)

            if nb_rows == 2:
                # Station present on two rows of the frame
                if first_done:
                    bottom_done = True
                    last_list.append(entry)
                else:
                    first_list.append(entry)
            else:
                # Any other row count closes the first group of stations
                first_done = True
                if bottom_done:
                    top_bifur.append(entry)
                else:
                    bottom_bifur.append(entry)

        return (first_list, bottom_bifur, list(reversed(top_bifur)), last_list)

    # Case where the line has a branch that does not merge back (3 terminus)
    # TODO: not implemented; this and every other route count returns None.
    return None



In [11]:
# Stations returned for line '6'
get_stations(df_by_line['6'])

[('Nation', 0),
 ('Picpus', 0),
 ('Bel-Air', 0),
 ('Daumesnil Félix Eboué', 0),
 ('Dugommier', 0),
 ('Bercy', 0),
 ('Quai de la Gare', 0),
 ('Chevaleret', 0),
 ('Nationale', 0),
 ("Place d'Italie", 0),
 ('Corvisart', 0),
 ('Glacière', 0),
 ('Saint-Jacques', 0),
 ('Denfert-Rochereau', 0),
 ('Raspail', 0),
 ('Edgar-Quinet', 0),
 ('Montparnasse-Bienvenue', 0),
 ('Pasteur', 0),
 ('Sèvres-Lecourbe', 0),
 ('Cambronne', 0),
 ('La Motte-Picquet-Grenelle', 0),
 ('Dupleix', 0),
 ('Bir-Hakeim Grenelle', 0),
 ('Passy', 0),
 ('Trocadéro', 0),
 ('Boissière', 0),
 ('Kléber', 0),
 ('Charles de Gaulle-Etoile', 0)]

In [12]:
# Stations returned for line '7B'
get_stations(df_by_line['7B'])

0


([(2, 'Pré-Saint-Gervais', 1, 656103)],
 [(1, 'Danube', 2, 656103)],
 [(1, 'Place des Fêtes', 6, 656104)],
 [(2, 'Botzaris', 3, 656103),
  (2, 'Buttes-Chaumont', 4, 656103),
  (2, 'Bolivar', 5, 656103),
  (2, 'Jaurès', 6, 656103),
  (2, 'Louis Blanc', 7, 656103)])