In [2]:
import requests
import numpy as np
import pandas as pd

# Configuration pour afficher toutes les colonnes
pd.set_option('display.max_columns', 50)

# Api Localisation and velib station caracteristics

Link: https://opendata.paris.fr/explore/dataset/velib-emplacement-des-stations/information/

In [3]:
api_url = "https://opendata.paris.fr/api/explore/v2.1/catalog/datasets/velib-emplacement-des-stations/records"
api_params = {"offset": 0, "limit": 1}
r= requests.get(url=api_url, params=api_params).json()
r

{'total_count': 1465,
 'results': [{'stationcode': '13007',
   'name': 'Le Brun - Gobelins',
   'capacity': 48,
   'coordonnees_geo': {'lon': 2.3534681351338, 'lat': 48.835092787824}}]}

In [4]:
r.keys()

dict_keys(['total_count', 'results'])

In [5]:
r["results"]

[{'stationcode': '13007',
  'name': 'Le Brun - Gobelins',
  'capacity': 48,
  'coordonnees_geo': {'lon': 2.3534681351338, 'lat': 48.835092787824}}]

In [6]:
r["results"][0]

{'stationcode': '13007',
 'name': 'Le Brun - Gobelins',
 'capacity': 48,
 'coordonnees_geo': {'lon': 2.3534681351338, 'lat': 48.835092787824}}

In [7]:
pd.DataFrame(r['results'])

Unnamed: 0,stationcode,name,capacity,coordonnees_geo
0,13007,Le Brun - Gobelins,48,"{'lon': 2.3534681351338, 'lat': 48.835092787824}"


In [8]:
api_url = "https://opendata.paris.fr/api/explore/v2.1/catalog/datasets/velib-emplacement-des-stations/records"
offset = 0
limit = 100  # Vous pouvez ajuster la limite en fonction de vos besoins

all_results = []

while True:
    params = {
        "offset": offset,
        "limit": limit
    }

    response = requests.get(url=api_url, params=params).json()

    if "results" not in response:
        break

    all_results.extend(response["results"])

    if len(response["results"]) < limit:
        break

    offset += limit

# Maintenant, all_results contient toutes les stations Velib
print(len(all_results))

1465


In [10]:
velib_stations = pd.DataFrame(all_results)
velib_stations

Unnamed: 0,stationcode,name,capacity,coordonnees_geo
0,13007,Le Brun - Gobelins,48,"{'lon': 2.3534681351338, 'lat': 48.835092787824}"
1,6021,Beaux-Arts - Bonaparte,20,"{'lon': 2.334851883351803, 'lat': 48.856451985..."
2,7004,Raspail - Varenne,18,"{'lon': 2.3263905197382, 'lat': 48.853147816545}"
3,44017,Jules Guesde - Pont du Port à l'Anglais,30,"{'lon': 2.417212128639221, 'lat': 48.796288070..."
4,17043,Belidor - Gouvion-Saint-Cyr,39,"{'lon': 2.2854680567979813, 'lat': 48.88022154..."
...,...,...,...,...
1460,21112,Claude Debussy - Marc Bloch,56,"{'lon': 2.3117771744728, 'lat': 48.910388180512}"
1461,8002,Gare Saint-Lazare - Cour du Havre,45,"{'lon': 2.3265598341823, 'lat': 48.875674400851}"
1462,31301,Gare RER,30,"{'lon': 2.4595758300992, 'lat': 48.896241412268}"
1463,18023,Ordener - Poissonniers,35,"{'lon': 2.351288666219873, 'lat': 48.891213890..."


In [11]:
# Separating the geographic coordinates into two separate columns
velib_stations['latitude'] = velib_stations['coordonnees_geo'].apply(lambda x: x['lat'])
velib_stations['longitude'] = velib_stations['coordonnees_geo'].apply(lambda x: x['lon'])
# Delete the column coordonnees_geo
velib_stations = velib_stations.drop('coordonnees_geo', axis=1)

In [12]:
velib_stations['name'] = velib_stations['name'].apply(lambda x: str.strip(x))

In [13]:
velib_stations.head()

Unnamed: 0,stationcode,name,capacity,latitude,longitude
0,13007,Le Brun - Gobelins,48,48.835093,2.353468
1,6021,Beaux-Arts - Bonaparte,20,48.856452,2.334852
2,7004,Raspail - Varenne,18,48.853148,2.326391
3,44017,Jules Guesde - Pont du Port à l'Anglais,30,48.796288,2.417212
4,17043,Belidor - Gouvion-Saint-Cyr,39,48.880222,2.285468


In [14]:
velib_stations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   stationcode  1465 non-null   object 
 1   name         1465 non-null   object 
 2   capacity     1465 non-null   int64  
 3   latitude     1465 non-null   float64
 4   longitude    1465 non-null   float64
dtypes: float64(2), int64(1), object(2)
memory usage: 57.4+ KB


In [15]:
len(velib_stations.stationcode.unique())

1465

In [16]:
len(velib_stations.name.unique())

1463

In [17]:
# Identifier les doublons dans la colonne "col"
duplicates = velib_stations["name"].duplicated()

# Afficher les lignes contenant des doublons
duplicate_rows = velib_stations[duplicates]
duplicate_rows


Unnamed: 0,stationcode,name,capacity,latitude,longitude
988,25006,Place Nelson Mandela,22,48.862091,2.196576
1085,31706,Château - République,26,48.862924,2.415504


In [18]:
velib_stations.loc[988, "name"] == velib_stations.loc[1085, "name"]

False

# Api: Vélib - Bicycles and stations - Real-time availability

link: https://opendata.paris.fr/explore/dataset/velib-disponibilite-en-temps-reel/information/?disjunctive.name&disjunctive.is_installed&disjunctive.is_renting&disjunctive.is_returning&disjunctive.nom_arrondissement_communes

In [19]:
api_url_rt = "https://opendata.paris.fr/api/explore/v2.1/catalog/datasets/velib-disponibilite-en-temps-reel/records"
api_params = {"offset": 0, "limit": 1}
r= requests.get(url=api_url_rt, params=api_params).json()
r

{'total_count': 1465,
 'results': [{'stationcode': '16107',
   'name': 'Benjamin Godard - Victor Hugo',
   'is_installed': 'OUI',
   'capacity': 35,
   'numdocksavailable': 35,
   'numbikesavailable': 0,
   'mechanical': 0,
   'ebike': 0,
   'is_renting': 'OUI',
   'is_returning': 'OUI',
   'duedate': '2023-07-15T13:25:47+00:00',
   'coordonnees_geo': {'lon': 2.275725, 'lat': 48.865983},
   'nom_arrondissement_communes': 'Paris',
   'code_insee_commune': None}]}

In [20]:
api_url = "https://opendata.paris.fr/api/explore/v2.1/catalog/datasets/velib-disponibilite-en-temps-reel/records"
offset = 0
limit = 100

all_results = []

while True:
    params = {
        "offset": offset,
        "limit": limit
    }

    response = requests.get(url=api_url, params=params).json()

    if "results" not in response:
        break

    all_results.extend(response["results"])

    if len(response["results"]) < limit:
        break

    offset += limit

# Maintenant, all_results contient toutes les stations Velib
print(len(all_results))

1465


1. `stationcode - Identifiant station`: numéro unique d’identification de la station. Ce numéro identifie la station au sein du service Vélib’ Métropole
2. `name - Nom station`: nom de la station
3. `is_installed - Station en fonctionnement`: variable binaire indiquant si la station est. La station a déjà été déployée (OUI) ou est encore en cours de déploiement (NON)
4. `capacity - Capacité de la station`: nombre de bornettes dans la station
5. `numdocksavailable - Nombre bornettes libres`: nombre de bornettes disponibles
6. `numbikesavailable - Nombre total vélos disponibles`: nombre de vélos disponibles
7. `mechanical - Vélos mécaniques disponibles`:
8. `ebike - Vélos électriques disponibles`:
9. `is_renting - Borne de paiement disponible`: variable binaire indiquant si la station peut louer des vélos (is_renting=1 si le statut de la station est Operative)
10. `is_returning`: variable binaire indiquant si la station peut recevoir des vélos
11. `duedate - Actualisation de la donnée`:
12. `coordonnees_geo - Coordonnées géographiques`:
13. `nom_arrondissement_communes - Nom communes équipées`:
14. `code_insee_commune - Code INSEE communes équipées`

In [21]:
df = pd.DataFrame(all_results)
df.head()

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,coordonnees_geo,nom_arrondissement_communes,code_insee_commune
0,16107,Benjamin Godard - Victor Hugo,OUI,35,35,0,0,0,OUI,OUI,2023-07-15T13:25:47+00:00,"{'lon': 2.275725, 'lat': 48.865983}",Paris,
1,31104,Mairie de Rosny-sous-Bois,OUI,30,4,25,21,4,OUI,OUI,2023-07-15T13:37:51+00:00,"{'lon': 2.4865807592869, 'lat': 48.871256519012}",Rosny-sous-Bois,
2,30002,Jean Rostand - Paul Vaillant Couturier,NON,0,0,0,0,0,NON,NON,2023-03-29T11:35:57+00:00,"{'lon': 2.4530601033354, 'lat': 48.908168131015}",Bobigny,
3,14111,Cassini - Denfert-Rochereau,OUI,25,20,4,0,4,OUI,OUI,2023-07-15T13:37:06+00:00,"{'lon': 2.3360354080796, 'lat': 48.837525839067}",Paris,
4,11104,Charonne - Robert et Sonia Delaunay,OUI,20,12,7,2,5,OUI,OUI,2023-07-15T13:36:29+00:00,"{'lon': 2.3925706744194, 'lat': 48.855907555969}",Paris,


In [22]:
df[df.is_installed=="NON"]

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,coordonnees_geo,nom_arrondissement_communes,code_insee_commune
2,30002,Jean Rostand - Paul Vaillant Couturier,NON,0,0,0,0,0,NON,NON,2023-03-29T11:35:57+00:00,"{'lon': 2.4530601033354, 'lat': 48.908168131015}",Bobigny,
8,44015,Rouget de L'isle - Watteau,NON,0,0,0,0,0,NON,NON,2023-02-09T09:35:37+00:00,"{'lon': 2.3963020229163, 'lat': 48.778192750803}",Vitry-sur-Seine,
188,20120,Evariste Galois,NON,0,0,0,0,0,NON,NON,2023-06-15T14:09:38+00:00,"{'lon': 2.4133380129933, 'lat': 48.873087537345}",Paris,
289,23903,Stade Gaston Bouillant,NON,30,16,14,10,4,NON,NON,2023-07-15T13:36:40+00:00,"{'lon': 2.3274196014227, 'lat': 48.940273514964}",Villeneuve-la-Garenne,
290,8001,Petit Palais,NON,46,46,0,0,0,NON,NON,2023-07-15T13:33:43+00:00,"{'lon': 2.3157655, 'lat': 48.8667692}",Paris,
719,26010,Louis Armand - Bernard Palissy,NON,0,0,0,0,0,NON,NON,2023-01-25T14:17:38+00:00,"{'lon': 2.3124207955211, 'lat': 48.916925546419}",Asnières-sur-Seine,
725,21320,Président Roosevelt - Rouget de Lisle,NON,40,38,1,1,0,NON,NON,2021-07-28T07:10:05+00:00,"{'lon': 2.2616135525345, 'lat': 48.83047518749}",Issy-les-Moulineaux,
818,17203,Place de la Porte de Champeret,NON,0,0,0,0,0,NON,NON,2023-04-25T06:55:53+00:00,"{'lon': 2.2917001576821, 'lat': 48.886062068226}",Paris,
916,12201,Dugommier,NON,53,53,0,0,0,NON,NON,2023-07-15T13:37:56+00:00,"{'lon': 2.3896577954292, 'lat': 48.838811048815}",Paris,
1339,32605,Mairie,NON,0,0,0,0,0,NON,NON,2023-05-24T06:26:19+00:00,"{'lon': 2.4161527565572, 'lat': 48.879486942081}",Les Lilas,


In [23]:
# Separating the geographic coordinates into two separate columns
df['latitude'] = df['coordonnees_geo'].apply(lambda x: x['lat'])
df['longitude'] = df['coordonnees_geo'].apply(lambda x: x['lon'])
# Delete the column coordonnees_geo
df = df.drop('coordonnees_geo', axis=1)

In [24]:
df['name'] = df['name'].apply(lambda x: str.strip(x))

In [25]:
df['nom_arrondissement_communes'] = df['nom_arrondissement_communes'].apply(lambda x: str.strip(x))

In [26]:
# Affichage du DataFrame avec le style de formatage
df.head()

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,nom_arrondissement_communes,code_insee_commune,latitude,longitude
0,16107,Benjamin Godard - Victor Hugo,OUI,35,35,0,0,0,OUI,OUI,2023-07-15T13:25:47+00:00,Paris,,48.865983,2.275725
1,31104,Mairie de Rosny-sous-Bois,OUI,30,4,25,21,4,OUI,OUI,2023-07-15T13:37:51+00:00,Rosny-sous-Bois,,48.871257,2.486581
2,30002,Jean Rostand - Paul Vaillant Couturier,NON,0,0,0,0,0,NON,NON,2023-03-29T11:35:57+00:00,Bobigny,,48.908168,2.45306
3,14111,Cassini - Denfert-Rochereau,OUI,25,20,4,0,4,OUI,OUI,2023-07-15T13:37:06+00:00,Paris,,48.837526,2.336035
4,11104,Charonne - Robert et Sonia Delaunay,OUI,20,12,7,2,5,OUI,OUI,2023-07-15T13:36:29+00:00,Paris,,48.855908,2.392571


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   is_installed                 1465 non-null   object 
 3   capacity                     1465 non-null   int64  
 4   numdocksavailable            1465 non-null   int64  
 5   numbikesavailable            1465 non-null   int64  
 6   mechanical                   1465 non-null   int64  
 7   ebike                        1465 non-null   int64  
 8   is_renting                   1465 non-null   object 
 9   is_returning                 1465 non-null   object 
 10  duedate                      1465 non-null   object 
 11  nom_arrondissement_communes  1465 non-null   object 
 12  code_insee_commune           0 non-null      object 
 13  latitude          

In [28]:
df.code_insee_commune.unique()

array([None], dtype=object)

In [29]:
df.drop('code_insee_commune', axis=1, inplace=True)

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 14 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   is_installed                 1465 non-null   object 
 3   capacity                     1465 non-null   int64  
 4   numdocksavailable            1465 non-null   int64  
 5   numbikesavailable            1465 non-null   int64  
 6   mechanical                   1465 non-null   int64  
 7   ebike                        1465 non-null   int64  
 8   is_renting                   1465 non-null   object 
 9   is_returning                 1465 non-null   object 
 10  duedate                      1465 non-null   object 
 11  nom_arrondissement_communes  1465 non-null   object 
 12  latitude                     1465 non-null   float64
 13  longitude         

In [31]:
# Extraction des colonnes "stationcode" dans deux séries
stationcode1 = velib_stations.stationcode
stationcode2 = df.stationcode

# Comparaison des ensembles de valeurs
if set(stationcode1) == set(stationcode2):
    print("Oui ! Les stations de Vélib sont les mêmes.")
else:
    print("Non! Les stations de Vélib ne sont pas les mêmes.")

Oui ! Les stations de Vélib sont les mêmes.


# API Velib Métropole

link: https://www.velib-metropole.fr/donnees-open-data-gbfs-du-service-velib-metropole

In [32]:
r = requests.get(url="https://velib-metropole-opendata.smoove.pro/opendata/Velib_Metropole/station_information.json",
                  params={"offset": 0, "limit":1})
r.json().keys()

dict_keys(['lastUpdatedOther', 'ttl', 'data'])

In [33]:
station_info = pd.DataFrame(r.json()['data']['stations'])
station_info.head()

Unnamed: 0,station_id,name,lat,lon,capacity,stationCode,rental_methods
0,213688169,Benjamin Godard - Victor Hugo,48.865983,2.275725,35,16107,
1,653222953,Mairie de Rosny-sous-Bois,48.871257,2.486581,30,31104,[CREDITCARD]
2,36255,Toudouze - Clauzel,48.879296,2.33736,21,9020,[CREDITCARD]
3,37815204,Mairie du 12ème,48.840855,2.387555,30,12109,
4,17486274358,Jean Rostand - Paul Vaillant Couturier,48.908168,2.45306,0,30002,[CREDITCARD]


In [34]:
# Renommage des colonnes "lat" et "lon" en "latitude" et "longitude"
station_info = station_info.rename(columns={"lat": "latitude", "lon": "longitude"})

In [35]:
station_info['name'] = station_info['name'].apply(lambda x: str.strip(x))

In [36]:
station_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   station_id      1465 non-null   int64  
 1   name            1465 non-null   object 
 2   latitude        1465 non-null   float64
 3   longitude       1465 non-null   float64
 4   capacity        1465 non-null   int64  
 5   stationCode     1465 non-null   object 
 6   rental_methods  853 non-null    object 
dtypes: float64(2), int64(2), object(3)
memory usage: 80.2+ KB


In [37]:
r = requests.get(url="https://velib-metropole-opendata.smoove.pro/opendata/Velib_Metropole/station_status.json",
                  params={"offset": 0, "limit":1})
r.json().keys()

dict_keys(['lastUpdatedOther', 'ttl', 'data'])

In [38]:
station_status = pd.DataFrame(r.json()['data']['stations'])
station_status.head()

Unnamed: 0,stationCode,station_id,num_bikes_available,numBikesAvailable,num_bikes_available_types,num_docks_available,numDocksAvailable,is_installed,is_returning,is_renting,last_reported
0,16107,213688169,0,0,"[{'mechanical': 0}, {'ebike': 0}]",35,35,1,1,1,1689427547
1,31104,653222953,25,25,"[{'mechanical': 21}, {'ebike': 4}]",4,4,1,1,1,1689428271
2,9020,36255,1,1,"[{'mechanical': 1}, {'ebike': 0}]",20,20,1,1,1,1689428135
3,12109,37815204,24,24,"[{'mechanical': 23}, {'ebike': 1}]",6,6,1,1,1,1689428253
4,30002,17486274358,0,0,"[{'mechanical': 0}, {'ebike': 0}]",0,0,0,0,0,1680089757


In [39]:
station_status['num_bikes_available_types'][0][0]

{'mechanical': 0}

# Merging and comparison

## velib station information

In [40]:
velib_stations.head(2)

Unnamed: 0,stationcode,name,capacity,latitude,longitude
0,13007,Le Brun - Gobelins,48,48.835093,2.353468
1,6021,Beaux-Arts - Bonaparte,20,48.856452,2.334852


In [44]:
velib_stations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   stationcode  1465 non-null   object 
 1   name         1465 non-null   object 
 2   capacity     1465 non-null   int64  
 3   latitude     1465 non-null   float64
 4   longitude    1465 non-null   float64
dtypes: float64(2), int64(1), object(2)
memory usage: 57.4+ KB


In [45]:
station_info.head(2)

Unnamed: 0,station_id,name,latitude,longitude,capacity,stationCode,rental_methods
0,213688169,Benjamin Godard - Victor Hugo,48.865983,2.275725,35,16107,
1,653222953,Mairie de Rosny-sous-Bois,48.871257,2.486581,30,31104,[CREDITCARD]


In [46]:
station_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   station_id      1465 non-null   int64  
 1   name            1465 non-null   object 
 2   latitude        1465 non-null   float64
 3   longitude       1465 non-null   float64
 4   capacity        1465 non-null   int64  
 5   stationCode     1465 non-null   object 
 6   rental_methods  853 non-null    object 
dtypes: float64(2), int64(2), object(3)
memory usage: 80.2+ KB


In [47]:
# Fusion des DataFrames
merged_df = pd.merge(velib_stations, station_info, 
                     left_on=["stationcode", "name", "capacity", "latitude", "longitude"], 
                     right_on=["stationCode", "name", "capacity", "latitude", "longitude"])

# Affichage du DataFrame fusionné
merged_df

Unnamed: 0,stationcode,name,capacity,latitude,longitude,station_id,stationCode,rental_methods
0,13007,Le Brun - Gobelins,48,48.835093,2.353468,66491398,13007,[CREDITCARD]
1,6021,Beaux-Arts - Bonaparte,20,48.856452,2.334852,210405211,6021,[CREDITCARD]
2,7004,Raspail - Varenne,18,48.853148,2.326391,210567545,7004,
3,44017,Jules Guesde - Pont du Port à l'Anglais,30,48.796288,2.417212,499303158,44017,[CREDITCARD]
4,17043,Belidor - Gouvion-Saint-Cyr,39,48.880222,2.285468,80216231,17043,[CREDITCARD]
...,...,...,...,...,...,...,...,...
1460,21112,Claude Debussy - Marc Bloch,56,48.910388,2.311777,54000628,21112,
1461,8002,Gare Saint-Lazare - Cour du Havre,45,48.875674,2.326560,394404659,8002,[CREDITCARD]
1462,31301,Gare RER,30,48.896241,2.459576,13373801574,31301,[CREDITCARD]
1463,18023,Ordener - Poissonniers,35,48.891214,2.351289,54000604,18023,[CREDITCARD]


In [48]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   stationcode     1465 non-null   object 
 1   name            1465 non-null   object 
 2   capacity        1465 non-null   int64  
 3   latitude        1465 non-null   float64
 4   longitude       1465 non-null   float64
 5   station_id      1465 non-null   int64  
 6   stationCode     1465 non-null   object 
 7   rental_methods  853 non-null    object 
dtypes: float64(2), int64(2), object(4)
memory usage: 91.7+ KB


In [49]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   stationcode     1465 non-null   object 
 1   name            1465 non-null   object 
 2   capacity        1465 non-null   int64  
 3   latitude        1465 non-null   float64
 4   longitude       1465 non-null   float64
 5   station_id      1465 non-null   int64  
 6   stationCode     1465 non-null   object 
 7   rental_methods  853 non-null    object 
dtypes: float64(2), int64(2), object(4)
memory usage: 91.7+ KB


In [50]:
merged_df.drop("rental_methods", axis=1, inplace=True)

In [51]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   stationcode  1465 non-null   object 
 1   name         1465 non-null   object 
 2   capacity     1465 non-null   int64  
 3   latitude     1465 non-null   float64
 4   longitude    1465 non-null   float64
 5   station_id   1465 non-null   int64  
 6   stationCode  1465 non-null   object 
dtypes: float64(2), int64(2), object(3)
memory usage: 80.2+ KB


## Velib station status 

In [52]:
df.head(2)

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,nom_arrondissement_communes,latitude,longitude
0,16107,Benjamin Godard - Victor Hugo,OUI,35,35,0,0,0,OUI,OUI,2023-07-15T13:25:47+00:00,Paris,48.865983,2.275725
1,31104,Mairie de Rosny-sous-Bois,OUI,30,4,25,21,4,OUI,OUI,2023-07-15T13:37:51+00:00,Rosny-sous-Bois,48.871257,2.486581


In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 14 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   is_installed                 1465 non-null   object 
 3   capacity                     1465 non-null   int64  
 4   numdocksavailable            1465 non-null   int64  
 5   numbikesavailable            1465 non-null   int64  
 6   mechanical                   1465 non-null   int64  
 7   ebike                        1465 non-null   int64  
 8   is_renting                   1465 non-null   object 
 9   is_returning                 1465 non-null   object 
 10  duedate                      1465 non-null   object 
 11  nom_arrondissement_communes  1465 non-null   object 
 12  latitude                     1465 non-null   float64
 13  longitude         

In [54]:
station_status.head(2)

Unnamed: 0,stationCode,station_id,num_bikes_available,numBikesAvailable,num_bikes_available_types,num_docks_available,numDocksAvailable,is_installed,is_returning,is_renting,last_reported
0,16107,213688169,0,0,"[{'mechanical': 0}, {'ebike': 0}]",35,35,1,1,1,1689427547
1,31104,653222953,25,25,"[{'mechanical': 21}, {'ebike': 4}]",4,4,1,1,1,1689428271


In [55]:
station_status.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 11 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   stationCode                1465 non-null   object
 1   station_id                 1465 non-null   int64 
 2   num_bikes_available        1465 non-null   int64 
 3   numBikesAvailable          1465 non-null   int64 
 4   num_bikes_available_types  1465 non-null   object
 5   num_docks_available        1465 non-null   int64 
 6   numDocksAvailable          1465 non-null   int64 
 7   is_installed               1465 non-null   int64 
 8   is_returning               1465 non-null   int64 
 9   is_renting                 1465 non-null   int64 
 10  last_reported              1465 non-null   int64 
dtypes: int64(9), object(2)
memory usage: 126.0+ KB


In [56]:
# Modification de l'encodage des colonnes binaires en 0 et 1 avec apply
df[["is_installed", "is_returning", "is_renting"]] = df[["is_installed", "is_returning", "is_renting"]].apply(lambda x: x.map({"OUI": 1, "NON": 0}))

In [57]:
df.head(2)

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,nom_arrondissement_communes,latitude,longitude
0,16107,Benjamin Godard - Victor Hugo,1,35,35,0,0,0,1,1,2023-07-15T13:25:47+00:00,Paris,48.865983,2.275725
1,31104,Mairie de Rosny-sous-Bois,1,30,4,25,21,4,1,1,2023-07-15T13:37:51+00:00,Rosny-sous-Bois,48.871257,2.486581


In [58]:
merged_station_status = pd.merge(df, station_status, 
                     left_on=["stationcode", "is_installed", "is_returning", "is_renting"], 
                     right_on=["stationCode", "is_installed", "is_returning", "is_renting"])


In [59]:
merged_station_status.head(2)

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,nom_arrondissement_communes,latitude,longitude,stationCode,station_id,num_bikes_available,numBikesAvailable,num_bikes_available_types,num_docks_available,numDocksAvailable,last_reported
0,16107,Benjamin Godard - Victor Hugo,1,35,35,0,0,0,1,1,2023-07-15T13:25:47+00:00,Paris,48.865983,2.275725,16107,213688169,0,0,"[{'mechanical': 0}, {'ebike': 0}]",35,35,1689427547
1,31104,Mairie de Rosny-sous-Bois,1,30,4,25,21,4,1,1,2023-07-15T13:37:51+00:00,Rosny-sous-Bois,48.871257,2.486581,31104,653222953,25,25,"[{'mechanical': 21}, {'ebike': 4}]",4,4,1689428271


In [60]:
merged_station_status.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   is_installed                 1465 non-null   int64  
 3   capacity                     1465 non-null   int64  
 4   numdocksavailable            1465 non-null   int64  
 5   numbikesavailable            1465 non-null   int64  
 6   mechanical                   1465 non-null   int64  
 7   ebike                        1465 non-null   int64  
 8   is_renting                   1465 non-null   int64  
 9   is_returning                 1465 non-null   int64  
 10  duedate                      1465 non-null   object 
 11  nom_arrondissement_communes  1465 non-null   object 
 12  latitude                     1465 non-null   float64
 13  longitude         

In [61]:
merged_station_status.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   is_installed                 1465 non-null   int64  
 3   capacity                     1465 non-null   int64  
 4   numdocksavailable            1465 non-null   int64  
 5   numbikesavailable            1465 non-null   int64  
 6   mechanical                   1465 non-null   int64  
 7   ebike                        1465 non-null   int64  
 8   is_renting                   1465 non-null   int64  
 9   is_returning                 1465 non-null   int64  
 10  duedate                      1465 non-null   object 
 11  nom_arrondissement_communes  1465 non-null   object 
 12  latitude                     1465 non-null   float64
 13  longitude         

# Summary of available data

In [62]:
merged_df.head(2)

Unnamed: 0,stationcode,name,capacity,latitude,longitude,station_id,stationCode
0,13007,Le Brun - Gobelins,48,48.835093,2.353468,66491398,13007
1,6021,Beaux-Arts - Bonaparte,20,48.856452,2.334852,210405211,6021


In [64]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   stationcode  1465 non-null   object 
 1   name         1465 non-null   object 
 2   capacity     1465 non-null   int64  
 3   latitude     1465 non-null   float64
 4   longitude    1465 non-null   float64
 5   station_id   1465 non-null   int64  
 6   stationCode  1465 non-null   object 
dtypes: float64(2), int64(2), object(3)
memory usage: 80.2+ KB


In [65]:
merged_station_status.head(2)

Unnamed: 0,stationcode,name,is_installed,capacity,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,nom_arrondissement_communes,latitude,longitude,stationCode,station_id,num_bikes_available,numBikesAvailable,num_bikes_available_types,num_docks_available,numDocksAvailable,last_reported
0,16107,Benjamin Godard - Victor Hugo,1,35,35,0,0,0,1,1,2023-07-15T13:25:47+00:00,Paris,48.865983,2.275725,16107,213688169,0,0,"[{'mechanical': 0}, {'ebike': 0}]",35,35,1689427547
1,31104,Mairie de Rosny-sous-Bois,1,30,4,25,21,4,1,1,2023-07-15T13:37:51+00:00,Rosny-sous-Bois,48.871257,2.486581,31104,653222953,25,25,"[{'mechanical': 21}, {'ebike': 4}]",4,4,1689428271


In [66]:
merged_station_status.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   is_installed                 1465 non-null   int64  
 3   capacity                     1465 non-null   int64  
 4   numdocksavailable            1465 non-null   int64  
 5   numbikesavailable            1465 non-null   int64  
 6   mechanical                   1465 non-null   int64  
 7   ebike                        1465 non-null   int64  
 8   is_renting                   1465 non-null   int64  
 9   is_returning                 1465 non-null   int64  
 10  duedate                      1465 non-null   object 
 11  nom_arrondissement_communes  1465 non-null   object 
 12  latitude                     1465 non-null   float64
 13  longitude         

In [67]:
final_merged = pd.merge(merged_df, merged_station_status,
                        left_on=["stationcode", "stationCode", "capacity", "name", "latitude", "longitude", "station_id"], 
                        right_on=["stationcode", "stationCode",  "capacity", "name", "latitude", "longitude", "station_id"])
final_merged.head()

Unnamed: 0,stationcode,name,capacity,latitude,longitude,station_id,stationCode,is_installed,numdocksavailable,numbikesavailable,mechanical,ebike,is_renting,is_returning,duedate,nom_arrondissement_communes,num_bikes_available,numBikesAvailable,num_bikes_available_types,num_docks_available,numDocksAvailable,last_reported
0,13007,Le Brun - Gobelins,48,48.835093,2.353468,66491398,13007,1,27,20,15,5,1,1,2023-07-15T13:36:25+00:00,Paris,19,19,"[{'mechanical': 15}, {'ebike': 4}]",28,28,1689428299
1,6021,Beaux-Arts - Bonaparte,20,48.856452,2.334852,210405211,6021,1,10,10,4,6,1,1,2023-07-15T13:36:11+00:00,Paris,10,10,"[{'mechanical': 4}, {'ebike': 6}]",10,10,1689428171
2,7004,Raspail - Varenne,18,48.853148,2.326391,210567545,7004,1,15,0,0,0,1,1,2023-07-15T13:34:44+00:00,Paris,0,0,"[{'mechanical': 0}, {'ebike': 0}]",15,15,1689428084
3,44017,Jules Guesde - Pont du Port à l'Anglais,30,48.796288,2.417212,499303158,44017,1,2,27,19,8,1,1,2023-07-15T13:38:14+00:00,Vitry-sur-Seine,27,27,"[{'mechanical': 19}, {'ebike': 8}]",2,2,1689428294
4,17043,Belidor - Gouvion-Saint-Cyr,39,48.880222,2.285468,80216231,17043,1,31,7,4,3,1,1,2023-07-15T13:32:32+00:00,Paris,7,7,"[{'mechanical': 4}, {'ebike': 3}]",31,31,1689427952


In [68]:
final_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationcode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   capacity                     1465 non-null   int64  
 3   latitude                     1465 non-null   float64
 4   longitude                    1465 non-null   float64
 5   station_id                   1465 non-null   int64  
 6   stationCode                  1465 non-null   object 
 7   is_installed                 1465 non-null   int64  
 8   numdocksavailable            1465 non-null   int64  
 9   numbikesavailable            1465 non-null   int64  
 10  mechanical                   1465 non-null   int64  
 11  ebike                        1465 non-null   int64  
 12  is_renting                   1465 non-null   int64  
 13  is_returning      

In [64]:
final_merged.columns

Index(['stationcode', 'name', 'capacity', 'latitude', 'longitude',
       'station_id', 'stationCode', 'rental_methods', 'is_installed',
       'numdocksavailable', 'numbikesavailable', 'mechanical', 'ebike',
       'is_renting', 'is_returning', 'duedate', 'nom_arrondissement_communes',
       'num_bikes_available', 'numBikesAvailable', 'num_bikes_available_types',
       'num_docks_available', 'numDocksAvailable', 'last_reported'],
      dtype='object')

In [71]:
# Liste des noms de colonnes dans le nouvel ordre souhaité
new_column_order = ['stationCode', 'name', 'nom_arrondissement_communes', 
                    'capacity', 'latitude', 'longitude',
        'is_installed', 'is_renting', 'is_returning',
       'numdocksavailable','num_docks_available', 'numbikesavailable','num_bikes_available',
                    'mechanical', 'ebike', 'num_bikes_available_types',
        'duedate','last_reported', ]

# Réorganisation de l'ordre des colonnes
final_merged_reoder = final_merged.reindex(columns=new_column_order)


In [72]:
final_merged_reoder.head()

Unnamed: 0,stationCode,name,nom_arrondissement_communes,capacity,latitude,longitude,is_installed,is_renting,is_returning,numdocksavailable,num_docks_available,numbikesavailable,num_bikes_available,mechanical,ebike,num_bikes_available_types,duedate,last_reported
0,13007,Le Brun - Gobelins,Paris,48,48.835093,2.353468,1,1,1,27,28,20,19,15,5,"[{'mechanical': 15}, {'ebike': 4}]",2023-07-15T13:36:25+00:00,1689428299
1,6021,Beaux-Arts - Bonaparte,Paris,20,48.856452,2.334852,1,1,1,10,10,10,10,4,6,"[{'mechanical': 4}, {'ebike': 6}]",2023-07-15T13:36:11+00:00,1689428171
2,7004,Raspail - Varenne,Paris,18,48.853148,2.326391,1,1,1,15,15,0,0,0,0,"[{'mechanical': 0}, {'ebike': 0}]",2023-07-15T13:34:44+00:00,1689428084
3,44017,Jules Guesde - Pont du Port à l'Anglais,Vitry-sur-Seine,30,48.796288,2.417212,1,1,1,2,2,27,27,19,8,"[{'mechanical': 19}, {'ebike': 8}]",2023-07-15T13:38:14+00:00,1689428294
4,17043,Belidor - Gouvion-Saint-Cyr,Paris,39,48.880222,2.285468,1,1,1,31,31,7,7,4,3,"[{'mechanical': 4}, {'ebike': 3}]",2023-07-15T13:32:32+00:00,1689427952


In [73]:
final_merged_reoder.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1465 entries, 0 to 1464
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   stationCode                  1465 non-null   object 
 1   name                         1465 non-null   object 
 2   nom_arrondissement_communes  1465 non-null   object 
 3   capacity                     1465 non-null   int64  
 4   latitude                     1465 non-null   float64
 5   longitude                    1465 non-null   float64
 6   is_installed                 1465 non-null   int64  
 7   is_renting                   1465 non-null   int64  
 8   is_returning                 1465 non-null   int64  
 9   numdocksavailable            1465 non-null   int64  
 10  num_docks_available          1465 non-null   int64  
 11  numbikesavailable            1465 non-null   int64  
 12  num_bikes_available          1465 non-null   int64  
 13  mechanical        

In [74]:
final_merged_reoder[["duedate", "last_reported"]]

Unnamed: 0,duedate,last_reported
0,2023-07-15T13:36:25+00:00,1689428299
1,2023-07-15T13:36:11+00:00,1689428171
2,2023-07-15T13:34:44+00:00,1689428084
3,2023-07-15T13:38:14+00:00,1689428294
4,2023-07-15T13:32:32+00:00,1689427952
...,...,...
1460,2023-07-15T13:36:39+00:00,1689428199
1461,2023-07-15T13:38:06+00:00,1689428302
1462,2023-07-15T13:37:59+00:00,1689428279
1463,2023-07-15T13:36:30+00:00,1689428190


In [75]:
final_merged_reoder["duedate"] =  pd.to_datetime(final_merged_reoder["duedate"])

In [76]:
type(final_merged_reoder["duedate"])

pandas.core.series.Series

In [77]:
final_merged_reoder["duedate"][0]

Timestamp('2023-07-15 13:36:25+0000', tz='UTC')

In [78]:
final_merged_reoder["duedate"][0].hour

13

In [79]:
final_merged_reoder["duedate"][0].minute

36

In [80]:
final_merged_reoder["year"] = final_merged_reoder["duedate"].dt.year
final_merged_reoder["month"] = final_merged_reoder["duedate"].dt.month
final_merged_reoder["day"] = final_merged_reoder["duedate"].dt.day

In [81]:
final_merged_reoder["hour"] = final_merged_reoder["duedate"].dt.hour
final_merged_reoder["minute"] = final_merged_reoder["duedate"].dt.minute
final_merged_reoder["second"] = final_merged_reoder["duedate"].dt.second

In [82]:
final_merged_reoder[["duedate", "year", "month", "day", "hour", "minute" ,"second"]]

Unnamed: 0,duedate,year,month,day,hour,minute,second
0,2023-07-15 13:36:25+00:00,2023,7,15,13,36,25
1,2023-07-15 13:36:11+00:00,2023,7,15,13,36,11
2,2023-07-15 13:34:44+00:00,2023,7,15,13,34,44
3,2023-07-15 13:38:14+00:00,2023,7,15,13,38,14
4,2023-07-15 13:32:32+00:00,2023,7,15,13,32,32
...,...,...,...,...,...,...,...
1460,2023-07-15 13:36:39+00:00,2023,7,15,13,36,39
1461,2023-07-15 13:38:06+00:00,2023,7,15,13,38,6
1462,2023-07-15 13:37:59+00:00,2023,7,15,13,37,59
1463,2023-07-15 13:36:30+00:00,2023,7,15,13,36,30


In [83]:
final_merged_reoder.head()

Unnamed: 0,stationCode,name,nom_arrondissement_communes,capacity,latitude,longitude,is_installed,is_renting,is_returning,numdocksavailable,num_docks_available,numbikesavailable,num_bikes_available,mechanical,ebike,num_bikes_available_types,duedate,last_reported,year,month,day,hour,minute,second
0,13007,Le Brun - Gobelins,Paris,48,48.835093,2.353468,1,1,1,27,28,20,19,15,5,"[{'mechanical': 15}, {'ebike': 4}]",2023-07-15 13:36:25+00:00,1689428299,2023,7,15,13,36,25
1,6021,Beaux-Arts - Bonaparte,Paris,20,48.856452,2.334852,1,1,1,10,10,10,10,4,6,"[{'mechanical': 4}, {'ebike': 6}]",2023-07-15 13:36:11+00:00,1689428171,2023,7,15,13,36,11
2,7004,Raspail - Varenne,Paris,18,48.853148,2.326391,1,1,1,15,15,0,0,0,0,"[{'mechanical': 0}, {'ebike': 0}]",2023-07-15 13:34:44+00:00,1689428084,2023,7,15,13,34,44
3,44017,Jules Guesde - Pont du Port à l'Anglais,Vitry-sur-Seine,30,48.796288,2.417212,1,1,1,2,2,27,27,19,8,"[{'mechanical': 19}, {'ebike': 8}]",2023-07-15 13:38:14+00:00,1689428294,2023,7,15,13,38,14
4,17043,Belidor - Gouvion-Saint-Cyr,Paris,39,48.880222,2.285468,1,1,1,31,31,7,7,4,3,"[{'mechanical': 4}, {'ebike': 3}]",2023-07-15 13:32:32+00:00,1689427952,2023,7,15,13,32,32


# Analyse

In [105]:
final_merged_reoder["availability_rate"] = (final_merged_reoder["numbikesavailable"]/final_merged_reoder["capacity"])*100

In [94]:
final_merged_reoder["verif_capa_"]= final_merged_reoder["num_docks_available"] + final_merged_reoder["num_bikes_available"]
final_merged_reoder["verif_capa"]= final_merged_reoder["numdocksavailable"] + final_merged_reoder["numbikesavailable"]

In [95]:
final_merged_reoder.columns

Index(['stationCode', 'name', 'nom_arrondissement_communes', 'capacity',
       'latitude', 'longitude', 'is_installed', 'is_renting', 'is_returning',
       'numdocksavailable', 'num_docks_available', 'numbikesavailable',
       'num_bikes_available', 'mechanical', 'ebike',
       'num_bikes_available_types', 'duedate', 'last_reported', 'year',
       'month', 'day', 'hour', 'minute', 'second', 'verif_capa',
       'verif_capa_'],
      dtype='object')

In [106]:
verif = final_merged_reoder[["stationCode", "capacity", "verif_capa_", "verif_capa",
                             "numdocksavailable", "num_docks_available", "numbikesavailable", "num_bikes_available",
                            "availability_rate"]]
verif

Unnamed: 0,stationCode,capacity,verif_capa_,verif_capa,numdocksavailable,num_docks_available,numbikesavailable,num_bikes_available,availability_rate
0,13007,48,47,47,27,28,20,19,41.666667
1,6021,20,20,20,10,10,10,10,50.000000
2,7004,18,15,15,15,15,0,0,0.000000
3,44017,30,29,29,2,2,27,27,90.000000
4,17043,39,38,38,31,31,7,7,17.948718
...,...,...,...,...,...,...,...,...,...
1460,21112,56,56,56,50,50,6,6,10.714286
1461,8002,45,44,44,5,5,39,39,86.666667
1462,31301,30,30,30,5,5,25,25,83.333333
1463,18023,35,34,34,30,31,4,3,11.428571


In [98]:
var1 = "verif_capa_"
var2 = "verif_capa"
mask = (verif[var1] == verif[var2]) == False
verif.loc[mask, ["capacity", var1, var2] ]

Unnamed: 0,capacity,verif_capa_,verif_capa
1017,37,36,35
1239,30,25,27


In [None]:
final_merged_reoder[["stationCode", "capacity", "verif_capa", "verif_capa_",]]