# CityBikes


In [1]:
# Standard Library Imports
import os
import re
import json
from pprint import pprint

# Third-Party Imports
import folium
import requests
import pandas as pd
from haversine import haversine, Unit
from geopy.geocoders import Nominatim
from pandas import json_normalize


In [2]:
# Constants
API_BASE_URL = "http://api.citybik.es/v2"
NETWORK_ID = 'velib'
NETWORKS_PATH = '../data/raw_networks.csv'
STATIONS_PATH = '../data/raw_stations.csv'

# Function to fetch JSON data from a URL
def fetch_json_data(url):
    try:
        response = requests.get(url)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()
        if 'application/json' in content_type:
            return response.json()
        else:
            print(f"Invalid content type: {content_type}")
            return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON response from {url}: {e}")
        return None
    

# Function to recursively collect all keys and their data types from a JSON-like object
def get_all_keys_with_types(obj, keys_with_types, parent_key=''):
    if isinstance(obj, dict):
        for key, value in obj.items():
            full_key = f"{parent_key}.{key}" if parent_key else key
            keys_with_types[full_key] = type(value).__name__
            get_all_keys_with_types(value, keys_with_types, full_key)
    elif isinstance(obj, list):
        for item in obj:
            get_all_keys_with_types(item, keys_with_types, parent_key)


# Function to process network data and save to CSV
def process_network_data(data):
    if data:
        all_keys_with_types = {}
        get_all_keys_with_types(data, all_keys_with_types)

        print("Keys and Types for {url}:")
        for key, data_type in sorted(all_keys_with_types.items()):
            print(f"{key}: {data_type}")

        total_networks = len(data.get("networks", []))
        print(f"Total number of networks: {total_networks}")

        # Flatten the JSON data
        flattened_data = []
        for network in data.get("networks", []):
            # Flatten each network entry and append to the list
            flattened_data.append(json_normalize(network))

        # Concatenate all flattened data into a single DataFrame
        df = pd.concat(flattened_data, ignore_index=True)

        # Save station network to a CSV file
        df.to_csv(NETWORKS_PATH, index=False)


# Function to process station data
def process_station_data(data):
    if data:
        all_keys_with_types = {}
        get_all_keys_with_types(data, all_keys_with_types)

        print("Keys and Types for {url}:")
        for key, data_type in sorted(all_keys_with_types.items()):
            print(f"{key}: {data_type}")

        # Extract station data from the JSON
        station_data = data.get("network", {}).get("stations", [])
        total_stations = len(station_data)
        print(f"Total number of stations: {total_stations}")

        # Flatten the JSON station data
        flattened_data = []
        for station in station_data:
            flattened_data.append(json_normalize(station))

        # Concatenate all flattened data into a single DataFrame
        df = pd.concat(flattened_data, ignore_index=True)

        # Save station data to a CSV file
        df.to_csv(STATIONS_PATH, index=False)


# Main function
def main():
    urls = [
        f"{API_BASE_URL}/networks",
        f"{API_BASE_URL}/networks/{NETWORK_ID}"
    ]

    for url in urls:
        print(f"Fetching data from {url}")
        data = fetch_json_data(url)

        if url == f"{API_BASE_URL}/networks":
            process_network_data(data)
        elif url == f"{API_BASE_URL}/networks/{NETWORK_ID}":
            process_station_data(data)

if __name__ == "__main__":
    main()


Fetching data from http://api.citybik.es/v2/networks
Keys and Types for {url}:
networks: list
networks.company: list
networks.ebikes: bool
networks.gbfs_href: str
networks.href: str
networks.id: str
networks.license: dict
networks.license.name: str
networks.license.url: str
networks.location: dict
networks.location.city: str
networks.location.country: str
networks.location.latitude: float
networks.location.longitude: float
networks.name: str
networks.source: str
Total number of networks: 685
Fetching data from http://api.citybik.es/v2/networks/velib
Keys and Types for {url}:
network: dict
network.company: list
network.ebikes: bool
network.gbfs_href: str
network.href: str
network.id: str
network.license: dict
network.license.name: str
network.license.url: str
network.location: dict
network.location.city: str
network.location.country: str
network.location.latitude: float
network.location.longitude: float
network.name: str
network.stations: list
network.stations.empty_slots: int
network.s

Parse through the response to get the details you want for the bike stations in that city (latitude, longitude, number of bikes).


In [3]:
# API request
stations_url = "http://api.citybik.es/v2/networks/velib"
stations_response = requests.get(stations_url)
stations_data = json.loads(stations_response.text)

# Data parsing
stations_parsed_data = []
for info in stations_data['network']['stations']:
    stations_info_data = {
        "name": info['name'],
        "latitude": info['latitude'],
        "longitude": info['longitude'],
        "free_bikes": info['free_bikes'],
        "empty_slots": info['empty_slots'],
        "total_bikes": info['empty_slots'] + info['free_bikes'],
        "usage_percentage": info['empty_slots'] / (info['empty_slots'] + info['free_bikes']) if (info['empty_slots'] + info['free_bikes']) != 0 else 0
    }
    stations_parsed_data.append(stations_info_data)


Put your parsed results into a DataFrame.


In [4]:
# DataFrame creation
stations_df = pd.DataFrame(stations_parsed_data)


Filter stations based on API limits


In [5]:
# Define broader latitude and longitude bounds
lat_min, lat_max = 48.85, 48.88  # Broader range for latitude to include both landmarks
lon_min, lon_max = 2.29, 2.35    # Broader range for longitude to include both landmarks


# Assuming df is your DataFrame with 'latitude' and 'longitude' columns

# Filter the DataFrame based on the defined bounds
filtered_df = stations_df[(stations_df['latitude'] >= lat_min) & (stations_df['latitude'] <= lat_max) &
                 (stations_df['longitude'] >= lon_min) & (stations_df['longitude'] <= lon_max)]

# Proceed with further processing on filtered_df


Filtered by Radius

In [6]:
# Central point (Notre-Dame Cathedral)
center_lat, center_lon = 48.8606, 2.3376  # Coordinates of the Louvre


# Degree changes for 2 km radius
radius_lat = 0.015  # Approximate radius in degrees latitude
radius_lon = 0.020  # Approximate radius in degrees longitude

# Calculate new bounds
lat_min = center_lat - radius_lat
lat_max = center_lat + radius_lat
lon_min = center_lon - radius_lon
lon_max = center_lon + radius_lon

# Assuming df is your DataFrame with 'latitude' and 'longitude' columns

# Filter the DataFrame based on the defined bounds
filtered_df = stations_df[(stations_df['latitude'] >= lat_min) & (stations_df['latitude'] <= lat_max) &
                 (stations_df['longitude'] >= lon_min) & (stations_df['longitude'] <= lon_max)]

# Proceed with further processing on filtered_df


Save to CSV


In [7]:
stations_df.to_csv('stations.csv', index=False)


Descriptive Statistics


In [8]:
print(stations_df.describe())


          latitude    longitude   free_bikes  empty_slots  total_bikes  \
count  1461.000000  1461.000000  1461.000000  1461.000000  1461.000000   
mean     48.858293     2.341017    12.110883    18.320329    30.431211   
std       0.030746     0.056697    10.295964    11.620249    11.848020   
min      48.764615     2.165597     0.000000     0.000000     0.000000   
25%      48.837668     2.302569     4.000000    10.000000    22.000000   
50%      48.858463     2.343670     9.000000    17.000000    28.000000   
75%      48.879398     2.378317    17.000000    25.000000    36.000000   
max      48.951432     2.538242    65.000000    67.000000    73.000000   

       usage_percentage  
count       1461.000000  
mean           0.594895  
std            0.286679  
min            0.000000  
25%            0.379310  
50%            0.666667  
75%            0.833333  
max            1.000000  


Checking data integrity and data types


In [9]:
print(stations_df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              1461 non-null   object 
 1   latitude          1461 non-null   float64
 2   longitude         1461 non-null   float64
 3   free_bikes        1461 non-null   int64  
 4   empty_slots       1461 non-null   int64  
 5   total_bikes       1461 non-null   int64  
 6   usage_percentage  1461 non-null   float64
dtypes: float64(3), int64(3), object(1)
memory usage: 80.0+ KB
None


Data Cleaning. Checking for duplicates and Null Values.


In [10]:
print(stations_df.duplicated().sum())
print(stations_df.isnull().sum())


0
name                0
latitude            0
longitude           0
free_bikes          0
empty_slots         0
total_bikes         0
usage_percentage    0
dtype: int64


## Visualization

In [11]:
import folium

# Create a map centered around the approximate center of the expanded bounds
map_center = [(lat_min + lat_max) / 2, (lon_min + lon_max) / 2]
map_folium = folium.Map(location=map_center, zoom_start=14)

# Adding markers for each bike station in the filtered DataFrame
for index, row in stations_df.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['name'],
        icon=folium.Icon(icon='bicycle', color='blue')
    ).add_to(map_folium)

# Display the map
map_folium



### Initial Process

#### Interactice Search

In [12]:
import requests

# Function to fetch and filter data
def search_bike_networks(query):
    url = "http://api.citybik.es/v2/networks"
    response = requests.get(url)
    response.raise_for_status()  # Raise an exception for non-200 status codes

    networks = response.json()['networks']

    query = query.lower()
    filtered_networks = [
        network for network in networks
        if query in network['location']['city'].lower() or
           query in network['location']['country'].lower() or
           query in network['id'].lower()
    ]

    return filtered_networks

# Get user input and display results
query_input = input("Enter city, country code, or company ID: ").strip()

if query_input:
    search_results = search_bike_networks(query_input)

    # Print the total number of results
    total_results = len(search_results)
    print(f"Total number of results for '{query_input}': {total_results}")

    # Print the filtered results
    for network in search_results:
        print(f"Network ID: {network['id']}")
        location = f"{network['location']['city']}, {network['location']['country']}"
        print(f"Location: {location}")
        companies = ', '.join(network['company'])
        print(f"Company: {companies}")
        print()
else:
    print("No query provided.")



Total number of results for 'paris': 2
Network ID: velib
Location: Paris, FR
Company: Smovengo

Network ID: saclay-captainbike
Location: Paris-Saclay, FR
Company: Ecovelo

