In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

# 2.0 – Shared Mobility API Collection (CROW, Cargoroo, OV-fiets)



This notebook focuses on fetching and processing data from various APIs and sources related to shared mobility in Utrecht. The goal is to analyze the availability and distribution of shared mobility services such as bicycles, cargo bikes, and other vehicles within a 50 km radius of Utrecht. The steps include:

1. **Data Collection**:
    - Fetching data from APIs (e.g., CROW API, Cargoroo API) and scraping relevant information.
        - **CROW API**: [https://www.crow.nl/](https://www.crow.nl/)
        - **Cargoroo**: Locations scraped from the public city page at [https://cargoroo.nl/cargoroo-cities/](https://cargoroo.nl/cargoroo-cities/) (the page HTML is parsed; no API is called).
        - **OV-fiets**: Location data scraped from the NS OV-fiets website.
    - Loading data from JSON and CSV files.

2. **Data Processing**:
    - Filtering vehicles based on their proximity to Utrecht's center.
    - Extracting and organizing relevant attributes such as form factor, latitude, longitude, and system ID.

3. **Data Storage**:
    - Saving the processed data into CSV files for further analysis in QGIS.

4. **Data Analysis**:
    - Aggregating and summarizing the data to understand the distribution of shared mobility services.
    - Identifying duplicates and missing values in datasets.

5. **24-Hour Data Collection**:
    - Implementing a script to fetch and store data at regular intervals over a 24-hour period to analyze temporal patterns in shared mobility availability.

This notebook provides a comprehensive workflow for analyzing shared mobility data in Utrecht, enabling insights into the availability and usage of these services.

### 1.0 **Data Collection** - CROW API

In [None]:
import json
import csv
import os
from datetime import date
from geopy.distance import geodesic

# Project-local path configuration.
from config import CROW_JSON_PATH, CSV_VEHICLE_LOCATIONS_DIR

# Date stamp (DD-MM-YYYY) used for both the output folder and the file name.
Date = date.today().strftime("%d-%m-%Y")

# Input JSON file and dated output folder.
input_path = CROW_JSON_PATH
output_dir = os.path.join(CSV_VEHICLE_LOCATIONS_DIR, Date)
os.makedirs(output_dir, exist_ok=True)

# Load the JSON data. On failure the error is reported and re-raised:
# calling exit() inside a notebook acts on the kernel instead of surfacing
# the problem in this cell, and nothing below can run without `vehicles`.
try:
    with open(input_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
except (FileNotFoundError, json.JSONDecodeError) as e:
    print(f"Error loading JSON data: {e}")
    raise

# A missing key yields an empty list, so the export below writes only a header.
vehicles = data.get('vehicles_in_public_space', [])

# Utrecht center coordinates
utrecht_center = (52.091983, 5.120756)  # Latitude and longitude of Utrecht center
radius_km = 50  # Radius in kilometers

# Define the CSV file path
csv_path = os.path.join(output_dir, f"CROW_locations_{Date}.csv")

# Write every vehicle within `radius_km` of the center to the CSV file.
try:
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        # Write the header
        header = ['form_factor', 'latitude', 'longitude', 'system_id']
        csv_writer.writerow(header)

        for vehicle in vehicles:
            try:
                # Extract data; a missing or null `location` is treated as empty.
                location = vehicle.get('location') or {}
                form_factor = vehicle.get('form_factor', 'Unknown')
                latitude = location.get('latitude', 'Unknown')
                longitude = location.get('longitude', 'Unknown')
                system_id = vehicle.get('system_id', 'Unknown')

                # Skip vehicles without usable coordinates (absent or null).
                if latitude in ('Unknown', None) or longitude in ('Unknown', None):
                    continue

                # Calculate the distance from Utrecht center
                vehicle_coords = (latitude, longitude)
                distance = geodesic(utrecht_center, vehicle_coords).km

                # Filter by radius
                if distance <= radius_km:
                    csv_writer.writerow([form_factor, latitude, longitude, system_id])
            except Exception as e:
                # Best effort: one malformed record must not abort the export.
                print(f"Error processing vehicle: {e}")

    print(f"Data successfully saved to {csv_path}")
except Exception as e:
    # Report and re-raise so later cells do not read a missing or partial file.
    print(f"Error writing CSV file: {e}")
    raise


In [4]:
import os
from datetime import date

from config import CSV_VEHICLE_LOCATIONS_DIR

# Rebuild today's date stamp (DD-MM-YYYY) to match the export file naming.
Date = date.today().strftime("%d-%m-%Y")

# Read the file from the same location the CROW export cell writes to:
# <CSV_VEHICLE_LOCATIONS_DIR>/<Date>/CROW_locations_<Date>.csv.
# The previous lookup through `config['data_dir']` relied on a `config`
# object that is never bound by `from config import ...`.
csv_path = os.path.join(CSV_VEHICLE_LOCATIONS_DIR, Date, f"CROW_locations_{Date}.csv")

# Load the CSV file into a pandas DataFrame
df_crow = pd.read_csv(csv_path)

# Display the first few rows to check the structure
df_crow.head()

Unnamed: 0,form_factor,latitude,longitude,system_id
0,bicycle,52.34067,4.873048,hely
1,bicycle,52.34067,4.873048,hely
2,bicycle,52.34067,4.873048,hely
3,bicycle,52.34067,4.873048,hely
4,bicycle,52.118011,5.189397,moveyou


In [5]:

# Count the exported vehicles per operator (system_id) and vehicle type.
group_keys = ['system_id', 'form_factor']
vehicle_counts = df_crow.groupby(group_keys).size()
df_summary = vehicle_counts.reset_index(name='count')
print(df_summary)


             system_id    form_factor  count
0                baqme  cargo_bicycle    437
1                check          moped   1719
2                 cykl        bicycle     22
3   deelfietsnederland        bicycle    102
4               donkey        bicycle   1223
5                felyx          moped   1866
6            gosharing          moped     28
7          greenwheels            car   1375
8                 hely        bicycle      9
9                  htm        bicycle     35
10                lime        bicycle    896
11             moveyou        bicycle     67
12            mywheels            car   1214
13               tier2        bicycle     28
14         uwdeelfiets        bicycle      1


### 1.1 **Data Collection** - CARGOROO API

In [None]:
import re
import csv
import os
from datetime import date
import requests
from geopy.distance import geodesic

# Date stamp (DD-MM-YYYY) used for the output folder and file name.
Date = date.today().strftime("%d-%m-%Y")

# Utrecht center coordinates
utrecht_center = (52.091983, 5.120756)  # Latitude and longitude of Utrecht center
radius_km = 50  # Radius in kilometers

# Fetch the Cargoroo city page. The timeout keeps a stalled connection from
# hanging the notebook; raise_for_status() stops an HTTP error page from being
# parsed as if it contained locations.
response = requests.get("https://cargoroo.nl/cargoroo-cities/", timeout=30)
response.raise_for_status()
data = response.text

# Extract latitude and longitude values using regex
latitudes = re.findall(r'data-lat="(\d+\.\d+)', data)
longitudes = re.findall(r'data-lng="(\d+\.\d+)', data)

# The two lists are paired by position below; zip() would silently truncate
# and misalign coordinates if the page yields an unequal number of matches.
if len(latitudes) != len(longitudes):
    raise ValueError(
        f"Found {len(latitudes)} latitudes but {len(longitudes)} longitudes; "
        "cannot pair coordinates reliably."
    )

# Convert extracted values to float
lat_list = [float(lat) for lat in latitudes]
lng_list = [float(lng) for lng in longitudes]

# Create directory if it doesn't exist.
# NOTE(review): PATHIN is not defined in this cell; presumably set in an
# earlier cell. Confirm it exists on a fresh kernel.
output_dir = os.path.join(PATHIN, Date)
os.makedirs(output_dir, exist_ok=True)

# Define the file path
file_path = os.path.join(output_dir, f"Cargoroo_locations_{Date}.csv")

# Write every location within `radius_km` of the center to the CSV file.
with open(file_path, 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    header = ['service', 'modality', 'latitude', 'longitude']
    csv_writer.writerow(header)
    for latitude, longitude in zip(lat_list, lng_list):
        # Calculate distance from Utrecht center
        distance = geodesic((latitude, longitude), utrecht_center).km

        # Filter by radius
        if distance <= radius_km:
            csv_writer.writerow(['Cargoroo', 'cargobike', latitude, longitude])

print(f"Data saved to {file_path}")


In [7]:
import pandas as pd

# Read back the Cargoroo locations file for the current date stamp.
cargoroo_csv = os.path.join(PATHIN, Date, f"Cargoroo_locations_{Date}.csv")
df_cargoroo = pd.read_csv(cargoroo_csv)

# Preview the first rows.
df_cargoroo.head()

### 1.2 **Data Collection** - OV-fiets (scraped)

In [9]:
import pandas as pd

# The OV-fiets file is read from the dated folder under PATHIN.
ov_fiets_dir = os.path.join(PATHIN, Date)
file_path = os.path.join(ov_fiets_dir, "ov_fiets_filtered.csv")

# Load it and print the first ten rows.
df_ov_fiets = pd.read_csv(file_path, encoding='utf-8')
print(df_ov_fiets.head(10))


                              name   latitude  longitude  available  \
0            's-Hertogenbosch Oost  51.700878   5.319214          6   
1    Almere Centrum Landdrostdreef  52.374530   5.220160         53   
2              Alphen aan den Rijn  52.124920   4.657890         13   
3              Amersfoort Centraal  52.152930   5.374810         33   
4        Amersfoort Mondriaanplein  52.154460   5.373390          8   
5            Amersfoort Schothorst  52.174600   5.404620          9   
6              Amersfoort Vathorst  52.192610   5.433620          9   
7                 Amsterdam Amstel  52.347010   4.918750         16   
8          Amsterdam Bijlmer ArenA  52.311914   4.948114          3   
9  Amsterdam Centraal IJzijde West  52.380095   4.898784         68   

                url  
0   /locatie/hto001  
1   /locatie/alm001  
2   /locatie/apn001  
3   /locatie/amf001  
4   /locatie/amf002  
5  /locatie/amfs001  
6  /locatie/avat001  
7   /locatie/asa001  
8   /locatie/asb002 

In [None]:
# Tag every row with a constant service name.
df_ov_fiets = df_ov_fiets.assign(service="OV Fiets")
df_ov_fiets

In [11]:
# Tag every row with a constant vehicle type.
df_ov_fiets = df_ov_fiets.assign(modality="bicycle")

df_ov_fiets

Unnamed: 0,name,latitude,longitude,available,url,service,modality
0,'s-Hertogenbosch Oost,51.700878,5.319214,6,/locatie/hto001,OV Fiets,bicycle
1,Almere Centrum Landdrostdreef,52.374530,5.220160,53,/locatie/alm001,OV Fiets,bicycle
2,Alphen aan den Rijn,52.124920,4.657890,13,/locatie/apn001,OV Fiets,bicycle
3,Amersfoort Centraal,52.152930,5.374810,33,/locatie/amf001,OV Fiets,bicycle
4,Amersfoort Mondriaanplein,52.154460,5.373390,8,/locatie/amf002,OV Fiets,bicycle
...,...,...,...,...,...,...,...
118,Zeist,52.089090,5.249280,19,/locatie/db002,OV Fiets,bicycle
119,Zoetermeer Centrum West,52.060240,4.487390,13,/locatie/ztm002,OV Fiets,bicycle
120,Zoetermeer Driemanspolder,52.049090,4.476630,6,/locatie/ztm003,OV Fiets,bicycle
121,Zoetermeer Oost,52.046610,4.491330,3,/locatie/ztmo001,OV Fiets,bicycle


In [12]:
# Count rows that exactly repeat an earlier row.
duplicate_mask = df_ov_fiets.duplicated()
duplicate_mask.sum()

0

In [13]:
# Count missing values per column.
missing_mask = df_ov_fiets.isna()
missing_mask.sum()

name         0
latitude     0
longitude    0
available    0
url          0
service      0
modality     0
dtype: int64