In [36]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [37]:
import os, time
import requests
import json
from datetime import datetime

station_information = "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"
station_status = "https://gbfs.citibikenyc.com/gbfs/en/station_status.json"


In [38]:
station_status_file_loc = '/gdrive/My Drive/station_status.json'
station_information_file_loc = '/gdrive/My Drive/station_information.json'
station_data_file_loc = '/gdrive/My Drive/station_data.json'
neighborhoods_data_file_loc = '/gdrive/My Drive/neighborhoods.json'
station_ids_file = '/gdrive/My Drive/station_ids.json'

In [39]:
def fetch_station_info():
    response = requests.get(station_information)
    if response.status_code == 200:
        return response.json()['data']['stations']
    else:
        print(f"Error fetching station information. Status code: {response.status_code}")
        return []

In [40]:
def fetch_station_status():
    response = requests.get(station_status)
    if response.status_code == 200:
        return response.json()['data']['stations']
    else:
        print(f"Error fetching station status. Status code: {response.status_code}")
        return []

In [41]:
datetime.now()

datetime.datetime(2023, 12, 7, 2, 26, 29, 903723)

In [42]:
import json
from datetime import datetime

def combine_and_save_data(station_info, station_status, neighborhood_file, station_ids_file, station_data_file_loc):
    # Load neighborhood data from the file
    with open(neighborhood_file, 'r') as neighborhood_file:
        neighborhood_data = json.load(neighborhood_file)

    # Load station IDs from the file
    with open(station_ids_file, 'r') as station_ids_file:
        station_ids = [entry['station_id'] for entry in json.load(station_ids_file)]

    combined_data = []

    # Get the current date and time
    current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # TODO Fix the day of week
    day_of_week = datetime.now().weekday()
    hour = datetime.now().hour
    # Iterate through station_info
    for info in station_info:
        # Find the corresponding station_status entry
        matching_status = next((status for status in station_status if status['station_id'] == info['station_id']), None)

        # Find the corresponding neighborhood entry
        matching_neighborhood = next((neighborhood for neighborhood in neighborhood_data if neighborhood['station_id'] == info['station_id']), None)

        # If both matching status and neighborhood entries are found, and all conditions are met
        if matching_status and matching_neighborhood:
            combined_station = {
                "station_id": info["station_id"],
                "lat": info["lat"],
                "lon": info["lon"],
                "name": info["name"],
                "capacity": info["capacity"],
                "num_vehicles_available": matching_status["num_bikes_available"],
                "num_vehicles_disabled": matching_status["num_bikes_disabled"],
                "num_docks_available": matching_status["num_docks_available"],
                "num_docks_disabled": matching_status["num_docks_disabled"],
                "neighborhood": matching_neighborhood["neighborhood"],
                "borough": matching_neighborhood["borough"],
                "dateTime": current_datetime,
                "day_of_week": day_of_week,
                "hour": day_of_week
            }

            # Check if the station_id is in the list of allowed station IDs
            if combined_station["station_id"] in station_ids:
                combined_data.append(combined_station)
    print(len(combined_data))
    # Save the combined data to the specified file
    with open(station_data_file_loc, 'w') as file:
        json.dump(combined_data, file, indent=2)

    print(f"Filtered and combined data with neighborhood, borough, and dateTime saved to {station_data_file_loc}")


In [43]:
station_info_data = fetch_station_info()

In [44]:
while True:
    station_status_data = fetch_station_status()
    combine_and_save_data(station_info_data, station_status_data, neighborhoods_data_file_loc, station_ids_file, station_data_file_loc)

    # Sleep for 60 seconds before fetching data again
    time.sleep(5)

1673
Filtered and combined data with neighborhood, borough, and dateTime saved to /gdrive/My Drive/station_data.json
1673
Filtered and combined data with neighborhood, borough, and dateTime saved to /gdrive/My Drive/station_data.json


KeyboardInterrupt: ignored