In [1]:
# Mount Google Drive at /gdrive (Colab only) so files under '/gdrive/My Drive' are reachable.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [18]:
import os,time
import requests
import json
from datetime import datetime

# Citi Bike GBFS feed URLs requested by fetch_station_info() and fetch_station_status().
# NOTE: several functions below take a parameter that is also named `station_status`;
# inside those functions the name refers to the argument, not to this URL.
station_information = "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"
station_status = "https://gbfs.citibikenyc.com/gbfs/en/station_status.json"


In [3]:
# JSON file locations on the mounted Google Drive.
# station_status_file_loc and station_information_file_loc are not referenced
# by any other cell visible in this notebook.
station_status_file_loc = '/gdrive/My Drive/station_status.json'
station_information_file_loc = '/gdrive/My Drive/station_information.json'
# Output file that the polling loop overwrites on every pass.
station_data_file_loc = '/gdrive/My Drive/station_data.json'
# Input: records carrying 'station_id', 'neighborhood' and 'borough'.
neighborhoods_data_file_loc = '/gdrive/My Drive/neighborhoods.json'
# Input: records carrying 'station_id'; used as the allow-list of stations to keep.
station_ids_file = '/gdrive/My Drive/station_ids.json'
# Input: mapping from station_id to local_id.
id_map_file = '/gdrive/My Drive/id_map.json'

In [4]:
def fetch_station_info():
    """Download the station_information feed and return its list of stations.

    Returns:
        list: The value of ``data.stations`` in the JSON payload. An empty
        list is returned (and a message printed) when the request fails,
        times out, answers with a non-200 status, or the payload cannot be
        parsed into the expected structure.
    """
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(station_information, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching station information: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching station information. Status code: {response.status_code}")
        return []

    try:
        return response.json()['data']['stations']
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers invalid JSON; KeyError/TypeError cover an unexpected shape.
        print(f"Error parsing station information: {exc}")
        return []

In [5]:
def fetch_station_status():
    """Download the station_status feed and return its list of stations.

    Returns:
        list: The value of ``data.stations`` in the JSON payload. An empty
        list is returned (and a message printed) when the request fails,
        times out, answers with a non-200 status, or the payload cannot be
        parsed into the expected structure.
    """
    try:
        # This runs inside an endless polling loop: a timeout plus exception
        # handling lets one bad request be skipped instead of ending the loop.
        response = requests.get(station_status, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching station status: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching station status. Status code: {response.status_code}")
        return []

    try:
        return response.json()['data']['stations']
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers invalid JSON; KeyError/TypeError cover an unexpected shape.
        print(f"Error parsing station status: {exc}")
        return []

In [6]:
import json
from datetime import datetime
import pytz

In [7]:
def load_neighborhood_data(neighborhood_file):
    """Parse the JSON document stored at ``neighborhood_file`` and return it.

    Args:
        neighborhood_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as ``json`` produces it.
    """
    with open(neighborhood_file, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [8]:
def load_station_ids(station_ids_file):
    """Return the 'station_id' of every record in a JSON file, in file order.

    Args:
        station_ids_file: Path of a JSON file holding a sequence of objects,
            each with a 'station_id' key.

    Returns:
        list: One 'station_id' value per record.
    """
    with open(station_ids_file, 'r') as handle:
        records = json.load(handle)

    collected = []
    for record in records:
        collected.append(record['station_id'])
    return collected

In [9]:
def load_local_id_map(id_map_file):
    """Parse the JSON document stored at ``id_map_file`` and return it.

    Args:
        id_map_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content; callers use it as a mapping keyed by station_id.
    """
    with open(id_map_file, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [10]:
def get_current_datetime():
    """Return the current 'US/Eastern' wall-clock time as 'YYYY-MM-DD HH:MM:SS'."""
    now_eastern = datetime.now(pytz.timezone('US/Eastern'))
    formatted = now_eastern.strftime("%Y-%m-%d %H:%M:%S")
    return formatted

In [11]:
def get_day_and_hour():
    """Return ``(weekday, hour)`` for the current time in 'US/Eastern'.

    ``weekday`` follows ``datetime.weekday()`` (Monday is 0); ``hour`` is 0-23.
    """
    now_eastern = datetime.now(pytz.timezone('US/Eastern'))
    weekday_index = now_eastern.weekday()
    hour_of_day = now_eastern.hour
    return weekday_index, hour_of_day

In [12]:
def find_matching_status(station_status, station_id):
    """Return the first status record whose 'station_id' equals ``station_id``.

    Args:
        station_status: Iterable of status records (mappings with a 'station_id' key).
        station_id: Identifier to look for.

    Returns:
        The first matching record, or ``None`` when nothing matches.
    """
    for candidate in station_status:
        if candidate['station_id'] == station_id:
            return candidate
    return None

In [13]:
def find_matching_neighborhood(neighborhood_data, station_id):
    """Return the first neighborhood record whose 'station_id' equals ``station_id``.

    Args:
        neighborhood_data: Iterable of neighborhood records (mappings with a
            'station_id' key).
        station_id: Identifier to look for.

    Returns:
        The first matching record, or ``None`` when nothing matches.
    """
    for candidate in neighborhood_data:
        if candidate['station_id'] == station_id:
            return candidate
    return None

In [14]:
def combine_station_data(info, matching_status, matching_neighborhood, station_ids, local_id_map):
    """Merge one station's info, status and neighborhood records into one dict.

    Args:
        info: Station information record; must provide 'station_id', 'lat',
            'lon', 'name' and 'capacity'.
        matching_status: Status record for the same station; must provide
            'num_bikes_available', 'num_bikes_disabled',
            'num_docks_available' and 'num_docks_disabled'.
        matching_neighborhood: Neighborhood record for the same station; must
            provide 'neighborhood' and 'borough'.
        station_ids: Unused here; kept so existing callers keep working.
        local_id_map: Mapping from station_id to local_id.

    Returns:
        dict: The combined record. 'local_id' is ``None`` when the station
        has no entry in ``local_id_map``.

    Raises:
        KeyError: If any of the required keys listed above is missing.
    """
    # Read the clock once so 'dateTime', 'day_of_week' and 'hour' always
    # describe the same instant, even when the call straddles an hour or
    # midnight boundary.
    now = datetime.now(pytz.timezone('US/Eastern'))

    return {
        "station_id": info["station_id"],
        "lat": info["lat"],
        "lon": info["lon"],
        "name": info["name"],
        "capacity": info["capacity"],
        "num_vehicles_available": matching_status["num_bikes_available"],
        "num_vehicles_disabled": matching_status["num_bikes_disabled"],
        "num_docks_available": matching_status["num_docks_available"],
        "num_docks_disabled": matching_status["num_docks_disabled"],
        "neighborhood": matching_neighborhood["neighborhood"],
        "borough": matching_neighborhood["borough"],
        "dateTime": now.strftime("%Y-%m-%d %H:%M:%S"),
        "day_of_week": now.weekday(),
        "hour": now.hour,
        "local_id": local_id_map.get(info["station_id"], None)
    }

In [15]:
def filter_and_save_data(station_info, station_status, neighborhood_file, station_ids_file, id_map_file, station_data_file_loc):
    """Join station info, status and neighborhood data, filter it, and save it as JSON.

    A station is written to the output only when its id is listed in
    ``station_ids_file`` and it has both a (truthy) status record and a
    (truthy) neighborhood record.

    Args:
        station_info: Iterable of station information records.
        station_status: Iterable of station status records.
        neighborhood_file: Path of the neighborhoods JSON file.
        station_ids_file: Path of the JSON file listing the station ids to keep.
        id_map_file: Path of the JSON file mapping station_id to local_id.
        station_data_file_loc: Path the combined JSON list is written to
            (overwritten on every call).
    """
    neighborhood_data = load_neighborhood_data(neighborhood_file)
    station_ids = load_station_ids(station_ids_file)
    local_id_map = load_local_id_map(id_map_file)

    # Set membership is O(1); assumes station ids are hashable (e.g. strings).
    allowed_ids = set(station_ids)

    # Index both lookups once instead of scanning the lists for every station.
    # setdefault keeps the FIRST record per id, like a first-match linear search.
    status_by_id = {}
    for status in station_status:
        status_by_id.setdefault(status['station_id'], status)

    neighborhood_by_id = {}
    for neighborhood in neighborhood_data:
        neighborhood_by_id.setdefault(neighborhood['station_id'], neighborhood)

    combined_data = []

    for info in station_info:
        station_id = info['station_id']

        # Skip stations outside the allow-list before doing any further work.
        if station_id not in allowed_ids:
            continue

        matching_status = status_by_id.get(station_id)
        matching_neighborhood = neighborhood_by_id.get(station_id)

        if matching_status and matching_neighborhood:
            combined_data.append(
                combine_station_data(info, matching_status, matching_neighborhood, station_ids, local_id_map)
            )

    with open(station_data_file_loc, 'w') as file:
        json.dump(combined_data, file, indent=2)

    print(f"Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to {station_data_file_loc}")

In [None]:
import json
from datetime import datetime
import pytz

def combine_and_save_data(station_info, station_status, neighborhood_file, station_ids_file, id_map_file, station_data_file_loc):
    """Join station info, status and neighborhood data, filter it, and save it as JSON.

    Self-contained variant of the load / combine / filter / save pipeline: it
    reads the three JSON inputs itself, stamps every record with the same
    'US/Eastern' timestamp, prints the number of records kept, and writes the
    result to ``station_data_file_loc``.

    Args:
        station_info: Iterable of station information records.
        station_status: Iterable of station status records.
        neighborhood_file: Path of the neighborhoods JSON file.
        station_ids_file: Path of the JSON file listing the station ids to keep.
        id_map_file: Path of the JSON file mapping station_id to local_id.
        station_data_file_loc: Path the combined JSON list is written to
            (overwritten on every call).
    """
    # Use separate handle names so the path parameters are not rebound to
    # file objects.
    with open(neighborhood_file, 'r') as neighborhood_handle:
        neighborhood_data = json.load(neighborhood_handle)

    with open(station_ids_file, 'r') as station_ids_handle:
        station_ids = [entry['station_id'] for entry in json.load(station_ids_handle)]

    with open(id_map_file, 'r') as id_map_handle:
        local_id_map = json.load(id_map_handle)

    # Set membership is O(1); assumes station ids are hashable (e.g. strings).
    allowed_ids = set(station_ids)

    # Index both lookups once; setdefault keeps the FIRST record per id,
    # matching a first-match linear search.
    status_by_id = {}
    for status in station_status:
        status_by_id.setdefault(status['station_id'], status)

    neighborhood_by_id = {}
    for neighborhood in neighborhood_data:
        neighborhood_by_id.setdefault(neighborhood['station_id'], neighborhood)

    # Read the clock once so dateTime, day_of_week and hour describe the
    # same instant for every record of this run.
    now = datetime.now(pytz.timezone('US/Eastern'))
    current_datetime = now.strftime("%Y-%m-%d %H:%M:%S")
    day_of_week = now.weekday()
    hour = now.hour

    combined_data = []

    for info in station_info:
        station_id = info['station_id']

        # Only stations on the allow-list are kept.
        if station_id not in allowed_ids:
            continue

        matching_status = status_by_id.get(station_id)
        matching_neighborhood = neighborhood_by_id.get(station_id)

        # Both a status and a neighborhood record are required.
        if matching_status and matching_neighborhood:
            combined_data.append({
                "station_id": station_id,
                "lat": info["lat"],
                "lon": info["lon"],
                "name": info["name"],
                "capacity": info["capacity"],
                "num_vehicles_available": matching_status["num_bikes_available"],
                "num_vehicles_disabled": matching_status["num_bikes_disabled"],
                "num_docks_available": matching_status["num_docks_available"],
                "num_docks_disabled": matching_status["num_docks_disabled"],
                "neighborhood": matching_neighborhood["neighborhood"],
                "borough": matching_neighborhood["borough"],
                "dateTime": current_datetime,
                "day_of_week": day_of_week,
                # Fixed: this field previously stored day_of_week instead of the hour.
                "hour": hour,
                # None when the station has no entry in the id map.
                "local_id": local_id_map.get(station_id, None)
            })

    print(len(combined_data))

    # Save the combined data to the specified file
    with open(station_data_file_loc, 'w') as file:
        json.dump(combined_data, file, indent=2)

    print(f"Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to {station_data_file_loc}")


In [16]:
# Fetch the station_information feed once; the result is an empty list if the fetch fails.
station_info_data = fetch_station_info()

In [19]:
# Poll the station_status feed forever, overwriting station_data_file_loc on every
# pass. The loop has no exit condition: interrupt the kernel to stop it.
while True:
    station_status_data = fetch_station_status()
    filter_and_save_data(station_info_data, station_status_data, neighborhoods_data_file_loc, station_ids_file, id_map_file, station_data_file_loc)

    # Sleep for 5 seconds before fetching data again
    time.sleep(5)

Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined data with neighborhood, borough, dateTime, and local_id saved to /gdrive/My Drive/station_data.json
Filtered and combined da

KeyboardInterrupt: ignored