In [None]:
import requests
from datetime import datetime
from bs4 import BeautifulSoup
import hashlib
from env import *
import matplotlib.pyplot as plt
import pandas as pd
import csv
import googlemaps
import json
import os
from collections import Counter
from PIL import Image, ImageDraw, ImageFont

### Credentials and API key are provided by the local env module
username, password, api_key = USERNAME, PASSWORD, MAPS_API_KEY

# Location of the on-disk trip cache
file_path = "caching.json"

### Load previously cached trips (remains an empty dict when there is no cache file)
cached_trip_dict = {}

# A cache file, when present, is assumed to contain valid JSON data
if os.path.exists(file_path):
    with open(file_path, "r") as json_file:
        cached_trip_dict = json.load(json_file)

    # JSON stores the timestamps as ISO strings; turn them back into datetime objects
    for trip_data in cached_trip_dict.values():
        for date_key in ('start_date', 'end_date'):
            if date_key in trip_data:
                trip_data[date_key] = datetime.fromisoformat(trip_data[date_key])

    print(str(len(cached_trip_dict)) + ' rides found')


In [None]:
##### HELPER FUNCTIONS

In [None]:
def getData(blocks, my_class):
    """Pull one text value out of every trip block.

    For each element of ``blocks`` the first descendant carrying CSS class
    ``my_class`` is located and the stripped text of the ``div`` inside it is
    collected. Dates and durations are read from a ``text-small`` div, every
    other field from a ``text-large`` div.

    Returns a list with one string per block, in the same order as ``blocks``.
    """
    small_text_fields = ('start-date', 'end-date', 'duration')
    text_class = 'text-small' if my_class in small_text_fields else 'text-large'

    return [
        block.find_all(class_=my_class)[0].find('div', class_=text_class).text.strip()
        for block in blocks
    ]

def dateStrToDate(my_str):
    """Parse a ``MM-DD-YYYY HH:MM:SS`` string into a datetime object."""
    return datetime.strptime(my_str, '%m-%d-%Y %H:%M:%S')

def getTripId(trip_block):
    """Return the SHA-256 hex digest of ``trip_block`` encoded as UTF-8, used as the trip id."""
    encoded_block = trip_block.encode('utf-8')
    return hashlib.sha256(encoded_block).hexdigest()

def parse_duration(duration_str):
    """Convert an ``H:M:S`` duration string into a total number of seconds."""
    parts = [int(piece) for piece in duration_str.split(':')]

    # Exactly three fields are expected; any other count fails on unpacking
    hours, minutes, seconds = parts

    return seconds + 60 * minutes + 3600 * hours

def format_duration(total_seconds):
    """Render a number of seconds as ``"HH hours and MM minutes"``.

    Leftover seconds are dropped (not rounded) from the result.
    """
    whole_minutes = total_seconds // 60
    hours = whole_minutes // 60
    minutes = whole_minutes % 60

    return f"{hours:02d} hours and {minutes:02d} minutes"

def getTripsNotCached(trip_dict, cached_trip_dict):
    """Return the ids of ``trip_dict`` that are absent from ``cached_trip_dict``, in order."""
    return [trip_id for trip_id in trip_dict if trip_id not in cached_trip_dict]
        
### BIKE SHARE TRIPS API CALL
def getTripsAPI(trip_dict, start_date, end_date, xsrf_token, laravel_session):
    """Request the trips page for a date range and merge its trips into ``trip_dict``.

    ``start_date`` and ``end_date`` are inserted verbatim into the query string;
    the two cookie values are sent in the ``Cookie`` header. ``trip_dict`` is
    updated in place with whatever ``parseTrips`` extracts from the response
    and is also returned.
    """
    url = "https://members.bikesharetoronto.com/trips?period=custom&date%5Bstart%5D=" + start_date + "&date%5Bend%5D=" + end_date + '&sort_direction=asc&sort_by=endTime'

    session_cookie = 'XSRF-TOKEN=' + xsrf_token + '; laravel_session=' + laravel_session
    headers = {
        'authority': 'members.bikesharetoronto.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'en,es-419;q=0.9,es;q=0.8,en-US;q=0.7',
        'referer': 'https://members.bikesharetoronto.com/trips',
        'Cookie': session_cookie,
    }

    response = requests.get(url, headers=headers, data={})

    # Parse the returned HTML and add the trips it contains to the running dict
    soup = BeautifulSoup(response.text, 'html.parser')
    trip_dict.update(parseTrips(soup))

    return trip_dict

### IF NO PREVIOUS DATA, THIS IS CALLED TO GET ALL TRIPS FROM BIKESHARE

def getAllTrips():
    """Download the complete trip history (used when nothing is cached).

    The first request covers everything from a fixed early date up to today.
    After that, the end of the window is repeatedly moved back to the earliest
    start date collected so far, until a request adds no new trips.

    Reads the notebook-level ``xsrf_token`` and ``laravel_session`` variables.

    Returns:
        dict mapping trip id -> trip info. Empty when the first request
        yields no trips.
    """
    # Set start date to an arbitrary date a long time ago, before joining PBSC
    start_date = '01-01-2015'

    # Set end_date to current day
    end_date = get_current_date_str()

    trip_dict = getTripsAPI({}, start_date, end_date, xsrf_token, laravel_session)

    # Time only the follow-up paging requests
    start_time = datetime.now()

    # With no trips there is no minimum date to page from, so the loop is skipped
    # instead of failing on min() of an empty sequence.
    while trip_dict:
        min_date = find_minimum_start_date(trip_dict)
        new_end_date = min_date.strftime('%d-%m-%Y')

        # getTripsAPI updates the dict in place, so take the size before calling it
        prev_trip_dict_len = len(trip_dict)
        trip_dict = getTripsAPI(trip_dict, start_date, new_end_date, xsrf_token, laravel_session)

        # Stop once a request no longer adds any ride
        if len(trip_dict) == prev_trip_dict_len:
            break

    time_to_complete = datetime.now() - start_time
    print(time_to_complete)

    return trip_dict

### IF PREVIOUS DATA FOUND, THIS IS CALLED TO GET NEW TRIPS FROM BIKESHARE

def getNewTrips(cached_trip_dict):
    """Fetch the trips taken since the most recent cached ride.

    The request window starts on the date of the latest cached trip and ends
    today. The end of the window is then moved back to the earliest start date
    collected so far until that date belongs to a cached trip, or until a
    request adds nothing new (which would otherwise repeat forever).

    Reads the notebook-level ``xsrf_token`` and ``laravel_session`` variables.

    Args:
        cached_trip_dict: non-empty dict of cached trips; every entry must
            hold a ``start_date`` datetime.

    Returns:
        dict mapping trip id -> trip info for every trip fetched. It may
        include trips that are already cached. Empty when the first request
        yields no trips.
    """
    # Set start date to the day of the last cached ride
    last_start_date = find_max_start_date(cached_trip_dict)
    start_date = last_start_date.strftime('%d-%m-%Y')

    # Set end_date to current day
    end_date = get_current_date_str()

    trip_dict = getTripsAPI({}, start_date, end_date, xsrf_token, laravel_session)

    # Nothing in the window: there is no minimum date to page from
    if not trip_dict:
        return trip_dict

    min_date = find_minimum_start_date(trip_dict)
    new_end_date = min_date.strftime('%d-%m-%Y')

    # Calculate time to get all responses
    start_time = datetime.now()

    # Start dates of every cached trip, used to detect overlap with the cache
    cached_start_dates = {trip_info["start_date"] for trip_info in cached_trip_dict.values()}

    while True:
        # getTripsAPI updates the dict in place, so take the size before calling it
        prev_trip_dict_len = len(trip_dict)
        trip_dict = getTripsAPI(trip_dict, start_date, new_end_date, xsrf_token, laravel_session)

        # The earliest fetched ride is already cached: the gap is closed
        if min_date in cached_start_dates:
            break

        # No progress means the next request would be identical; stop instead of looping forever
        if len(trip_dict) == prev_trip_dict_len:
            break

        min_date = find_minimum_start_date(trip_dict)
        new_end_date = min_date.strftime('%d-%m-%Y')

    # Print time it took to get all responses
    time_to_complete = datetime.now() - start_time
    print(time_to_complete)

    return trip_dict


### HELPER FUNCTIONS - HTML PARSING
def parseTrips(soup):
    """Turn a parsed trips page into a dict of trips keyed by trip id.

    Every element with class ``trip-block`` yields one entry holding its start
    and end datetimes, its duration in seconds and its start and end station
    names. The trip id is the hash produced by ``getTripId`` for that block.
    """
    trip_blocks = soup.find_all(class_='trip-block')

    # One list per field, each aligned with trip_blocks
    start_dates = getData(trip_blocks, 'start-date')
    end_dates = getData(trip_blocks, 'end-date')
    durations = getData(trip_blocks, 'duration')
    start_stations = getData(trip_blocks, 'start-station-name-block')
    end_stations = getData(trip_blocks, 'end-station-name-block')

    # Replace the raw strings with datetimes / seconds, position by position
    if len(start_dates) != len(end_dates):
        print("Error - different length of start and end dates")
    else:
        for idx in range(len(start_dates)):
            start_dates[idx] = dateStrToDate(start_dates[idx])
            end_dates[idx] = dateStrToDate(end_dates[idx])
            durations[idx] = parse_duration(durations[idx])

    return {
        getTripId(trip_blocks[idx]): {
            "start_date": start_dates[idx],
            "end_date": end_dates[idx],
            "duration": durations[idx],
            "start_station": start_stations[idx],
            "end_station": end_stations[idx],
        }
        for idx in range(len(start_dates))
    }


### HELPER FUNCTIONS - GET MIN START DATE
def find_minimum_start_date(trip_dict):
    """Return the earliest ``start_date`` among the trips (ValueError when there are none)."""
    return min([trip_info['start_date'] for trip_info in trip_dict.values()])

def find_max_start_date(trip_dict):
    """Return the latest ``start_date`` among the trips (ValueError when there are none)."""
    return max([trip_info['start_date'] for trip_info in trip_dict.values()])

def get_current_date_str():
    """Return today's local date formatted as ``DD-MM-YYYY``."""
    return datetime.now().strftime('%d-%m-%Y')

# Define a custom encoder for datetime objects
def datetime_encoder(obj):
    """``default`` hook for ``json.dump``: datetimes become ISO-8601 strings.

    Any other type is rejected with a TypeError.
    """
    if not isinstance(obj, datetime):
        raise TypeError("Object of type {} is not JSON serializable".format(type(obj)))
    return obj.isoformat()

### GOOGLE MAPS API CALL
def getRouteDistanceOnBike(start_lat, start_lon, end_lat, end_lon):
    """Ask Google Maps for a bicycling route between two coordinates.

    A new client is created on every call with the notebook-level ``api_key``.

    Returns:
        Tuple ``(distance_meters, polyline)`` read from the first returned
        route: the distance value of its first leg and its overview polyline.
    """
    gmaps = googlemaps.Client(key=api_key)

    directions_result = gmaps.directions(
        (start_lat, start_lon),   # origin as (latitude, longitude)
        (end_lat, end_lon),       # destination as (latitude, longitude)
        mode='bicycling',
        departure_time=datetime.now(),
    )

    # Only the first route is used
    first_route = directions_result[0]
    distance_meters = first_route['legs'][0]['distance']['value']
    polyline = first_route['overview_polyline']['points']
    return distance_meters, polyline



In [None]:
##### BIKESHARE LOGIN AND GET TRIP HISTORY

In [None]:
### Fetch the login page to obtain the session cookies and the CSRF form token

url = "https://members.bikesharetoronto.com/login"

payload = {}
headers = {
    'authority': 'members.bikesharetoronto.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'en,es-419;q=0.9,es;q=0.8,en-US;q=0.7',
    'referer': 'https://bikesharetoronto.com/',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
}

response = requests.get(url, headers=headers, data=payload)

# Cookies set by the login page; any "%3D" sequence is removed from the XSRF token
cookies = response.cookies
xsrf_token = cookies['XSRF-TOKEN'].replace('%3D', "")
laravel_session = cookies['laravel_session']

# The CSRF token needed by the login form lives in a <meta name="csrf-token"> tag
html_content = response.text
soup = BeautifulSoup(html_content, 'html.parser')
csrf_meta_tag = soup.find('meta', attrs={'name': 'csrf-token'})
csrf_token = csrf_meta_tag.get('content')

In [None]:
### Login

import requests

url = "https://members.bikesharetoronto.com/login"

# Send the form fields as a dict so requests URL-encodes each value. Building the body by
# string concatenation corrupts it whenever the token, username or password contains
# characters such as "&", "+", "=" or "%".
payload = {
    '_token': csrf_token,
    'userName': username,
    'password': password,
}
headers = {
  'authority': 'members.bikesharetoronto.com',
  'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  'accept-language': 'en,es-419;q=0.9,es;q=0.8,en-US;q=0.7',
  'cache-control': 'max-age=0',
  'content-type': 'application/x-www-form-urlencoded',
  'origin': 'https://members.bikesharetoronto.com',
  'referer': 'https://members.bikesharetoronto.com/login',
  'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
  'Cookie': 'XSRF-TOKEN=' + xsrf_token + '; laravel_session=' + laravel_session
}

response = requests.request("POST", url, headers=headers, data=payload)

# Replace the pre-login cookie values with the ones returned by the login response;
# the trip-fetching functions read these two notebook-level variables.
cookies = response.cookies
xsrf_token = cookies['XSRF-TOKEN'].replace('%3D', "")
laravel_session = cookies['laravel_session']

In [None]:
### GET TRIPS

# Start from an empty trip_dict
trip_dict = {}

# Nothing cached -> download the whole history; otherwise fetch from the most recent cached trip onwards
if not cached_trip_dict:
    trip_dict = getAllTrips()
else:
    trip_dict = getNewTrips(cached_trip_dict)

    ### KEEP ONLY THE TRIPS THAT AREN'T CACHED -- ASSUMES HISTORICAL DATA CAN'T CHANGE
    keys_to_keep = getTripsNotCached(trip_dict, cached_trip_dict)
    print(str(len(keys_to_keep)) + " new trips found")
    trip_dict = {key: trip_dict[key] for key in trip_dict if key in keys_to_keep}


In [None]:
### CLEAN DATA

# FILTER OUT RECORDS WHERE start_station AND end_station ARE THE SAME
trip_dict_filtered = {trip_id: trip_data for trip_id, trip_data in trip_dict.items() if trip_data['start_station'] != trip_data['end_station']}
trip_dict = trip_dict_filtered

print(trip_dict)

# CHECK FOR TRIPS WITH A NULL END STATION, AND DELETE THEM, PRINTING IDS BEING DELETED
empty_end_trips = {trip_id: trip_info for trip_id, trip_info in trip_dict.items() if trip_info['end_station'] == ''}

# dict.pop with a default never raises, so no try/except is needed around it
for key_to_delete in empty_end_trips:
    trip_dict.pop(key_to_delete, None)
    print(key_to_delete)

print(trip_dict)

In [None]:
### GET STATION LOCATIONS

# Start station name of every trip
start_stations = []
for trip_info in trip_dict.values():
    start_stations.append(trip_info['start_station'])

# GBFS station_information feed; the station list sits under data -> stations
url = 'https://tor.publicbikesystem.net/ube/gbfs/v1/en/station_information'

res = requests.get(url).json()
stations = res['data']['stations']

In [None]:
# #### EXPORT STATIONS TO CSV

# # Define the CSV file name
# csv_file = 'stations.csv'

# all_keys = set(key for item in stations for key in item.keys())

# # Write the list of dictionaries to a CSV file
# with open(csv_file, mode='w', newline='') as file:
#     writer = csv.DictWriter(file, fieldnames=all_keys)
    
#     # Write the header
#     writer.writeheader()
    
#     # Write the data rows
#     for row in stations:
#         # Create a new dictionary with all possible keys, setting default values to None
#         new_row = {key: row.get(key, None) for key in all_keys}
#         writer.writerow(new_row)

# print(f'CSV file "{csv_file}" has been created.')

In [None]:
### ADD LAT AND LON OF START AND END STATIONS, WHILE KEEPING TRACK OF LOCATIONS THAT AREN'T FOUND

unmatched_trip_ids = []
unmatched_station_names = []

# Index the stations by name once instead of scanning the whole list twice per trip.
# setdefault keeps the first station seen for a name, which is the one a front-to-back scan finds.
stations_by_name = {}
for station in stations:
    stations_by_name.setdefault(station['name'], station)

for trip_id, trip_info in trip_dict.items():
    start_station_name = trip_info['start_station']
    end_station_name = trip_info['end_station']

    # None when the feed has no station with that name
    matching_start_station = stations_by_name.get(start_station_name)
    matching_end_station = stations_by_name.get(end_station_name)

    if matching_start_station:
        # Add lat and lon to the trip_dict
        trip_info['start_lat'] = matching_start_station['lat']
        trip_info['start_lon'] = matching_start_station['lon']
    else:
        unmatched_trip_ids.append(trip_id)
        unmatched_station_names.append(start_station_name)

    if matching_end_station:
        # Add lat and lon to the trip_dict
        trip_info['end_lat'] = matching_end_station['lat']
        trip_info['end_lon'] = matching_end_station['lon']
    else:
        # A trip missing both stations is recorded twice in unmatched_trip_ids
        unmatched_trip_ids.append(trip_id)
        unmatched_station_names.append(end_station_name)

In [None]:
# ### UNCOMMENT TO PRINT UNMATCHED STATIONS TO ADD THEM TO MANUAL LOOKUP
# unique_unmatched_stations =list(set(unmatched_station_names))
# unique_unmatched_stations

In [None]:
# Hand-entered coordinates for station names that are not found in the station feed,
# keyed by the exact station name as it appears in the trip data.
manual_lookup = {
    'Princess St / Adelaide St E': {'lat': 43.652246, 'lon': -79.367226},
    'Adelaide St W / Bay St - SMART': {'lat': 43.650207, 'lon': -79.379938},
    'Bloor St W / Huron St': {'lat': 43.667007, 'lon': -79.401706},
    'Martin Goodman Trail / Remembrance Dr': {'lat': 43.635495, 'lon': -79.4035221},
    'Simcoe St / Adelaide St W': {'lat': 43.648515, 'lon': -79.386461},
    'Avenue Rd / Cumberland St': {'lat': 43.6698217, 'lon': -79.39444615},
    'Dundas St E / Victoria St': {'lat': 43.6566482, 'lon': -79.3797481},
    "D'Arcy St / Spadina Ave - SMART": {'lat': 43.6540496, 'lon': -79.3982009},
    'Adelaide St W / Portland St': {'lat': 43.645618, 'lon': -79.400459},
    'Fennings St / Queen St W': {'lat': 43.643833, 'lon': -79.421123},
    'Widmer St / Adelaide St W': {'lat': 43.6472947, 'lon': -79.3915765},
    'Stewart St / Bathurst St  - SMART': {'lat': 43.643317, 'lon': -79.402075},
    'Queen St E / George St (Moss Park)': {'lat': 43.654208, 'lon': -79.372065},
    'Wellington St W / Portland St': {'lat': 43.643044, 'lon': -79.399596},
    'Fleet St / Garrison Rd (Fort York)': {'lat': 43.636882, 'lon': -79.408826},
    'Foster Pl / Elizabeth St': {'lat': 43.654550, 'lon': -79.384645},
    'Queen St W / James St': {'lat': 43.6524891, 'lon': -79.3807605},
}

In [None]:
# Trips recorded as having a start or end station that was not matched
unmatched_trip_dict = {}
for trip_id, trip_info in trip_dict.items():
    if trip_id in unmatched_trip_ids:
        unmatched_trip_dict[trip_id] = trip_info


In [None]:
### FILL IN MISSING LAT AND LON FROM THE MANUAL LOOKUP
for trip_id, trip_info in unmatched_trip_dict.items():
    target_trip = trip_dict[trip_id]

    # Start station coordinates, when the earlier matching step did not set them
    if 'start_lat' not in trip_info:
        start_station = trip_info['start_station']
        start_coords = manual_lookup[start_station]
        target_trip['start_lat'] = start_coords['lat']
        target_trip['start_lon'] = start_coords['lon']

    # End station coordinates, handled the same way
    if 'end_lat' not in trip_info:
        end_station = trip_info['end_station']
        end_coords = manual_lookup[end_station]
        target_trip['end_lat'] = end_coords['lat']
        target_trip['end_lon'] = end_coords['lon']

In [None]:
# NOTE(review): this rebinding replaces the trips fetched and enriched above with the cached
# ones, so every following cell works on cached_trip_dict — confirm whether this is intended
# or a leftover from a one-off backfill.
trip_dict = cached_trip_dict

In [None]:
### ADD ROUTE DISTANCE (METERS), POLYLINE AND SPEED (KM/H) TO EACH TRIP VIA GOOGLE MAPS
start_time = datetime.now()

for key, value in trip_dict.items():
    bike_dist, polyline = getRouteDistanceOnBike(
        value['start_lat'],
        value['start_lon'],
        value['end_lat'],
        value['end_lon'],
    )

    value['bike_dist'] = bike_dist
    value['polyline'] = polyline

    # distance / duration is in m/s; multiply by 3.6 for km/h
    speed_ms = bike_dist / value['duration']
    value['speed'] = speed_ms * 3.6

# Report how long the lookups took
end_time = datetime.now()
time_to_complete = end_time - start_time
print(time_to_complete)

In [None]:
# Copy the polyline of every trip present in trip_dict onto the matching cached trip
for key, value in cached_trip_dict.items():
    if key not in trip_dict:
        continue
    value['polyline'] = trip_dict[key]['polyline']

In [None]:
### ADD NEW TRIPS TO CACHED DATA, CACHED DICT INITIALIZED TO EMPTY DICT SO WORKS EVEN IF NO SAVED DATA
# all_trip_dict is an alias, not a copy: the update below also modifies cached_trip_dict
all_trip_dict = cached_trip_dict

# Entries from trip_dict overwrite cached entries that share the same trip id
all_trip_dict.update(trip_dict)
print(len(all_trip_dict))

In [None]:
# Display the full contents of the cache dict as the cell output
cached_trip_dict

In [None]:
# ### SAVE NEW TRIPS TO CACHE

# # Save the dictionary to a JSON file with the custom encoder
# with open(file_path, "w") as json_file:
#     json.dump(all_trip_dict, json_file, default=datetime_encoder)

# # assign trip_dict to all_trips
# trip_dict = all_trip_dict

In [None]:
# ## FOR TESTING - Remove all entries after May 15

# threshold_date = datetime(2023, 5, 15)

# # Create a new dictionary to store filtered entries
# filtered_trip_dict = {}

# # Iterate through the dictionary and filter entries
# for key, data in cached_trip_dict.items():
#     if data["start_date"] <= threshold_date:
#         filtered_trip_dict[key] = data
        
# len(filtered_trip_dict)

# with open(file_path, "w") as json_file:
#     json.dump(filtered_trip_dict, json_file, default=datetime_encoder)

In [None]:
##### YEAR IN REVIEW

In [None]:
# ### Show rides by month in table form

# month_counts = df.groupby('Months').size().reset_index(name='Count')
# month_counts

In [None]:
##### YEAR IN REVIEW FUNCTIONS

### Get total ride distance
def getTotalDist(trip_dict):
    """Return the sum of ``bike_dist`` over all trips (0 for an empty dict)."""
    return sum(trip_info['bike_dist'] for trip_info in trip_dict.values())

### GET DATA TO PLOT DISTANCE GRAPHS
def getRideCountByMonthDf(trip_dict):
    """Build a one-column DataFrame (``Months``) with a ``YYYY-MM`` label per trip.

    There is one row per trip, in dict order; counting the rows per label
    gives the rides per month.
    """
    month_labels = []
    for trip_info in trip_dict.values():
        month_labels.append(trip_info['start_date'].strftime('%Y-%m'))

    return pd.DataFrame({'Months': month_labels})
    
def plotRideCountByMonth(df, save_path=None):
    """Draw a bar chart of the number of trips per month.

    Args:
        df: DataFrame with a ``Months`` column holding one label per trip.
        save_path: optional file path; when given, the figure is saved there
            before it is shown.
    """
    # Rides per month, ordered by month label
    month_counts = df['Months'].value_counts().sort_index()

    month_counts.plot.bar(edgecolor='black', color='#4EA154')
    plt.title('Number of Trips')
    plt.xticks(rotation=45)

    # Saving is optional
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    plt.tight_layout()
    plt.show()


def getDfForDistPlotting(trip_dict):
    """Build the DataFrame used by the distance plots.

    Columns: ``start_date`` (as stored on the trip), ``bike_dist`` (the trip's
    ``bike_dist`` divided by 1000) and ``month`` (``YYYY-MM`` label derived
    from ``start_date``).
    """
    trips = list(trip_dict.values())

    df = pd.DataFrame({
        'start_date': [trip_info['start_date'] for trip_info in trips],
        'bike_dist': [trip_info['bike_dist'] for trip_info in trips],
    })

    # Scale the distance down by a factor of 1000
    df['bike_dist'] = df['bike_dist'].div(1000)

    # Month label used for grouping
    df['month'] = df['start_date'].dt.strftime('%Y-%m')

    return df

### PLOT AVERAGE RIDE DISTANCE BY MONTH
def plotAverageDistByMonth(df):
    """Draw a bar chart of the mean ``bike_dist`` per ``month``."""
    monthly_mean = df.groupby('month', as_index=False)['bike_dist'].mean()

    plt.bar(
        monthly_mean['month'],
        monthly_mean['bike_dist'],
        color='#4EA154',
        edgecolor='black',
    )
    plt.title('Average Ride Distance (km)')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()


def plotTotalDistByMonth(df, save_path=None):
    """Draw a bar chart of the summed ``bike_dist`` per ``month``.

    Args:
        df: DataFrame with ``month`` and ``bike_dist`` columns.
        save_path: optional file path; when given, the figure is saved there
            before it is shown.
    """
    monthly_sum = df.groupby('month', as_index=False)['bike_dist'].sum()

    plt.bar(
        monthly_sum['month'],
        monthly_sum['bike_dist'],
        color='#4EA154',
        edgecolor='black',
    )
    plt.title('Total Bike Distance (km)')
    plt.xticks(rotation=45)

    plt.tight_layout()

    # Saving is optional
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    plt.show()

# Example usage of the function with a DataFrame 'df'
# plotTotalDistByMonth(df, save_path='total_distance_by_month.png')


def getTop5Routes(trip_dict):
    """Return up to five ``((start_station, end_station), count)`` pairs, most frequent first."""
    route_counts = Counter()
    for trip_info in trip_dict.values():
        route_counts[(trip_info['start_station'], trip_info['end_station'])] += 1

    return route_counts.most_common(5)

In [None]:
from collections import Counter
# Import the class, not the module: a plain `import datetime` here would rebind the name
# `datetime` to the module and break every `datetime(...)`, `datetime.now()` and
# `datetime.strptime(...)` call in the notebook.
from datetime import datetime

def get_most_rides_date_and_count(trip_dict):
    """Find the calendar day with the most rides.

    Args:
        trip_dict: non-empty dict of trips, each holding a ``start_date`` datetime.

    Returns:
        Tuple ``(date, count)``: the day on which the most trips started and
        the number of trips that started on it.

    Raises:
        IndexError: if ``trip_dict`` is empty.
    """
    # Extract start dates from 'trip_dict', dropping the time of day
    start_dates = [trip['start_date'].date() for trip in trip_dict.values()]

    # Count the occurrences of each start date
    date_counts = Counter(start_dates)

    # Find the date with the most rides and its count
    most_rides_date, most_rides_count = date_counts.most_common(1)[0]

    return most_rides_date, most_rides_count

# Example usage: report the busiest day among the trips currently held in trip_dict
most_rides_date, most_rides_count = get_most_rides_date_and_count(trip_dict)
print(f"The date with the most rides is {most_rides_date} with {most_rides_count} rides.")


In [None]:
### Year in Review
# Either the string "All time" or a calendar year as an int
year = "All time"
# year = 2022

if year == "All time":
    # No artificial bounds: a hard-coded end year would silently drop later rides
    start_of_year = datetime.min
    end_of_year = datetime.max

else:
    start_of_year = datetime(year, 1, 1, 0, 0, 0)
    end_of_year = datetime(year, 12, 31, 23, 59, 59)

# Keep the trips that start after the lower bound and end before the upper bound
filtered_trip_dict = {
    key: value for key, value in trip_dict.items() if
    value["start_date"] > start_of_year and
    value["end_date"] < end_of_year
}

In [None]:
### Key calculations
### Get total ride time

# Sum over the filtered trips so that time, distance and ride count all cover the same
# period; summing over every trip would skew the time and average speed of a single year.
total_seconds = sum(trip_info['duration'] for trip_info in filtered_trip_dict.values())

total_time = format_duration(total_seconds)

total_rides = len(filtered_trip_dict)
total_km = round(getTotalDist(filtered_trip_dict) / 1000,1)
average_ride_distance = round(total_km / total_rides,2)

first_ride_date = find_minimum_start_date(filtered_trip_dict)

# km -> m, divided by seconds gives m/s; multiplied by 3.6 gives km/h
average_speed = round(total_km * 1000 / total_seconds * 3.6,1)

In [None]:
### Get month with most rides and number of rides

rideCountDf = getRideCountByMonthDf(filtered_trip_dict)
month_counts = rideCountDf['Months'].value_counts().sort_index()

# Label ('YYYY-MM') of the busiest month and its ride count
month_with_most_rides = month_counts.idxmax()
num_rides_in_top_month = month_counts[month_with_most_rides]

date_obj = datetime.strptime(month_with_most_rides, '%Y-%m')

# 'Month Year' for the all-time view, just 'Month' for a single year
month_label_format = '%B %Y' if year == "All time" else '%B'
month_with_most_rides_to_print = date_obj.strftime(month_label_format)

# Plot the rides per month and save the chart
df = getRideCountByMonthDf(filtered_trip_dict)
save_path = 'numRidesChart.jpg'
plotRideCountByMonth(df, save_path)

In [None]:
### Get avg dist by month
# Build the per-trip distance frame for the filtered trips and plot its monthly mean
dist_df = getDfForDistPlotting(filtered_trip_dict)
plotAverageDistByMonth(dist_df)

In [None]:
### Get total dist by month
# File the chart is saved to when plotted
save_path = 'totalDistChart.jpg'

plotTotalDistByMonth(dist_df, save_path)

In [None]:
### Get top 5 routes

top5Routes = getTop5Routes(filtered_trip_dict)

# Each entry is ((start station, end station), ride count); the first one is the most frequent
topRoute = top5Routes[0]
(topRouteStationStart, topRouteStationEnd), topRouteRide = topRoute

# ### Print top route
topRouteString = (
    "Top route is " + topRouteStationStart
    + " to " + topRouteStationEnd
    + ", ridden " + str(topRouteRide) + " times"
)
print(topRouteString)

# ### Print top 5 routes
# for pair, count in top5Routes:
#     print(f"Station pair {pair} with {count} rides.")

In [None]:
### Build the summary lines
year_str = "Year in review: " + str(year)
total_time_str = total_time + " riding " 
km_rides_str = str(total_km) + "km over " + str(total_rides) + " rides"
avg_km_per_ride_str = "For an average ride distance of " + str(average_ride_distance) + "km"
average_speed_str = "At an average speed of: " + str(average_speed) + "km/h"
top_month_str = str(month_with_most_rides_to_print) + " was the most common month, with " + str(num_rides_in_top_month) + " rides"
top_route_str = topRouteString
first_ride_str = "The first ride was on " + first_ride_date.strftime('%b %d, %Y')

# Print them in display order (speed before average distance)
for summary_line in (
    year_str,
    total_time_str,
    km_rides_str,
    average_speed_str,
    avg_km_per_ride_str,
    top_month_str,
    top_route_str,
    first_ride_str,
):
    print(summary_line)

In [None]:
# Get the current working directory (the directory where your notebook is located)
notebook_directory = os.getcwd()


def _measure_text(draw, text, font):
    """Return ``(width, height)`` of ``text`` rendered with ``font``.

    Uses ``ImageDraw.textsize`` where it exists and falls back to
    ``ImageDraw.textbbox`` on Pillow releases that no longer provide it.
    """
    if hasattr(draw, 'textsize'):
        return draw.textsize(text, font=font)
    # Right and bottom edges of the box anchored at (0, 0) stand in for width and height
    _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right, bottom


def _paste_chart(canvas, chart_path, top_y):
    """Shrink the chart to at most half the canvas size, paste it horizontally centred at ``top_y`` and return its height."""
    chart = Image.open(chart_path)
    # Image.LANCZOS is the same filter the removed Image.ANTIALIAS alias pointed to
    chart.thumbnail((canvas.width / 2, canvas.height / 2), Image.LANCZOS)
    canvas.paste(chart, ((canvas.width - chart.width) // 2, top_y))
    return chart.height


# Specify the filename of the first PNG image (Year in Review)
image_filename = "Year_in_Review_template.png"
image_path = os.path.join(notebook_directory, image_filename)

# Check if the first image file exists
if os.path.exists(image_path):
    # Open the template and create a draw context on it
    img = Image.open(image_path)
    draw = ImageDraw.Draw(img)
    img_width, _ = img.size

    ### SPECIFY FONTS
    title_font_size = 64
    title_text_color = (0, 0, 0)  # black in RGB
    title_font = ImageFont.truetype("fonts/Roboto/Roboto-Bold.ttf", title_font_size)

    large_font_size = 36
    large_text_color = (0, 0, 0)  # black in RGB
    large_font = ImageFont.truetype("fonts/Roboto/Roboto-Medium.ttf", large_font_size)

    ### ADD YEAR IN REVIEW TEXT
    text_width, _ = _measure_text(draw, year_str, title_font)
    text_x = (img_width - text_width) // 2  # Center X-coordinate
    text_y = 378  # based on Figma
    draw.text((text_x, text_y), year_str, fill=title_text_color, font=title_font)

    ### ADD VARIABLE TEXT
    text_strings = [total_time_str, km_rides_str, top_month_str, first_ride_str]

    # Every line gets the height of the tallest one so the rows are evenly spaced
    text_heights = [
        _measure_text(draw, text_str, large_font)[1]
        for text_str in text_strings
        if text_str is not None
    ]
    max_text_height = max(text_heights)

    # Specify the initial Y-coordinate for positioning text
    initial_text_y = 477

    for text_str in text_strings:
        if text_str is not None:
            text_width, _ = _measure_text(draw, text_str, large_font)
            text_x = (img_width - text_width) // 2  # Center X-coordinate

            text_y = initial_text_y
            initial_text_y += max_text_height + 12  # Add padding

            draw.text((text_x, text_y), text_str, fill=large_text_color, font=large_font)

    ### ADD CHARTS
    # Y-coordinate of the first chart; the next chart goes below whatever was pasted
    second_image_y = 753
    next_chart_y = second_image_y

    # Specify the filename of the second image (totalDistChart)
    second_image_filename = "totalDistChart.jpg"
    second_image_path = os.path.join(notebook_directory, second_image_filename)

    if os.path.exists(second_image_path):
        pasted_height = _paste_chart(img, second_image_path, next_chart_y)
        next_chart_y += pasted_height + 24
    else:
        print(f"The second image file {second_image_filename} does not exist in the notebook directory.")

    third_image_filename = "numRidesChart.jpg"
    third_image_path = os.path.join(notebook_directory, third_image_filename)

    if os.path.exists(third_image_path):
        # Placed below the second chart, or in its position when that chart is missing
        # (previously this branch failed with a NameError in that case)
        _paste_chart(img, third_image_path, next_chart_y)

        plt.figure(figsize=(1080/80, 1920/80))

        # Display the combined image
        plt.imshow(img)
        plt.axis('off')
        plt.show()
    else:
        print(f"The third image file {third_image_filename} does not exist in the notebook directory.")
else:
    print(f"The first image file {image_filename} does not exist in the notebook directory.")

In [None]:
##### STRAVA
### TODO https://developers.strava.com/docs/reference/

In [None]:
### GET TORONTO OPEN DATA SOURCE URL

# import requests

# # Toronto Open Data is stored in a CKAN instance. Its APIs are documented here:
# # https://docs.ckan.org/en/latest/api/

# # To hit our API, you'll be making requests to:
# base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"

# # Datasets are called "packages". Each package can contain many "resources"
# # To retrieve the metadata for this package and its resources, use the package name in this page's URL:
# url = base_url + "/api/3/action/package_show"
# params = { "id": "bike-share-toronto"}
# package = requests.get(url, params = params).json()

# # To get resource data:
# for idx, resource in enumerate(package["result"]["resources"]):

#        # To get metadata for non datastore_active resources:
#        if not resource["datastore_active"]:
#            url = base_url + "/api/3/action/resource_show?id=" + resource["id"]
#            resource_metadata = requests.get(url).json()
#            print(resource_metadata)
#            # From here, you can use the "url" attribute to download this file

# resource_metadata

In [None]:
### GET ~REALTIME BIKE STATION STATUS

# url = 'https://tor.publicbikesystem.net/ube/gbfs/v1/en/station_status'

# res = requests.get(url).json()
# res

In [None]:
## Random person investigating bikeshare

# blog: https://towardsdatascience.com/exploring-toronto-bike-share-ridership-using-python-3dc87d35cb62
# github: https://github.com/open-data-toronto/story-bike-share-ridership/blob/master/get_routes.ipynb