In [None]:
import json
import os
import pprint
import time
from datetime import datetime, timedelta
from urllib.parse import urlparse, parse_qs

import gpxpy
import gpxpy.gpx
import polyline
import pytz
import requests
from geopy.distance import geodesic
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from env import *

# Strava API settings; the upper-case names come from the star import of `env`.
client_id = CLIENT_ID
client_secret = CLIENT_SECRET
redirect_uri = REDIRECT_URI
# NOTE(review): later cells build URLs as `base_url + 'athlete'`, so BASE_URL
# presumably ends with a trailing slash -- confirm in env.py.
base_url = BASE_URL

# Specify the path to the newly downloaded ChromeDriver executable
chrome_driver_path = CHROME_DRIVER_PATH

# Specify the file path for cached data (JSON keyed by trip id, see the loading cell)
file_path = "../BikeShare/caching.json"

In [None]:


### Load previously cached trips
cached_trip_dict = {}

# A cache file that exists is assumed to contain a JSON object keyed by trip id
if os.path.exists(file_path):
    with open(file_path, "r") as json_file:
        cached_trip_dict = json.load(json_file)

    # Dates are stored as ISO strings on disk; turn them back into datetime objects
    for trip_id, trip_data in cached_trip_dict.items():
        for date_key in ('start_date', 'end_date'):
            if date_key in trip_data:
                cached_trip_dict[trip_id][date_key] = datetime.fromisoformat(trip_data[date_key])

    print(f'{len(cached_trip_dict)} rides found')

In [None]:
### HELPER FUNCTIONS

def getIsoStartDate(start_date):
    """Return `start_date` as a UTC ISO 8601 string ending in 'Z'.

    `start_date` is passed to pytz's `localize` for America/Toronto, so it is
    treated as Toronto wall-clock time before being converted to UTC.
    """
    toronto = pytz.timezone('America/Toronto')
    as_utc = toronto.localize(start_date).astimezone(pytz.utc)
    return as_utc.strftime('%Y-%m-%dT%H:%M:%SZ')

# `default=` hook for json.dump / json.dumps that knows how to write datetimes
def datetime_encoder(obj):
    """Return the ISO 8601 string for a datetime; raise TypeError for anything else."""
    if not isinstance(obj, datetime):
        raise TypeError("Object of type {} is not JSON serializable".format(type(obj)))
    return obj.isoformat()

In [None]:
### AUTH FLOW - GET AUTHORIZATION CODE FROM LOGIN, DOESN'T WORK WITH GOOGLE
# Opens a Chrome window on Strava's OAuth consent page, waits up to 180 seconds for the
# browser to be redirected, then reads the `code` query parameter from the final URL.

scopes = 'read,activity:write,activity:read_all,profile:read_all'

# Construct the authorization URL
# NOTE(review): the redirect target is hard-coded to https://localhost while the wait
# below matches on `redirect_uri`; confirm REDIRECT_URI agrees with this value.
authorization_url = 'https://www.strava.com/oauth/authorize?client_id=' + str(client_id) + '&response_type=code&redirect_uri=https://localhost&scope=' + scopes

# Reset first so a failed login cannot silently reuse a code from an earlier run of this cell
authorization_code = None

service = Service(executable_path=chrome_driver_path)
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=service, options=options)

try:
    # Open the Strava authorization page in the browser window (not headless: the user logs in)
    driver.get(authorization_url)

    try:
        WebDriverWait(driver, 180).until(
            EC.url_contains(redirect_uri)
        )
    except TimeoutException:
        print('Timed out waiting for redirection or URL does not contain the redirect_uri.')

    current_url = driver.current_url
finally:
    # Close the WebDriver even when navigation or the wait raises
    driver.quit()

# Parse the URL to extract the authorization code
url_components = urlparse(current_url)
query_params = parse_qs(url_components.query)

if 'code' in query_params:
    authorization_code = query_params['code'][0]
else:
    print('Authorization Code not found in the URL')

In [None]:
### GET ACCESS TOKEN USING AUTHORIZATION CODE

# Endpoint URL to exchange authorization code for an access token
token_url = 'https://www.strava.com/oauth/token'

# Data for the POST request
data = {
    'client_id': client_id,
    'client_secret': client_secret,
    'code': authorization_code,
    'grant_type': 'authorization_code'
}

# Send the POST request to get the access token
response = requests.post(token_url, data=data)

# Parse the response JSON
data = response.json()

# Surface the server's own error text instead of an opaque KeyError on the next line
if 'access_token' not in data:
    raise RuntimeError(f'Token exchange failed (status {response.status_code}): {response.text}')

# Extract the access token from the response
access_token = data['access_token']

In [None]:
### Fetch the authenticated athlete and pick out the id of the bike named 'City Bike'
headers = {'Authorization': f'Bearer {access_token}'}
url = base_url + 'athlete'
response = requests.get(url, headers=headers)
athlete_details = response.json()

# Empty string means "not found"; if several bikes share the name, the last one wins
bike_id = ''
for bike in athlete_details['bikes']:
    if bike['name'] != 'City Bike':
        continue
    bike_id = bike['id']

if bike_id == '':
    print("No city bike found")

In [None]:
# # Iterate through the dictionary and remove 'strava_id' from nested dictionaries
# for key, value in cached_trip_dict.items():
#     if 'strava_id' in value:
# #         if key != '75ac1935f60119b496b9363717fa3c9be2d54552fc90a4db686f14cd829af595':
#         value.pop('strava_id')
#     if 'upload_id' in value:
#         value.pop('upload_id')

In [None]:
##### PRIMARY HELPER FUNCTIONS

def getTripsNotInStrava(trip_dict):
    """Return, in dict order, the keys whose trip record has no 'strava_id' entry."""
    return [key for key, value in trip_dict.items() if 'strava_id' not in value.keys()]

def _localize_toronto(moment):
    """Attach the America/Toronto zone to a naive datetime; aware datetimes pass through."""
    if moment.tzinfo is not None:
        return moment
    return pytz.timezone('America/Toronto').localize(moment)


def generate_timestamps_for_polyline(polyline_str, start_time, total_duration_seconds):
    """Spread a trip's duration over the points of an encoded polyline.

    Each point is given a timestamp proportional to the geodesic distance
    travelled to reach it, so the first point is stamped `start_time` and the
    last point `start_time + total_duration_seconds`.

    Args:
        polyline_str: encoded polyline, decoded with `polyline.decode`.
        start_time: datetime of the first point. Naive values are localized to
            America/Toronto; aware values are used as given.
        total_duration_seconds: trip length in seconds.

    Returns:
        A list of `(lat, lon, timestamp)` tuples, one per decoded point, every
        timestamp timezone-aware. Empty when the polyline decodes to no points.
    """
    points = polyline.decode(polyline_str)
    if not points:
        return []

    # Cumulative geodesic distance from the first point to each point
    cumulative_meters = [0.0]
    for (lat1, lon1), (lat2, lon2) in zip(points, points[1:]):
        leg_meters = geodesic((lat1, lon1), (lat2, lon2)).meters
        cumulative_meters.append(cumulative_meters[-1] + leg_meters)
    total_distance_meters = cumulative_meters[-1]

    last_index = len(points) - 1
    points_with_timestamps = []
    for index, (lat, lon) in enumerate(points):
        if total_distance_meters > 0:
            fraction = cumulative_meters[index] / total_distance_meters
        elif last_index > 0:
            # All points coincide: no distance to weight by, so space them evenly in time
            fraction = index / last_index
        else:
            # Single point: it can only sit at the start of the trip
            fraction = 0.0

        timestamp = start_time + timedelta(seconds=total_duration_seconds * fraction)
        points_with_timestamps.append((lat, lon, _localize_toronto(timestamp)))

    return points_with_timestamps


def create_gpx_file(points_with_timestamps, gpx_file_name):
    """Write a single-track, single-segment GPX file.

    Args:
        points_with_timestamps: iterable of `(lat, lon, timestamp)` tuples, in
            track order.
        gpx_file_name: destination path; its parent folder is created if missing.
    """
    # GPX object -> one track -> one segment holding every point
    gpx = gpxpy.gpx.GPX()
    gpx_track = gpxpy.gpx.GPXTrack()
    gpx.tracks.append(gpx_track)
    gpx_segment = gpxpy.gpx.GPXTrackSegment()
    gpx_track.segments.append(gpx_segment)

    gpx_segment.points.extend(
        gpxpy.gpx.GPXTrackPoint(lat, lon, time=timestamp)
        for lat, lon, timestamp in points_with_timestamps
    )

    # A fresh checkout may not have the output folder yet
    target_dir = os.path.dirname(gpx_file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Write as UTF-8 explicitly so the bytes do not depend on the platform's locale encoding
    with open(gpx_file_name, 'w', encoding='utf-8') as gpx_file:
        gpx_file.write(gpx.to_xml())


def convert_datetime_to_epoch(date_time_obj):
    """Return the whole-second Unix timestamp of `date_time_obj`.

    Returns None when computing the timestamp raises ValueError.
    """
    try:
        seconds = date_time_obj.timestamp()
        epoch = int(seconds)
    except ValueError:
        return None
    return epoch
    
    
    
#### LOOPING HELPER FUNCTION
### ASSIGN STRAVA ID TO TRIP DICT BASED ON UPLOAD ID 
def assignStravaIdForUpload(base_url, access_token, trip_dict, trip_id=None):
    """Find the Strava activity created by a trip's upload and record its id.

    Activities are fetched for the trip's time span padded by 1000 seconds on
    each side and matched on the trip's 'upload_id'. On a match the activity id
    is stored under 'strava_id' in the trip's record; otherwise a message is
    printed and the record is left unchanged.

    Args:
        base_url: Strava API base URL.
        access_token: OAuth bearer token.
        trip_dict: mapping of trip id -> trip record; the record must contain
            'upload_id', 'start_date' and 'end_date'.
        trip_id: key of the trip to resolve. When omitted, the notebook-level
            `trip_id` variable is used, which is how existing callers work.

    Returns:
        The same `trip_dict`, mutated in place.
    """
    if trip_id is None:
        # Backward compatibility: earlier callers relied on the notebook-level `trip_id`
        trip_id = globals()['trip_id']

    # Read everything from the record being resolved rather than from notebook globals
    trip = trip_dict[trip_id]
    this_upload_id = trip['upload_id']

    # Get time range to filter activities being returned
    after = convert_datetime_to_epoch(trip['start_date']) - 1000
    before = convert_datetime_to_epoch(trip['end_date']) + 1000

    # Get activities from Strava and find the one produced by this upload
    activities = getActivities(base_url, access_token, before, after)
    uploaded_activity = get_activity_upload(activities, this_upload_id)

    # Assign the strava_id to the trip record
    if uploaded_activity:
        trip['strava_id'] = uploaded_activity['id']
    else:
        print("No activity found, likely upload still processing")

    return trip_dict

### Apply the standard settings to an uploaded activity
def updateData(base_url, access_token, bike_id, trip_id, strava_id):
    """Send the fixed set of activity fields for `strava_id` via `updateActivity`.

    The payload hides the activity from the home feed, marks it as a commute
    Ride, sets the gear to `bike_id` and the external id to `trip_id`.
    The response is not returned.
    """
    updated_data = dict(
        hide_from_home=True,
        gear_id=bike_id,
        commute=True,
        external_id=trip_id,
        type='Ride',
    )
    updateActivity(updated_data, base_url, access_token, strava_id)

### WRITE UPDATED TRIP DICT VALUES TO CACHE
def updateCache(file_path, trip_dict, cached_trip_dict):
    """Merge updated trip records into the cache and write the cache to disk.

    Args:
        file_path: JSON file to overwrite.
        trip_dict: trip id -> updated record; only ids already present in
            `cached_trip_dict` are merged.
        cached_trip_dict: full cache, mutated in place and then serialized.
    """
    # Copy updated records into the cache (assignment, not comparison)
    for key in trip_dict:
        if key in cached_trip_dict:
            cached_trip_dict[key] = trip_dict[key]

    # Serialize before opening the file: if encoding fails, the existing cache is not truncated
    serialized = json.dumps(cached_trip_dict, default=datetime_encoder)
    with open(file_path, "w") as json_file:
        json_file.write(serialized)

        
### Call Strava to upload
def uploadGPXFileHelper(base_url, access_token, gpx_file_name, trip_dict, trip_id=None):
    """Upload a trip's GPX file to Strava and record the resulting upload id.

    On a 201 response the upload id is stored under 'upload_id' in the trip's
    record; on any other status the status code and response body are printed
    and the record is left unchanged.

    Args:
        base_url: Strava API base URL.
        access_token: OAuth bearer token.
        gpx_file_name: path of the GPX file to send.
        trip_dict: mapping of trip id -> trip record; the record must contain
            'start_date', 'start_station' and 'end_station'.
        trip_id: key of the trip being uploaded. When omitted, the notebook-level
            `trip_id` variable is used, which is how existing callers work.
    """
    if trip_id is None:
        # Backward compatibility: earlier callers relied on the notebook-level `trip_id`
        trip_id = globals()['trip_id']

    # Use the record passed in rather than reaching for a notebook global
    trip_data = trip_dict[trip_id]
    start_date = trip_data['start_date']

    trip_long_title_str = 'PBSC Ride, ' + start_date.strftime('%b %d %H:%M')
    trip_descrip_str = "City Bike: " + trip_data['start_station'] + ' to ' + trip_data['end_station'] + '\n\n' + trip_id

    # Form fields sent alongside the file
    data = {
        'name': trip_long_title_str,
        'commute': True,
        'data_type': 'gpx',
        'description': trip_descrip_str,
        'external_id': trip_id,
        'type': 'Ride'
    }

    response = upload_activity_with_gpx_file(base_url, access_token, gpx_file_name, data)

    if response.status_code == 201:
        # Only a successful response is expected to carry the upload id
        trip_dict[trip_id]['upload_id'] = response.json()['id']
    else:
        print(f'Error uploading GPX activity. Status code: {response.status_code}')
        print(response.text)

        
### Update activity
def updateActivity(data, base_url, access_token, activity_id):
    """PUT `data` as JSON to `activities/<activity_id>` and return the response object."""
    target = base_url + ('activities/' + str(activity_id))
    auth_header = {'Authorization': f'Bearer {access_token}'}
    return requests.put(target, headers=auth_header, json=data)

In [None]:
##### API FUNCTIONS

### UPLOAD GPX FILE
def upload_activity_with_gpx_file(base_url, access_token, gpx_file_path, data):
    """POST a GPX file to the `uploads` endpoint and return the response object.

    Args:
        base_url: API base URL; 'uploads' is appended directly to it.
        access_token: OAuth bearer token.
        gpx_file_path: path of the GPX file to send as the 'file' form part.
        data: additional form fields for the upload.
    """
    url = base_url + 'uploads'
    headers = {'Authorization': f'Bearer {access_token}'}

    # Keep the file open only for the duration of the request so the handle is not leaked
    with open(gpx_file_path, 'rb') as gpx_stream:
        files = {'file': ('activity.gpx', gpx_stream, 'application/gpx+xml')}
        response = requests.post(url, headers=headers, data=data, files=files)

    return response

### GET ACTIVITY BASED ON UPLOAD ID
def get_activity_upload(activities, _id):
    """Return the first activity whose 'upload_id' equals `_id`, or None if there is none."""
    matches = (activity for activity in activities if activity.get('upload_id') == _id)
    return next(matches, None)

### GET CURRENT LOGGED IN ATHLETE ID
def getAthleteId(base_url, access_token):
    """GET `<base_url>athlete` and return the 'id' field of the JSON response."""
    auth_header = {'Authorization': f'Bearer {access_token}'}
    athlete = requests.get(base_url + 'athlete', headers=auth_header).json()
    return athlete['id']

### GET ACTIVITIES, FILTERABLE BY TIME RANGE
def getActivities(base_url, access_token, before, after):
    """Fetch the athlete's activities within an epoch-seconds window.

    Args:
        base_url: API base URL, with or without a trailing slash.
        access_token: OAuth bearer token.
        before: upper bound, sent as the `before` query parameter.
        after: lower bound, sent as the `after` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    # Other endpoints are appended to base_url without a slash, so strip one here
    # to avoid producing '...//athlete/activities'
    url = base_url.rstrip('/') + '/athlete/activities'
    headers = {'Authorization': f'Bearer {access_token}'}

    # Let requests build and encode the query string
    r = requests.get(url, headers=headers, params={'before': before, 'after': after})

    if r.status_code != 200:
        print(f'Error fetching activities. Status code: {r.status_code}')

    return r.json()


In [None]:
### CREATE ENTRIES ON LOOP
# For each cached trip without a Strava id: build a GPX file, upload it, wait for the
# upload to be processed, tag the resulting activity, and persist the ids to the cache.

MAX_TRIPS_PER_RUN = 10  # stop after this many trips in one execution of the cell
UPLOAD_PROCESSING_WAIT_SECONDS = 8  # pause between the upload and the activity lookup

counter = 0

while True:

    ### Import PBSC data, only keeping trips that don't have a Strava ID registered
    keys_to_keep = getTripsNotInStrava(cached_trip_dict)
    print(str(len(keys_to_keep)) + " trips not in Strava, out of a total " + str(len(cached_trip_dict)))
    if len(keys_to_keep) == 0:
        print("All trips created")
        break

    trip_dict = {key: value for key, value in cached_trip_dict.items() if key in keys_to_keep}

    ### Find first record not already uploaded to Strava
    trip_id = next(iter(trip_dict))
    trip_data = trip_dict[trip_id]

    polyline_str = trip_data['polyline']
    start_date = trip_data['start_date']
    total_duration_seconds = trip_data['duration']
    trip_long_title_str = 'PBSC Ride, ' + start_date.strftime('%b %d %H:%M')

    # Get inferred timestamps for polyline, req'd for Strava
    points_with_timestamps = generate_timestamps_for_polyline(polyline_str, start_date, total_duration_seconds)

    # Create GPX file, save it in gpx folder
    folder_path = 'gpx_files/'
    gpx_file_name = folder_path + trip_long_title_str + ".gpx"
    create_gpx_file(points_with_timestamps, gpx_file_name)

    # Discard an upload id left by an earlier attempt so a failed upload below is detectable
    trip_data.pop('upload_id', None)

    uploadGPXFileHelper(base_url, access_token, gpx_file_name, trip_dict)

    # The helper stores 'upload_id' only on a 201 response; without it there is nothing
    # to look up, and looping again would just pick the same trip
    if 'upload_id' not in trip_dict[trip_id]:
        print("Stopping: upload failed for trip " + str(trip_id))
        break

    print(str(trip_dict[trip_id]['upload_id']) + " upload created")

    # Wait to allow for upload processing
    time.sleep(UPLOAD_PROCESSING_WAIT_SECONDS)

    ### UPDATE ENTRY

    trip_dict = assignStravaIdForUpload(base_url, access_token, trip_dict)
    strava_id = trip_dict[trip_id].get('strava_id')

    if strava_id is None:
        # The activity is not visible yet: keep the upload id on disk and stop instead of
        # failing with a KeyError
        updateCache(file_path, trip_dict, cached_trip_dict)
        print("Stopping: no Strava activity found yet for upload " + str(trip_dict[trip_id]['upload_id']))
        break

    updateData(base_url, access_token, bike_id, trip_id, strava_id)

    ### WRITE TO CACHE
    updateCache(file_path, trip_dict, cached_trip_dict)

    print(str(strava_id) + " ride created")

    counter += 1
    if counter >= MAX_TRIPS_PER_RUN:
        break

In [None]:
### ONE OFF

In [None]:
### Narrow the cache down to trips with no Strava ID and pick the first of them
keys_to_keep = getTripsNotInStrava(cached_trip_dict)
print(f"{len(keys_to_keep)} trips not in Strava, out of a total {len(cached_trip_dict)}")
trip_dict = {key: cached_trip_dict[key] for key in cached_trip_dict if key in keys_to_keep}

### First record not already uploaded to Strava, if there is one
if not keys_to_keep:
    print("No new trips")
else:
    trip_id = next(iter(trip_dict))

In [None]:
# Pull the fields of the selected trip that the following one-off cells use
trip_data = cached_trip_dict[trip_id]

polyline_str = trip_data['polyline']
start_date = trip_data['start_date']
# NOTE(review): `distance` is not read by any active cell below -- confirm it is still needed
distance = trip_data['bike_dist']
total_duration_seconds = trip_data['duration']

# Activity title (also used for the GPX file name) and description; the description
# ends with the trip id
trip_long_title_str = 'PBSC Ride, ' + start_date.strftime('%b %d %H:%M')
trip_descrip_str = "City Bike: " + trip_data['start_station'] + ' to ' + trip_data['end_station'] + '\n\n' + trip_id

In [None]:
# Get inferred timestamps for polyline, req'd for Strava
points_with_timestamps = generate_timestamps_for_polyline(polyline_str, start_date, total_duration_seconds)

# Create GPX file, save it in gpx folder; the file is named after the activity title
folder_path = 'gpx_files/'
gpx_file_name = folder_path + trip_long_title_str + ".gpx"
create_gpx_file(points_with_timestamps, gpx_file_name)


In [None]:
# ### Check timestamp diff for trip duration

# print(points_with_timestamps[-1][2] - points_with_timestamps[0][2])

In [None]:
### Call Strava to upload
gpx_file_path = gpx_file_name

# Form fields sent alongside the GPX file ('description' is listed once; the earlier
# duplicate key was overwritten by this value anyway)
data = {
    'name': trip_long_title_str,
    'commute': True,
    'data_type': 'gpx',
    'description': trip_descrip_str,
    'external_id': trip_id,
    'type': 'Ride'
}

response = upload_activity_with_gpx_file(base_url, access_token, gpx_file_path, data)

if response.status_code == 201:
    # Read the upload id only from a successful response
    data = response.json()
    upload_id = data['id']
    trip_dict[trip_id]['upload_id'] = upload_id

else:
    print(f'Error uploading GPX activity. Status code: {response.status_code}')
    print(response.text)

    

In [None]:
# Look up the Strava activity that the upload produced
this_upload_id = trip_dict[trip_id]['upload_id']

# Search window: the trip's own time span padded by 1000 seconds on each side
start_date = trip_data['start_date']
end_date = trip_data['end_date']
after = convert_datetime_to_epoch(start_date) - 1000
before = convert_datetime_to_epoch(end_date) + 1000

# Fetch the activities in that window and match on the upload id
activities = getActivities(base_url, access_token, before, after)
uploaded_activity = get_activity_upload(activities, this_upload_id)

# Record the activity id on the trip when a match exists
if not uploaded_activity:
    print("No activity found, likely upload still processing")
else:
    trip_dict[trip_id]['strava_id'] = uploaded_activity['id']
    strava_id = trip_dict[trip_id]['strava_id']


In [None]:
### UPDATE UPLOADED ACTIVITY

# 'gear_id' appears once and uses the bike id looked up from the athlete profile, matching
# updateData; a second hard-coded 'gear_id' entry used to override it silently
updated_data = {
    'hide_from_home': True,
    'gear_id': bike_id,
    'commute': True,
    'external_id': trip_id,
    'type': 'Ride'
}

res = updateActivity(updated_data, base_url, access_token, strava_id)
res

In [None]:
### Write updated trip_dict to caching.json 

# Copy updated records into cached_trip_dict (assignment, not comparison)
for key in trip_dict:
    if key in cached_trip_dict:
        cached_trip_dict[key] = trip_dict[key]

# Serialize with the custom encoder before opening the file, so a serialization error
# cannot leave a truncated cache behind
serialized_cache = json.dumps(cached_trip_dict, default=datetime_encoder)
with open(file_path, "w") as json_file:
    json_file.write(serialized_cache)


In [None]:
# Re-fetch the activities for the `before`/`after` window computed in the lookup cell
# above and display the raw result
activities = getActivities(base_url, access_token, before, after)
activities

In [None]:

# trip_dict = assignStravaIdForUpload(base_url, access_token, trip_dict)
# strava_id = trip_dict[trip_id]['strava_id']

# updateData(base_url, access_token, 'b13308146', trip_id, strava_id)

    
    
    ### WRITE TO CACHE
# updateCache(file_path, trip_dict, cached_trip_dict)

In [None]:
### TO DO - GET CARBON SAVED

In [None]:
# import re
# from bs4 import BeautifulSoup

# def extract_carbon_saved(html_text):
#     # Parse the HTML using BeautifulSoup
#     soup = BeautifulSoup(html_text, 'html.parser')

#     # Find the table row with the "Carbon Saved" label
#     carbon_saved_row = soup.find('th', text='Carbon Saved').find_parent('tr')

#     # Extract the value from the row
#     carbon_saved_value = carbon_saved_row.find('td').get_text(strip=True)

#     # Use regex to extract the numeric part
#     numeric_value = re.search(r'(\d+\.\d+)', carbon_saved_value).group(1)

#     return numeric_value



In [None]:
# # Read HTML content from the file
# html_file_path = '../trip_html_response.html'
# with open(html_file_path, 'r', encoding='utf-8') as file:
#     html_content = file.read()

# # Extract the numeric value of "Carbon Saved"
# carbon_saved_numeric = extract_carbon_saved(html_content)
# print("Carbon Saved (Numeric):", carbon_saved_numeric)


In [None]:
### OLD METHOD, WITHOUT MAP - JUST CREATE ACTIVITY

In [None]:
# ### Create activity

# def createActivity(data, base_url, access_token):

#     endpoint = 'activities'
#     url = base_url + endpoint

#     headers = {'Authorization': f'Bearer {access_token}'}

#     res = requests.post(url, headers=headers, data=data)
    
#     return res



In [None]:
# ### LOOP TO CREATE ACTIVITIES AND UPDATE THEM TO BE CITY BIKE AND HIDDEN
# ### RATE LIMIT IS 200 REQUESTS EVERY 15 MINS, 2000 PER DAY

# counter = 0

# # Define data structure for updating, as initial create field doesn't seem to take these fields
# updated_data = {
#     'hide_from_home': True,
#     'gear_id': 'b13308146',
#     'commute': True
# }

# activity_ids = []
# trip_ids_with_issue = []

# for trip_id, trip_data in trip_dict.items():
    
#     original_start_date = trip_data['start_date']

#     trip_title_str = 'PBSC Ride, ' + original_start_date.strftime('%b %d %H:%M')
#     trip_descrip_str = "City Bike: " + trip_data['start_station'] + ' to ' + trip_data['end_station'] + '\n\n' + trip_id
#     iso_start_date = getIsoStartDate(original_start_date)
#     duration = trip_data['duration']
#     distance = trip_data['bike_dist']
    
#     data = {
#         'name': trip_title_str,
#         'type': 'Ride',  # Replace with the appropriate activity type
#         'start_date_local': iso_start_date,  # ISO 8601 formatted date time in UTC
#         'elapsed_time': duration,
#         'description': trip_descrip_str,
#         'distance': distance,
#         'trainer': False,  # Set to False to mark as a non-trainer activity
#         'commute': True,  # True marks the activity as a commute
#     }
    
#     counter += 1
#     if counter == 60:
#         break
    
#     ### CREATE ACTIVITY
#     try:
#         res = createActivity(data, base_url, access_token)

#         activity_id = res.json()['id']
#         activity_ids.append(activity_id)
#         trip_dict[trip_id]['strava_id'] = activity_id

#     except:
#         trip_ids_with_issue.append(trip_id)
#         print("err on trip: " + trip_id)
    
    
#     ### UPDATE ACTIVITY
#     try:
#         res = updateActivity(updated_data, base_url, access_token, activity_id)
        
#     except:
#         trip_ids_with_issue.append(trip_id)
#         print("err on updating activity " + str(activity_id))
        
    

In [None]:
##### MISC PLAYING AROUND WITH STRAVA API

In [None]:
# Display the bike id found by the athlete lookup cell ('' when no 'City Bike' was found)
bike_id

In [None]:
# uploaded_activity = activites['name']

In [None]:
# # Replace this with the activity ID you want to retrieve
# activity_id = 9950366076

# # Endpoint URL to get activity details
# activity_url = f'https://www.strava.com/api/v3/activities/{activity_id}'

# # Set the authorization header with the access token
# headers = {'Authorization': f'Bearer {access_token}'}

# # Send the GET request to get activity details
# response = requests.get(activity_url, headers=headers)
# activity_data = response.json()
# pprint.pprint(activity_data)

In [None]:
# activity_data['segment_efforts']

In [None]:
# # Replace this with the segment effort ID you want to retrieve
# segment_id = 3120856183202904508

# # Endpoint URL to get segment effort details
# segment_url = f'https://www.strava.com/api/v3/segment_efforts/{segment_id}'

# # Set the authorization header with the access token
# headers = {'Authorization': f'Bearer {access_token}'}

# # Send the GET request to get activity details
# response = requests.get(segment_url, headers=headers)
# segment_data = response.json()
# pprint.pprint(segment_data)

In [None]:
# segment_url

In [None]:
# # Replace this with the segment ID you want to retrieve
# segment_id = 3217667

# # Endpoint URL to get segment details
# segment_url = f'https://www.strava.com/api/v3/segments/{segment_id}'

# # Set the authorization header with the access token
# headers = {'Authorization': f'Bearer {access_token}'}

# # Send the GET request to get activity details
# response = requests.get(segment_url, headers=headers)
# segment_data = response.json()
# pprint.pprint(segment_data)

In [None]:
# activity_id = 9949481813

# # Endpoint URL to get activity details
# activity_url = f'https://www.strava.com/api/v3/activities/{activity_id}'

# # Set the authorization header with the access token
# headers = {'Authorization': f'Bearer {access_token}'}

# # Send the GET request to get activity details
# response = requests.get(activity_url, headers=headers)
# activity_data = response.json()
# pprint.pprint(activity_data)

In [None]:


# url = construct_request(get_stats_endpoint, access_token)
# r = requests.get(url)
# data = r.json()
# # data