From d18f23d6f0f049290d6c1fa8e1cd50096f275efe Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Thu, 1 Oct 2020 01:48:27 -0400
Subject: [PATCH 01/23] Provide a ScrapingClient that doesn't need API access

Also adds the ability to list activities using web scraping instead of
the API. The activities are returned as `ScrapedActivity` objects that
are mostly compatible with the `Activity` objects returned by the
API-based activity listing.
---
 stravaweblib/webclient.py | 207 +++++++++++++++++++++++++++++++++-----
 1 file changed, 182 insertions(+), 25 deletions(-)

diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index 4e1e5e9..a3baee9 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ -6,22 +6,92 @@
 import functools
 import json
 import time
+import uuid

 from bs4 import BeautifulSoup
 import requests
 import stravalib
+from stravalib.attributes import Attribute, TimestampAttribute, TimeIntervalAttribute
+from stravalib.model import Activity, BaseEntity

-__all__ = ["WebClient", "FrameType", "DataFormat", "ExportFile", "ActivityFile"]
+__all__ = ["WebClient", "ScrapingClient", "FrameType", "DataFormat", "ExportFile", "ActivityFile", "ScrapedActivity"]

 BASE_URL = "https://www.strava.com"

+# Used for filtering when scraping the activity list
+ACTIVITY_WORKOUT_TYPES = {
+    "Ride": {None: 10, "Race": 11, "Workout": 12},
+    "Run": {None: 0, "Race": 1, "Long Run": 2, "Workout": 3}
+}

 ExportFile = namedtuple("ExportFile", ("filename", "content"))
 ActivityFile = ExportFile  # TODO: deprecate and remove

+class ScrapingError(ValueError):
+    """An error that is raised when something fails during scraping

+    This can happen because something on the website changed.
+    """


+class ScrapedActivity(BaseEntity):
+    """
+    Represents an Activity (ride, run, etc.) 
that was scraped from the website + + The attributes are compatible with stravalib.model.Activity where they exist + """ + + id = Attribute(int) + name = Attribute(str) + description = Attribute(str) + type = Attribute(str) + workout_type = Attribute(str) + + start_date = TimestampAttribute() + distance = Attribute(float) + moving_time = TimeIntervalAttribute() + elapsed_time = TimeIntervalAttribute() + total_elevation_gain = Attribute(float) + suffer_score = Attribute(int) + calories = Attribute(float) + gear_id = Attribute(str) + + # True if the activity has GPS coordinates + # False for trainers, manual activities, etc + has_latlng = Attribute(bool) + + trainer = Attribute(bool) + commute = Attribute(bool) + private = Attribute(bool) + flagged = Attribute(bool) + + def from_dict(self, d): + bike_id = d.pop("bike_id", None) + shoes_id = d.pop("athlete_gear_id", None) + if bike_id: + d["gear_id"] = "b{}".format(bike_id) + elif shoes_id: + d["gear_id"] = "g{}".format(shoes_id) + + d["start_date"] = d.pop("start_time") + d["distance"] = d.pop("distance_raw") + d["moving_time"] = d.pop("moving_time_raw") + d["elapsed_time"] = d.pop("elapsed_time_raw") + d["total_elevation_gain"] = d.pop("elevation_gain_raw") + + wt = d.pop("workout_type") + if d["type"] in ACTIVITY_WORKOUT_TYPES: + for k, v in ACTIVITY_WORKOUT_TYPES[d["type"]].items(): + if wt == v: + d["workout_type"] = k + break + + return super().from_dict(d) + + class DataFormat(enum.Enum): ORIGINAL = "original" GPX = "gpx" @@ -48,10 +118,11 @@ def __str__(self): return str(self.name).replace("_", " ").title() -class WebClient(stravalib.Client): +class ScrapingClient: """ - An extension to the stravalib Client that fills in some of the gaps in - the official API using web scraping. + A client that uses web scraping to interface with Strava. + + Can be used as a mixin to add the extra methods to the main stravalib.Client """ def __init__(self, *args, **kwargs): @@ -75,20 +146,8 @@ def __init__(self, *args, **kwargs): else: raise ValueError("'jwt' or both of 'email' and 'password' are required") - # Init the normal stravalib client with remaining args super().__init__(*args, **kwargs) - # Verify that REST API and Web API correspond to the same Strava user account - if self.access_token is not None: - rest_id = str(self.get_athlete().id) - web_id = self._session.cookies.get('strava_remember_id') - if rest_id != web_id: - raise stravalib.exc.LoginFailed("API and web credentials are for different accounts") - else: - # REST API does not have an access_token (yet). Should we verify the match after - # exchange_code_for_token()? - pass - @property def jwt(self): return self._session.cookies.get('strava_remember_token') @@ -155,6 +214,89 @@ def _login_with_password(self, email, password): if not resp.is_redirect or resp.next.url == "{}/login".format(BASE_URL): raise stravalib.exc.LoginFailed("Couldn't log in to website, check creds") + def scrape_activities(self, keywords=None, activity_type=None, workout_type=None, + commute=False, is_private=False, indoor=False, gear_id=None): + """A scraping-based alternative to stravalib.Client.get_activities() + + Note that when using multiple parameters they are treated as AND, not OR + + :param keywords: Text to search for + :param activity_type: The type of the activity. 
See stravalib.model:Activity.TYPES
+        :param workout_type: The type of workout ("Race", "Workout", etc.)
+        :param commute: Only return activities marked as commutes
+        :param is_private: Only return private activities
+        :param indoor: Only return indoor/trainer activities
+        :param gear_id: Only return activities using this gear

+        :yield: ScrapedActivity objects
+        """

+        if activity_type is not None and activity_type not in Activity.TYPES:
+            raise ValueError(
+                "Invalid activity type. Must be one of: {}".format(",".join(Activity.TYPES))
+            )

+        if activity_type in ACTIVITY_WORKOUT_TYPES:
+            workout_type = ACTIVITY_WORKOUT_TYPES[activity_type].get(workout_type)
+            if workout_type is None:
+                raise ValueError(
+                    "Invalid workout type for a {}. Must be one of: {}".format(
+                        activity_type,
+                        ", ".join(ACTIVITY_WORKOUT_TYPES[activity_type].keys())
+                    )
+                )
+        elif workout_type is not None or gear_id is not None:
+            raise ValueError(
+                "Can only filter using workout type or gear when activity type is one of: {}".format(
+                    ", ".join(ACTIVITY_WORKOUT_TYPES.keys())
+                )
+            )

+        page = 1
+        per_page = 20
+        search_session_id = uuid.uuid4()

+        conv_bool = lambda x: "" if not x else "true"

+        while True:
+            resp = self._session.get(
+                "{}/athlete/training_activities".format(BASE_URL),
+                headers={
+                    "Accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript",
+                    #"X-CSRF-Token": next(iter(self.csrf.values())),
+                    "X-Requested-With": "XMLHttpRequest",
+                },
+                params={
+                    "search_session_id": search_session_id,
+                    "page": page,
+                    "per_page": per_page,
+                    "keywords": keywords,
+                    "new_activity_only": "false",
+                    "activity_type": activity_type or "",
+                    "commute": conv_bool(commute),
+                    "private_activities": conv_bool(is_private),
+                    "trainer": conv_bool(indoor),
+                    "gear": gear_id or "",
+                }
+            )
+            if resp.status_code != 200:
+                raise stravalib.exc.Fault(
+                    "Failed to list activities (status code {})".format(resp.status_code)
+                )
+            try:
+                data = resp.json()["models"]
+            except (ValueError, TypeError, KeyError) as e:
+                raise ScrapingError(
+                    "Invalid JSON response from Strava"
+                ) from e

+            for activity in data:
+                yield ScrapedActivity(**activity)

+            # No results = stop requesting pages
+            if not data:
+                break

     def delete_activity(self, activity_id):
         """
         Deletes the specified activity.
@@ -201,8 +343,7 @@ def _make_export_file(resp, id_):
             content=resp.iter_content(chunk_size=16*1024)  # 16KB
         )

-    def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL,
-                          json_fmt=None):
+    def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL, json_fmt=None):
         """
         Get a file containing the provided activity's data

@@ -231,6 +372,9 @@ def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL,
         fmt = DataFormat.classify(fmt)
         url = "{}/activities/{}/export_{}".format(BASE_URL, activity_id, fmt)
         resp = self._session.get(url, stream=True, allow_redirects=False)
+
+        # Gives a 302 back to the activity URL when trying to export a manual activity
+        # TODO: Does this also happen with other errors? 
if resp.status_code != 200: raise stravalib.exc.Fault("Status code '{}' received when trying " "to download an activity" @@ -246,7 +390,8 @@ def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL, return self._make_export_file(resp, activity_id) - def _parse_date(self, date_str): + @staticmethod + def _parse_date(date_str): if not date_str: return None if date_str.lower() == "since beginning": @@ -254,7 +399,7 @@ def _parse_date(self, date_str): return datetime.utcfromtimestamp(0).date() try: return datetime.strptime(date_str, "%b %d, %Y").date() - except ValueError as e: + except ValueError: return None @functools.lru_cache() @@ -278,12 +423,15 @@ def _get_all_bike_components(self, bike_id): "Failed to load bike details page (status code: {})".format(resp.status_code), ) - soup = BeautifulSoup(resp.text, 'html.parser') - for table in soup.find_all('table'): - if table.find('thead'): + soup = BeautifulSoup(resp.text, 'html5lib') + table = None + for t in soup.find_all('table'): + if t.find('thead'): + table = t break - else: - raise ValueError("Bike component table not found in the HTML - layout update?") + + if not table: + raise ScrapingError("Bike component table not found in the HTML - layout update?") components = [] for row in table.tbody.find_all('tr'): @@ -363,6 +511,15 @@ def get_route_data(self, route_id, fmt=DataFormat.GPX): return self._make_export_file(resp, route_id) +# Mix in the ScrapingClient to inherit all its methods +class WebClient(ScrapingClient, stravalib.Client): + """ + An extension to the stravalib Client that fills in some of the gaps in + the official API using web scraping. + + Requires a JWT or both of email and password + """ + # Inherit parent documentation for WebClient.__init__ WebClient.__init__.__doc__ = stravalib.Client.__init__.__doc__ + \ From cc9e606114e4d7cf95798d41748a0ac0767391b8 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Sat, 3 Oct 2020 23:51:37 -0400 Subject: [PATCH 02/23] Add the ability to scrape photos --- stravaweblib/webclient.py | 84 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index a3baee9..7518c70 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -5,17 +5,22 @@ import enum import functools import json +import re import time import uuid from bs4 import BeautifulSoup import requests import stravalib -from stravalib.attributes import Attribute, TimestampAttribute, TimeIntervalAttribute -from stravalib.model import Activity, BaseEntity +from stravalib.attributes import (Attribute, TimestampAttribute, + TimeIntervalAttribute, LocationAttribute) +from stravalib.model import Activity, BaseEntity, BoundEntity -__all__ = ["WebClient", "ScrapingClient", "FrameType", "DataFormat", "ExportFile", "ActivityFile", "ScrapedActivity"] +__all__ = [ + "WebClient", "ScrapingClient", "FrameType", "DataFormat", "ExportFile", + "ActivityFile", "ScrapedActivity", "ScrapedPhoto" +] BASE_URL = "https://www.strava.com" @@ -26,6 +31,8 @@ "Run": {None: 0, "Race": 1, "Long Run": 2, "Workout": 3} } +PHOTOS_REGEX = re.compile(r"var photosJson\s*=\s*(\[.*\]);") + ExportFile = namedtuple("ExportFile", ("filename", "content")) ActivityFile = ExportFile # TODO: deprecate and remove @@ -37,7 +44,38 @@ class ScrapingError(ValueError): """ -class ScrapedActivity(BaseEntity): +class ScrapedPhoto(BaseEntity): + """Represents a photo scraped from Strava's activity details page + + The attributes are compatible with 
stravalib.models.ActivityPhoto where + they exist. + """ + + unique_id = Attribute(str) + activity_id = Attribute(int) + athlete_id = Attribute(int) + caption = Attribute(str) + + location = LocationAttribute() + + urls = Attribute(dict) # dimension: url + + def from_dict(self, d): + d["unique_id"] = d.pop("photo_id") + d["athlete_id"] = d.pop("owner_id") + + # The caption has unicode escapes (ie. \uFFFF) embedded in the string + d["caption"] = d.pop("caption_escaped", "").encode("utf-8").decode("unicode_escape") + d["urls"] = { + str(min(dim.values())): d.pop(name) + for name, dim in d.pop("dimensions").items() + } + d["location"] = [d.pop("lat"), d.pop("lng")] + + return super().from_dict(d) + + +class ScrapedActivity(BoundEntity): """ Represents an Activity (ride, run, etc.) that was scraped from the website @@ -68,6 +106,20 @@ class ScrapedActivity(BaseEntity): private = Attribute(bool) flagged = Attribute(bool) + _photos = None + + @property + def photos(self): + """Returns a list of ScrapedPhoto objects""" + if self._photos is None: + self.assert_bind_client() + self._photos = self.bind_client.scrape_activity_photos(self.id) + return self._photos + + @property + def total_photo_count(self): + return len(self.photos) + def from_dict(self, d): bike_id = d.pop("bike_id", None) shoes_id = d.pop("athlete_gear_id", None) @@ -214,6 +266,28 @@ def _login_with_password(self, email, password): if not resp.is_redirect or resp.next.url == "{}/login".format(BASE_URL): raise stravalib.exc.LoginFailed("Couldn't log in to website, check creds") + def scrape_activity_photos(self, activity_id): + """Get photos for an activity""" + resp = self._session.get("{}/activities/{}".format(BASE_URL, activity_id)) + resp.raise_for_status() + + soup = BeautifulSoup(resp.content, 'html5lib') + try: + script = next((x for x in soup.find_all("script") if "var photosJson" in x.text)) + except StopIteration: + raise ScrapingError("Failed to find photo data in page") + + m = PHOTOS_REGEX.search(script.text) + if not m: + raise ScrapingError("Failed to extract photo data from page") + + try: + photos = json.loads(m.group(1)) + except (TypeError, ValueError) as e: + raise ScrapingError("Failed to parse extracted photo data") from e + + return [ScrapedPhoto(**p) for p in photos] + def scrape_activities(self, keywords=None, activity_type=None, workout_type=None, commute=False, is_private=False, indoor=False, gear_id=None): """A scraping-based alternative to stravalib.Client.get_activities() @@ -291,7 +365,7 @@ def scrape_activities(self, keywords=None, activity_type=None, workout_type=None ) from e for activity in data: - yield ScrapedActivity(**activity) + yield ScrapedActivity(bind_client=self, **activity) # No results = stop requesting pages if not data: From 5cab40dc86440b6d5060dac82e1f26f9e3be07d3 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Sat, 3 Oct 2020 23:52:25 -0400 Subject: [PATCH 03/23] Remove caching for bike component scraping This should be done by the library consumer if it's needed --- stravaweblib/webclient.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index 7518c70..094538d 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -3,7 +3,6 @@ from collections import namedtuple from datetime import date, datetime import enum -import functools import json import re import time @@ -476,7 +475,6 @@ def _parse_date(date_str): except ValueError: return None - @functools.lru_cache() def _get_all_bike_components(self, 
bike_id): """ Get all components for the specified bike From 0f55bd30c2e931ba71958190b2b3d40789991d17 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Sun, 4 Oct 2020 02:30:28 -0400 Subject: [PATCH 04/23] Make the ScrapingClient somewhat api-compatible It's not going to be perfect, but the idea is that for the most basic of cases it should be a pretty close replacement. The goal is to keep the amount of work to support both API and scraping-based clients to a minimum. To support this, the WebClient now uses delegation instead of inheritance to add scraper-based functionality. This enables the `ScrapingClient` class to use the same function names without automatically overriding the `stravalib.Client` functions when used through the `WebClient` class. --- stravaweblib/webclient.py | 84 +++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index 094538d..0bf1509 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -3,6 +3,7 @@ from collections import namedtuple from datetime import date, datetime import enum +import functools import json import re import time @@ -112,7 +113,7 @@ def photos(self): """Returns a list of ScrapedPhoto objects""" if self._photos is None: self.assert_bind_client() - self._photos = self.bind_client.scrape_activity_photos(self.id) + self._photos = self.bind_client.get_activity_photos(self.id) return self._photos @property @@ -265,8 +266,13 @@ def _login_with_password(self, email, password): if not resp.is_redirect or resp.next.url == "{}/login".format(BASE_URL): raise stravalib.exc.LoginFailed("Couldn't log in to website, check creds") - def scrape_activity_photos(self, activity_id): - """Get photos for an activity""" + def get_activity_photos(self, activity_id): + """A scraping-based alternative to stravalib.Client.get_activity_photos + + :param activity_id: The activity for which to fetch photos. + + :return: A list of ScrapedPhoto objects + """ resp = self._session.get("{}/activities/{}".format(BASE_URL, activity_id)) resp.raise_for_status() @@ -287,9 +293,10 @@ def scrape_activity_photos(self, activity_id): return [ScrapedPhoto(**p) for p in photos] - def scrape_activities(self, keywords=None, activity_type=None, workout_type=None, - commute=False, is_private=False, indoor=False, gear_id=None): - """A scraping-based alternative to stravalib.Client.get_activities() + def get_activities(self, keywords=None, activity_type=None, workout_type=None, + commute=False, is_private=False, indoor=False, gear_id=None, + before=None, after=None, limit=None): + """A scraping-based alternative to stravalib.Client.get_activities Note that when using multiple parameters they are treated as AND, not OR @@ -301,6 +308,14 @@ def scrape_activities(self, keywords=None, activity_type=None, workout_type=None :param indoor: Only return indoor/trainer activities :param gear_id: Only return activities using this gear + Parameters for compatibility with stravalib.Client.get_activities: + + :param before: Result will start with activities whose start date is + before specified date. (UTC) + :param after: Result will start with activities whose start date is after + specified value. (UTC) + :param limit: How many maximum activities to return. 
+ :yield: ScrapedActivity objects """ @@ -325,6 +340,10 @@ def scrape_activities(self, keywords=None, activity_type=None, workout_type=None ) ) + before = stravalib.Client._utc_datetime_to_epoch(None, before or datetime.max) + after = stravalib.Client._utc_datetime_to_epoch(None, after or datetime.min) + + num_yielded = 0 page = 1 per_page = 20 search_session_id = uuid.uuid4() @@ -364,11 +383,20 @@ def scrape_activities(self, keywords=None, activity_type=None, workout_type=None ) from e for activity in data: - yield ScrapedActivity(bind_client=self, **activity) + # Respect the limit + if limit is not None and num_yielded >= limit: + return + + activity = ScrapedActivity(bind_client=self, **activity) - # No results = stop requesting pages + # Respect the before and after filters + if after < activity.start_date.timestamp() < before: + yield activity + num_yielded += 1 + + # No results = done if not data: - break + return def delete_activity(self, activity_id): """ @@ -583,8 +611,8 @@ def get_route_data(self, route_id, fmt=DataFormat.GPX): return self._make_export_file(resp, route_id) -# Mix in the ScrapingClient to inherit all its methods -class WebClient(ScrapingClient, stravalib.Client): + +class WebClient(stravalib.Client): """ An extension to the stravalib Client that fills in some of the gaps in the official API using web scraping. @@ -592,9 +620,18 @@ class WebClient(ScrapingClient, stravalib.Client): Requires a JWT or both of email and password """ + def __new__(cls, *args, **kwargs): + self = super().__new__(cls) + + # Prepend __init__'s docstring with the parent classes one + cls.__init__.__doc__ = super().__init__.__doc__ + cls.__init__.__doc__ -# Inherit parent documentation for WebClient.__init__ -WebClient.__init__.__doc__ = stravalib.Client.__init__.__doc__ + \ + # Delegate certain methods and properties to the scraper instance + for fcn in ("delete_activity", "get_bike_components", "get_activity_data", "jwt", "csrf"): + setattr(cls, fcn, cls._delegate(ScrapingClient, fcn)) + return self + + def __init__(self, *args, **kwargs): """ :param email: The email of the account to log into :type email: str @@ -604,6 +641,7 @@ class WebClient(ScrapingClient, stravalib.Client): :param jwt: The JWT of an existing session. If not specified, email and password are required. + Can be accessed from the `.jwt` property. :type jwt: str :param csrf: A dict of the form: `{: }`. @@ -611,3 +649,23 @@ class WebClient(ScrapingClient, stravalib.Client): Can be accessed from the `.csrf` property. 
:type csrf: dict """ + sc_kwargs = { + k: kwargs.pop(k, None) for k in ("email", "password", "jwt", "csrf") + } + self._scraper = ScrapingClient(**sc_kwargs) + super().__init__(*args, **kwargs) + + @staticmethod + def _delegate(cls, name): + func = getattr(cls, name) + is_prop = isinstance(func, property) + + @functools.wraps(func) + def delegator(self, *args, **kwargs): + if is_prop: + return getattr(self._scraper, name) + return getattr(self._scraper, name)(*args, **kwargs) + + if is_prop: + delegator = property(delegator) + return delegator From 720958689598b2a7110d9fb4665e2944ee074a47 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Tue, 3 Nov 2020 22:59:45 -0500 Subject: [PATCH 05/23] Provide convenience functions for requesting data --- stravaweblib/webclient.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index 0bf1509..0fefed7 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -5,6 +5,7 @@ import enum import functools import json +import logging import re import time import uuid @@ -22,6 +23,7 @@ "ActivityFile", "ScrapedActivity", "ScrapedPhoto" ] +__log__ = logging.getLogger(__name__) BASE_URL = "https://www.strava.com" @@ -193,8 +195,10 @@ def __init__(self, *args, **kwargs): if jwt: self._login_with_jwt(jwt) + __log__.info("Resumed session using JWT '%s'", jwt) elif email and password: self._login_with_password(email, password) + __log__.info("Logged in as '%s'", email) else: raise ValueError("'jwt' or both of 'email' and 'password' are required") @@ -210,14 +214,29 @@ def csrf(self): self._csrf = self._get_csrf_token() return self._csrf + def request(self, method, service, *args, **kwargs): + """Request a URL from Strava + + :service: The URL to send the request to without the base URL + """ + return self._session.request(method, "https://www.strava.com/{}".format(service), *args, **kwargs) + + def request_head(self, service, *args, **kwargs): + return self.request("HEAD", service, *args, **kwargs) + + def request_get(self, service, *args, **kwargs): + return self.request("GET", service, *args, **kwargs) + + def request_post(self, service, *args, **kwargs): + return self.request("POST", service, *args, **kwargs) + def _get_csrf_token(self): """Get a CSRF token Uses the about page because it's small and doesn't redirect based on if the client is logged in or not. 
""" - login_html = self._session.get("{}/about".format(BASE_URL)).text - soup = BeautifulSoup(login_html, 'html.parser') + soup = BeautifulSoup(self.request_get("about").text, 'html5lib') try: head = soup.head @@ -253,8 +272,8 @@ def _login_with_jwt(self, jwt): def _login_with_password(self, email, password): """Log into the website using a username and password""" - resp = self._session.post( - "{}/session".format(BASE_URL), + resp = self.request_post( + "session", allow_redirects=False, data={ "email": email, @@ -263,7 +282,7 @@ def _login_with_password(self, email, password): **self.csrf } ) - if not resp.is_redirect or resp.next.url == "{}/login".format(BASE_URL): + if not resp.is_redirect or resp.next.url.endswith("/login"): raise stravalib.exc.LoginFailed("Couldn't log in to website, check creds") def get_activity_photos(self, activity_id): From 24eec45893c6c80b23b921fdc9385dad4e2a9190 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Tue, 3 Nov 2020 23:07:54 -0500 Subject: [PATCH 06/23] Ensure scraping and API are accessing the same account --- stravaweblib/webclient.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index 0fefed7..32fc520 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -214,6 +214,10 @@ def csrf(self): self._csrf = self._get_csrf_token() return self._csrf + @property + def athlete_id(self): + return int(self._session.cookies.get('strava_remember_id')) + def request(self, method, service, *args, **kwargs): """Request a URL from Strava @@ -674,6 +678,9 @@ def __init__(self, *args, **kwargs): self._scraper = ScrapingClient(**sc_kwargs) super().__init__(*args, **kwargs) + if self._scraper.athlete_id != self.get_athlete().id: + raise ValueError("API and web credentials are for different accounts") + @staticmethod def _delegate(cls, name): func = getattr(cls, name) From f30882e039137d065b7c1a8e35d4add0444fba0d Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Tue, 3 Nov 2020 23:11:27 -0500 Subject: [PATCH 07/23] Change default fallback for JSON activity downloads The default used to be to just download the JSON blob. It was changed to request the GPX format instead since this is a more standardized format for an activity. --- stravaweblib/webclient.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index 32fc520..3e2cf44 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -483,19 +483,26 @@ def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL, json_fmt=None) :param json_fmt: The backup format to request in the event that the `fmt` was DataFormat.ORIGINAL and the request returned - a JSON blob (happens for uploads from mobile apps). - Using `None` (default) will cause the JSON blob to be - returned. - :type json_fmt: :class:`DataFormat` or None + a JSON blob (happens for uploads from older mobile apps). + Using `DataFormat.ORIGINAL` will cause the JSON blob to + be returned. 
(defaults to DataFormat.GPX)
+        :type json_fmt: :class:`DataFormat`

         :return: A namedtuple with `filename` and `content` attributes:
                  - `filename` is the filename that Strava suggests for the file
                  - `contents` is an iterator that yields file contents as bytes
         :rtype: :class:`ExportFile`
         """
+        __log__.debug("Getting data (in %s format) for activity %s", fmt, activity_id)
+
+        fmt = DataFormat(fmt)
+        json_fmt = DataFormat(json_fmt or DataFormat.GPX)  # default JSON fallback is GPX
+        resp = self.request_get(
+            "activities/{}/export_{}".format(activity_id, fmt),
+            stream=True,
+            allow_redirects=False
+        )

         # Gives a 302 back to the activity URL when trying to export a manual activity
         # TODO: Does this also happen with other errors?
@@ -504,13 +511,12 @@ def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL, json_fmt=None)
                                              "to download an activity"
                                              "".format(resp.status_code))

-        # In the case of downloading JSON, the Content-Type header will
-        # correctly be set to 'application/json'
-        if (json_fmt and fmt == DataFormat.ORIGINAL and
+        # When downloading JSON, the Content-Type header will be set to 'application/json'
+        # If the json_fmt is not DataFormat.ORIGINAL, try the download again asking
+        # for the json_fmt.
+        if (fmt == DataFormat.ORIGINAL and json_fmt != fmt and
                 resp.headers['Content-Type'].lower() == 'application/json'):
-            if json_fmt == DataFormat.ORIGINAL.value:
-                raise ValueError("`json_fmt` parameter cannot be DataFormat.ORIGINAL")
-            return self.get_activity_data(activity_id, fmt=json_fmt)
+            return self.get_activity_data(activity_id, fmt=json_fmt, json_fmt=DataFormat.ORIGINAL)

         return self._make_export_file(resp, activity_id)

From 26334b370418b915d14d47e716ddab3dd0234b7e Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Wed, 4 Nov 2020 10:15:36 -0500
Subject: [PATCH 08/23] Increase compatibility for get_activity_photos

Now accepts (but ignores) parameters that the `stravalib` version
accepts
---
 stravaweblib/webclient.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index 3e2cf44..ad63de6 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ -289,10 +289,12 @@ def _login_with_password(self, email, password):
         if not resp.is_redirect or resp.next.url.endswith("/login"):
             raise stravalib.exc.LoginFailed("Couldn't log in to website, check creds")

-    def get_activity_photos(self, activity_id):
+    def get_activity_photos(self, activity_id, size=None, only_instagram=None):
         """A scraping-based alternative to stravalib.Client.get_activity_photos

         :param activity_id: The activity for which to fetch photos. 
+        :param size: [unused] (for compatibility with stravalib)
+        :param only_instagram: [unused] (for compatibility with stravalib)

         :return: A list of ScrapedPhoto objects
         """
         resp = self._session.get("{}/activities/{}".format(BASE_URL, activity_id))
         resp.raise_for_status()

From dac6cc3027102b58217ba0e849558d3e8f6b0b58 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Wed, 4 Nov 2020 10:21:27 -0500
Subject: [PATCH 09/23] Improve get_activities function

- Make pagination actually work (forgot to increment page number)
- Handle stopping based on the `before` param
- Properly handle workout types
---
 stravaweblib/webclient.py | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index ad63de6..f3c2529 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ -344,6 +344,7 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,

         :yield: ScrapedActivity objects
         """
+        __log__.debug("Getting activities")
         if activity_type is not None and activity_type not in Activity.TYPES:
             raise ValueError(
                 "Invalid activity type. Must be one of: {}".format(",".join(Activity.TYPES))
@@ -376,11 +377,11 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,
         conv_bool = lambda x: "" if not x else "true"

         while True:
-            resp = self._session.get(
-                "{}/athlete/training_activities".format(BASE_URL),
+            __log__.debug("Getting page %s of activities", page)
+            resp = self.request_get(
+                "athlete/training_activities",
                 headers={
                     "Accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript",
-                    #"X-CSRF-Token": next(iter(self.csrf.values())),
                     "X-Requested-With": "XMLHttpRequest",
                 },
                 params={
@@ -394,6 +395,7 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,
                     "private_activities": conv_bool(is_private),
                     "trainer": conv_bool(indoor),
                     "gear": gear_id or "",
+                    "order": "start_date_local DESC"  # Return in reverse-chronological order
                 }
             )
             if resp.status_code != 200:
@@ -403,9 +405,11 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,
             try:
                 data = resp.json()["models"]
             except (ValueError, TypeError, KeyError) as e:
-                raise ScrapingError(
-                    "Invalid JSON response from Strava"
-                ) from e
+                raise ScrapingError("Invalid JSON response from Strava") from e
+
+            # No results = done
+            if not data:
+                return

             for activity in data:
                 # Respect the limit
@@ -415,13 +419,20 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,
                 activity = ScrapedActivity(bind_client=self, **activity)

                 # Respect the before and after filters
-                if after < activity.start_date.timestamp() < before:
-                    yield activity
-                    num_yielded += 1
+                # Will see activities from newest to oldest so can do less
+                # work to limit by time
+                ts = activity.start_date.timestamp()
+                if ts < after:
+                    # Activity is too old; no more results (listing is newest-first)
+                    return
+                elif ts > before:
+                    # Activity is too new, skip it
+                    continue

-            # No results = done
-            if not data:
-                return
+                yield activity
+                num_yielded += 1
+
+            page += 1

From 3cb2f48eb1631cace3668725de8a325ca888ddab Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Wed, 4 Nov 2020 10:30:31 -0500
Subject: [PATCH 10/23] Refactor `delete_activity` to use `request_post`
---
 stravaweblib/webclient.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index f3c2529..965763d 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ 
-441,8 +441,9 @@ def delete_activity(self, activity_id):
         :param activity_id: The activity to delete.
         :type activity_id: int
         """
-        resp = self._session.post(
-            "{}/activities/{}".format(BASE_URL, activity_id),
+        __log__.debug("Deleting activity %s", activity_id)
+        resp = self.request_post(
+            "activities/{}".format(activity_id),
             allow_redirects=False,
             data={
                 "_method": "delete",
@@ -450,7 +451,7 @@ def delete_activity(self, activity_id):
             }
         )

-        if not resp.is_redirect or resp.next.url != "{}/athlete/training".format(BASE_URL):
+        if not resp.is_redirect or not resp.next.url.endswith("/athlete/training"):
             raise stravalib.exc.Fault(
                 "Failed to delete activity (status code: {})".format(resp.status_code),
             )

From 0e2edb1146d60c2857e6ee5b04bd509849da8c93 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Wed, 4 Nov 2020 11:27:17 -0500
Subject: [PATCH 11/23] WIP

- Move models to a separate file
- Add more detailed scraping of activity details
- Add more detailed scraping of bike data
---
 stravaweblib/model.py     | 319 ++++++++++++++++++++++++++++++++++++++
 stravaweblib/webclient.py | 298 +++++++++++++++----------------------
 2 files changed, 428 insertions(+), 189 deletions(-)
 create mode 100644 stravaweblib/model.py

diff --git a/stravaweblib/model.py b/stravaweblib/model.py
new file mode 100644
index 0000000..47a37af
--- /dev/null
+++ b/stravaweblib/model.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+
+import enum
+from datetime import date, datetime
+
+from stravalib.attributes import (Attribute, DateAttribute, TimestampAttribute,
+                                  TimeIntervalAttribute, LocationAttribute)
+from stravalib.model import (BaseEntity, BoundEntity, LoadableEntity,
+                             Bike as _Bike)
+from stravalib import unithelper as uh
+
+
+def _parse_component_date(date_str):
+    if not date_str:
+        return None
+    if date_str.lower() == "since beginning":
+        # Different from no date, but don't know exactly when it was
+        return datetime.utcfromtimestamp(0).date()
+    try:
+        return datetime.strptime(date_str, "%b %d, %Y").date()
+    except ValueError:
+        return None
+
+
+def _dict_modify(d, prev, target, overwrite=True, default=None, fcn=None):
+    """Translate the prev key to target
+
+    Only non-None values will be set.
+
+    If overwrite is True, the target key will be overwritten even if something truthy is already there.
+    default is the value to use if the prev key is not available.
+    fcn is a function that the value will be passed through before being set. 
+ """ + if not overwrite and d.get(target): + return + + t = d.pop(prev, default) + if t is None: + return + if fcn: + t = fcn(t) + if t is None: + return + d[target] = t + + +class DataFormat(enum.Enum): + ORIGINAL = "original" + GPX = "gpx" + TCX = "tcx" + + def __str__(self): + return str(self.value) + + +class FrameType(enum.Enum): + MOUNTAIN_BIKE = 1 + CROSS_BIKE = 2 + ROAD_BIKE = 3 + TIME_TRIAL_BIKE = 4 + + def __str__(self): + return str(self.name).replace("_", " ").title() + + @classmethod + def from_str(cls, s): + if isinstance(s, cls): + return s + return cls[s.replace(" ", "_").upper().replace("TT_", "TIME_TRIAL_")] + + +class ExpandableEntity(LoadableEntity): + """Allows for an object to be "expanded" on demand""" + + _expanded = False + _expandable = set() + + def __getattribute__(self, k): + if k != "_expandable" and k in self._expandable and not self._expanded: + self.expand() + return super().__getattribute__(k) + + def _do_expand(self, d, overwrite=True): + if self._expanded: + return + + if overwrite: + self.from_dict(d) + self._expanded = True + return + + # Only set non-null attributes + # Mark as expanded before doing the expansion so __getatttribute__ + # doesn't cause infinte recursion + try: + self._expanded = True + self.from_dict({ + k: v for k, v in d.items() + if not getattr(self, k, None) + }) + except Exception: + self._expanded = False + raise + + def expand(self): + # Needs to call self._do_expand with some data + raise NotImplementedError() + + +class ScrapedGear(BaseEntity): + """Represents gear scraped from Strava + + The attributes are compatible with stravalib.model.Gear where they exist + """ + id = Attribute(str) + name = Attribute(str) + distance = Attribute(float, units=uh.meters) + primary = Attribute(bool) + brand_name = Attribute(str) + model_name = Attribute(str) + description = Attribute(str) + + def from_dict(self, d): + _dict_modify(d, "display_name", "name", overwrite=False) + _dict_modify(d, "default", "primary", overwrite=False) + _dict_modify(d, "total_distance", "distance", overwrite=False, + fcn=lambda x: float(x.replace(",", "")) * 1000) + + return super().from_dict(d) + + def __repr__(self): + return "<{} id={} name={!r}>".format( + self.__class__.__name__, + self.id, + self.name + ) + + +class ScrapedShoe(ScrapedGear): + """Represents a pair of shoes scraped from Strava + + The attributes are compatible with stravalib.model.Shoe where they exist + """ + pass + + +class ScrapedBikeComponent(BaseEntity): + """Represents a bike component scraped from Strava""" + + id = Attribute(int) + type = Attribute(str) + brand_name = Attribute(str) + model_name = Attribute(str) + added = DateAttribute() + removed = DateAttribute() + distance = Attribute(int, units=uh.meters) + + def from_dict(self, d): + # Parse and convert dates into something DateAttribute can understand + _dict_modify(d, "added", "added", fcn=_parse_component_date) + _dict_modify(d, "removed", "removed", fcn=_parse_component_date) + + return super().from_dict(d) + + def __repr__(self): + return "<{} id={} type={!r}>".format( + self.__class__.__name__, + self.id, + self.type + ) + + +class _BikeData(ExpandableEntity): + """Mixin class to add weight and components to a Bike""" + frame_type = Attribute(FrameType) + components = Attribute(list) + weight = Attribute(float, units=uh.kg) + + _expandable = {"weight", "components"} + + def expand(self): + """Expand the bike with more details using scraping""" + self.assert_bind_client() + 
self._do_expand(self.bind_client.get_bike_details(self.id)) + + def components_on_date(self, on_date): + """Get bike components installed on the specified date + + :type on_date: None or datetime.date or datetime.datetime + (datetimes will lose time-precision) + """ + if on_date is None: + return self.components + + if isinstance(on_date, datetime): + on_date = on_date.date() + + return [ + c for c in self.components + if (c.added or date.min) <= on_date <= (c.removed or date.max) + ] + + def from_dict(self, d): + # Upgrade the frame_type to the enum + _dict_modify(d, "frame_type", "frame_type", fcn=lambda x: FrameType(x)) + return super().from_dict(d) + + +class Bike(_BikeData, _Bike) : + __doc__ = _Bike.__doc__ + """ + Scraping adds weight and components attributes + """ + + +class ScrapedBike(ScrapedGear, _BikeData): + """Represents a bike scraped from Strava + + The attributes are compatible with stravalib.models.Bike where they exist. + """ + + _expandable = {'frame_type', 'brand_name', 'model_name'} + + def from_dict(self, d): + # Upgrade the scraped frame_type string to the enum + _dict_modify(d, "frame_type", "frame_type", fcn=lambda x: FrameType.from_str(x)) + return super().from_dict(d) + + +class ScrapedActivityPhoto(BaseEntity): + """Represents a photo scraped from Strava's activity details page + + The attributes are compatible with stravalib.models.ActivityPhoto where + they exist. + """ + + unique_id = Attribute(str) + activity_id = Attribute(int) + athlete_id = Attribute(int) + caption = Attribute(str) + + location = LocationAttribute() + + urls = Attribute(dict) # dimension: url + + def from_dict(self, d): + _dict_modify(d, "photo_id", "unique_id") + _dict_modify(d, "owner_id", "athlete_id") + + # The caption has unicode escapes (ie. \uFFFF) embedded in the string + _dict_modify(d, "caption_escaped", "caption", fcn=lambda x: x.encode("utf-8").decode("unicode_escape")) + + if "dimensions" in d: + d["urls"] = { + str(min(dim.values())): d.pop(name) + for name, dim in d.pop("dimensions").items() + } + if "lat" in d and "lng" in d: + d["location"] = [d.pop("lat"), d.pop("lng")] + + return super().from_dict(d) + + +class ScrapedActivity(ExpandableEntity): + """ + Represents an Activity (ride, run, etc.) 
that was scraped from the website
+
+    The attributes are compatible with stravalib.model.Activity where they exist
+    """
+
+    name = Attribute(str)
+    description = Attribute(str)
+    type = Attribute(str)
+    workout_type = Attribute(str)
+
+    start_date = TimestampAttribute()
+    distance = Attribute(float)
+    moving_time = TimeIntervalAttribute()
+    elapsed_time = TimeIntervalAttribute()
+    total_elevation_gain = Attribute(float)
+    suffer_score = Attribute(int)
+    calories = Attribute(float)
+    gear_id = Attribute(str)
+
+    # True if the activity has GPS coordinates
+    # False for trainers, manual activities, etc
+    has_latlng = Attribute(bool)
+
+    trainer = Attribute(bool)
+    commute = Attribute(bool)
+    private = Attribute(bool)
+    flagged = Attribute(bool)
+
+    manual = Attribute(bool)
+    photos = Attribute(list)  # list of ScrapedActivityPhoto objects
+    device_name = Attribute(str)
+
+    _expandable = {"photos", "manual", "device_name"}
+
+    def expand(self):
+        """Expand the activity with more details using scraping"""
+        self.assert_bind_client()
+        self._do_expand(self.bind_client.get_extra_activity_details(self.id), overwrite=False)
+
+    @property
+    def total_photo_count(self):
+        return len(self.photos)
+
+    def from_dict(self, d):
+        # Only 1 of these will set the gear_id
+        _dict_modify(d, "bike_id", "gear_id", fcn=lambda x: "b{}".format(x))
+        _dict_modify(d, "athlete_gear_id", "gear_id", fcn=lambda x: "g{}".format(x))
+
+        _dict_modify(d, "start_time", "start_date")
+        _dict_modify(d, "distance_raw", "distance")
+        _dict_modify(d, "moving_time_raw", "moving_time")
+        _dict_modify(d, "elapsed_time_raw", "elapsed_time")
+        _dict_modify(d, "elevation_gain_raw", "total_elevation_gain")
+
+        return super().from_dict(d)
diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index 965763d..48d4393 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ -1,8 +1,8 @@
+#!/usr/bin/env python
 from base64 import b64decode
 import cgi
 from collections import namedtuple
-from datetime import date, datetime
+from datetime import datetime
 import functools
 import json
 import logging
 import re
 import time
 import uuid

 from bs4 import BeautifulSoup
 import requests
 import stravalib
-from stravalib.attributes import (Attribute, TimestampAttribute,
-                                  TimeIntervalAttribute, LocationAttribute)
-from stravalib.model import Activity, BaseEntity, BoundEntity
+from stravalib.model import Activity, Bike as _Bike

+from stravaweblib.model import (DataFormat, ScrapedShoe, Bike, ScrapedBike,
+                                ScrapedBikeComponent, ScrapedActivity,
+                                ScrapedActivityPhoto)

-__all__ = [
-    "WebClient", "ScrapingClient", "FrameType", "DataFormat", "ExportFile",
-    "ActivityFile", "ScrapedActivity", "ScrapedPhoto"
-]

 __log__ = logging.getLogger(__name__)

-BASE_URL = "https://www.strava.com"

 # Used for filtering when scraping the activity list
 ACTIVITY_WORKOUT_TYPES = {
     "Ride": {None: 10, "Race": 11, "Workout": 12},
     "Run": {None: 0, "Race": 1, "Long Run": 2, "Workout": 3}
 }

+# Regexes for pulling information out of the activity details page
 PHOTOS_REGEX = re.compile(r"var photosJson\s*=\s*(\[.*\]);")
+PAGE_VIEW_REGEX = re.compile(r"pageView\s*=\s*new\s+Strava.Labs.Activities.Pages.(\S+)PageView\([\"']?\d+[\"']?,\s*[\"']([^\"']+)")
+
+NON_NUMBERS = re.compile(r'[^\d\.]')

 ExportFile = namedtuple("ExportFile", ("filename", "content"))
 ActivityFile = ExportFile  # TODO: deprecate and remove
@@ -46,132 +45,6 @@ class ScrapingError(ValueError):
     """An error that is raised when something fails during scraping
     """

-class ScrapedPhoto(BaseEntity):
-    """Represents a photo scraped from Strava's activity details page
-
-    The attributes 
are compatible with stravalib.models.ActivityPhoto where - they exist. - """ - - unique_id = Attribute(str) - activity_id = Attribute(int) - athlete_id = Attribute(int) - caption = Attribute(str) - - location = LocationAttribute() - - urls = Attribute(dict) # dimension: url - - def from_dict(self, d): - d["unique_id"] = d.pop("photo_id") - d["athlete_id"] = d.pop("owner_id") - - # The caption has unicode escapes (ie. \uFFFF) embedded in the string - d["caption"] = d.pop("caption_escaped", "").encode("utf-8").decode("unicode_escape") - d["urls"] = { - str(min(dim.values())): d.pop(name) - for name, dim in d.pop("dimensions").items() - } - d["location"] = [d.pop("lat"), d.pop("lng")] - - return super().from_dict(d) - - -class ScrapedActivity(BoundEntity): - """ - Represents an Activity (ride, run, etc.) that was scraped from the website - - The attributes are compatible with stravalib.model.Activity where they exist - """ - - id = Attribute(int) - name = Attribute(str) - description = Attribute(str) - type = Attribute(str) - workout_type = Attribute(str) - - start_date = TimestampAttribute() - distance = Attribute(float) - moving_time = TimeIntervalAttribute() - elapsed_time = TimeIntervalAttribute() - total_elevation_gain = Attribute(float) - suffer_score = Attribute(int) - calories = Attribute(float) - gear_id = Attribute(str) - - # True if the activity has GPS coordinates - # False for trainers, manual activities, etc - has_latlng = Attribute(bool) - - trainer = Attribute(bool) - commute = Attribute(bool) - private = Attribute(bool) - flagged = Attribute(bool) - - _photos = None - - @property - def photos(self): - """Returns a list of ScrapedPhoto objects""" - if self._photos is None: - self.assert_bind_client() - self._photos = self.bind_client.get_activity_photos(self.id) - return self._photos - - @property - def total_photo_count(self): - return len(self.photos) - - def from_dict(self, d): - bike_id = d.pop("bike_id", None) - shoes_id = d.pop("athlete_gear_id", None) - if bike_id: - d["gear_id"] = "b{}".format(bike_id) - elif shoes_id: - d["gear_id"] = "g{}".format(shoes_id) - - d["start_date"] = d.pop("start_time") - d["distance"] = d.pop("distance_raw") - d["moving_time"] = d.pop("moving_time_raw") - d["elapsed_time"] = d.pop("elapsed_time_raw") - d["total_elevation_gain"] = d.pop("elevation_gain_raw") - - wt = d.pop("workout_type") - if d["type"] in ACTIVITY_WORKOUT_TYPES: - for k, v in ACTIVITY_WORKOUT_TYPES[d["type"]].items(): - if wt == v: - d["workout_type"] = k - break - - return super().from_dict(d) - - -class DataFormat(enum.Enum): - ORIGINAL = "original" - GPX = "gpx" - TCX = "tcx" - - def __str__(self): - return str(self.value) - - @classmethod - def classify(cls, value): - for x in cls: - if x.value == str(value): - return x - raise ValueError("Invalid format '{}'".format(value)) - - -class FrameType(enum.Enum): - MOUNTAIN_BIKE = 1 - CROSS_BIKE = 2 - ROAD_BIKE = 3 - TIME_TRIAL_BIKE = 4 - - def __str__(self): - return str(self.name).replace("_", " ").title() - - class ScrapingClient: """ A client that uses web scraping to interface with Strava. 
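
The next hunk scrapes the activity details page using the PHOTOS_REGEX and
PAGE_VIEW_REGEX patterns added above. A minimal sketch of how they are expected
to match (the inline script content below is invented for illustration; the
real page markup may differ):

    import json
    import re

    PHOTOS_REGEX = re.compile(r"var photosJson\s*=\s*(\[.*\]);")
    PAGE_VIEW_REGEX = re.compile(r"pageView\s*=\s*new\s+Strava.Labs.Activities.Pages.(\S+)PageView\([\"']?\d+[\"']?,\s*[\"']([^\"']+)")

    # Invented stand-in for the inline <script> content on an activity page
    script = """
    var photosJson = [{"photo_id": 1234, "caption_escaped": "sunset"}];
    pageView = new Strava.Labs.Activities.Pages.ManualPageView('42', 'Ride');
    """

    photos = json.loads(PHOTOS_REGEX.search(script).group(1))
    m = PAGE_VIEW_REGEX.search(script)
    print(photos[0]["photo_id"])                        # -> 1234
    print(m.group(1).lower() == "manual", m.group(2))   # -> True Ride
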
@@ -289,6 +162,55 @@ def _login_with_password(self, email, password):
         if not resp.is_redirect or resp.next.url.endswith("/login"):
             raise stravalib.exc.LoginFailed("Couldn't log in to website, check creds")

+    def get_extra_activity_details(self, activity_id):
+        """Scrapes the full activity page for various details
+
+        Returns a dict of the properties
+        """
+        __log__.debug("Getting extra information for activity %s", activity_id)
+        resp = self.request_get("activities/{}".format(activity_id))
+        if not resp.ok:
+            raise stravalib.exc.Fault("Failed to load activity page to get details")
+
+        ret = {}
+
+        soup = BeautifulSoup(resp.text, 'html5lib')
+
+        summary = soup.find("div", class_="activity-summary-container")
+        if summary:
+            name = summary.find("h1", class_="activity-name")
+            if name:
+                ret["name"] = name.text.strip()
+            description = summary.find("div", class_="activity-description")
+            if description:
+                ret["description"] = description.text.strip()
+            device = summary.find("div", class_="device")
+            if device:
+                ret["device_name"] = device.text.strip()
+
+        for script in soup.find_all("script"):
+            if "var pageView;" in script.text:
+                m = PAGE_VIEW_REGEX.search(script.text)
+                if not m:
+                    __log__.error("Failed to extract manual and type data from page")
+                    continue
+                ret["manual"] = m.group(1).lower() == "manual"
+                ret["type"] = m.group(2)
+
+            elif "var photosJson" in script.text:
+                m = PHOTOS_REGEX.search(script.text)
+                if not m:
+                    __log__.error("Failed to extract photo data from page")
+                    continue
+                try:
+                    photos = json.loads(m.group(1))
+                except (TypeError, ValueError):
+                    __log__.error("Failed to parse extracted photo data", exc_info=True)
+                    continue
+                ret["photos"] = [ScrapedActivityPhoto(**p) for p in photos]
+
+        return ret
+
     def get_activity_photos(self, activity_id, size=None, only_instagram=None):
         """A scraping-based alternative to stravalib.Client.get_activity_photos

@@ -296,27 +218,9 @@ def get_activity_photos(self, activity_id, size=None, only_instagram=None):
         :param size: [unused] (for compatibility with stravalib)
         :param only_instagram: [unused] (for compatibility with stravalib)

-        :return: A list of ScrapedPhoto objects
+        :return: A list of ScrapedActivityPhoto objects
         """
-        resp = self._session.get("{}/activities/{}".format(BASE_URL, activity_id))
-        resp.raise_for_status()
-
-        soup = BeautifulSoup(resp.content, 'html5lib')
-        try:
-            script = next((x for x in soup.find_all("script") if "var photosJson" in x.text))
-        except StopIteration:
-            raise ScrapingError("Failed to find photo data in page")
-
-        m = PHOTOS_REGEX.search(script.text)
-        if not m:
-            raise ScrapingError("Failed to extract photo data from page")
-
-        try:
-            photos = json.loads(m.group(1))
-        except (TypeError, ValueError) as e:
-            raise ScrapingError("Failed to parse extracted photo data") from e
-
-        return [ScrapedPhoto(**p) for p in photos]
+        return self.get_extra_activity_details(activity_id).get("photos", None)

     def get_activities(self, keywords=None, activity_type=None, workout_type=None,
                        commute=False, is_private=False, indoor=False, gear_id=None,
@@ -416,6 +320,14 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,
             if limit is not None and num_yielded >= limit:
                 return

+            # Translate workout types from ints back to strings
+            wt = activity.pop("workout_type")
+            if activity["type"] in ACTIVITY_WORKOUT_TYPES:
+                for k, v in ACTIVITY_WORKOUT_TYPES[activity["type"]].items():
+                    if wt == v:
+                        activity["workout_type"] = k
+                        break
+
             activity = ScrapedActivity(bind_client=self, **activity)

             # 
Respect the before and after filters
@@ -534,49 +446,57 @@ def get_activity_data(self, activity_id, fmt=DataFormat.ORIGINAL, json_fmt=None)

         return self._make_export_file(resp, activity_id)

-    @staticmethod
-    def _parse_date(date_str):
-        if not date_str:
-            return None
-        if date_str.lower() == "since beginning":
-            # Different from no date, but don't know exactly when it was
-            return datetime.utcfromtimestamp(0).date()
-        try:
-            return datetime.strptime(date_str, "%b %d, %Y").date()
-        except ValueError:
-            return None
-
-    def _get_all_bike_components(self, bike_id):
+    def get_bike_details(self, bike_id):
         """
-        Get all components for the specified bike
+        Scrape the details of the specified bike

         :param bike_id: The id of the bike to retrieve components for
                         (must start with a "b")
         :type bike_id: str
         """
+        __log__.debug("Getting bike details for bike %s", bike_id)
         if not bike_id.startswith('b'):
             raise ValueError("Invalid bike id (must start with 'b')")

-        # chop off the leading "b"
-        url = "{}/bikes/{}".format(BASE_URL, bike_id[1:])
-
-        resp = self._session.get(url, allow_redirects=False)
+        resp = self.request_get(
+            "bikes/{}".format(bike_id[1:]),  # chop off the leading "b"
+            allow_redirects=False
+        )
         if resp.status_code != 200:
             raise stravalib.exc.Fault(
                 "Failed to load bike details page (status code: {})".format(resp.status_code),
             )

         soup = BeautifulSoup(resp.text, 'html5lib')
+
+        ret = {}
+
+        # Get data about the bike
+        gear_table = soup.find("div", class_="gear-details").find("table")
+        for k, v in zip(
+                ["frame_type", "brand_name", "model_name", "weight"],
+                [x.text for x in gear_table.find_all("td")][1::2]
+        ):
+            if not k:
+                continue
+            if k == "weight":
+                # Strip non-number chars ("kg")
+                # TODO: other units?
+                v = float(NON_NUMBERS.sub('', v))
+            ret[k.lower()] = v
+
+        # Get component data
         table = None
         for t in soup.find_all('table'):
             if t.find('thead'):
                 table = t
                 break
+        else:
+            raise ScrapingError(
+                "Bike component table not found in the HTML - layout update?" 
+            )

-        if not table:
-            raise ScrapingError("Bike component table not found in the HTML - layout update?")
-
-        components = []
+        ret["components"] = []
         for row in table.tbody.find_all('tr'):
             cells = row.find_all('td')
             text = [cell.text.strip() for cell in cells]
@@ -591,16 +511,16 @@ def _get_all_bike_components(self, bike_id):

             component_id = cells[6].find('a', text="Delete")['href'].rsplit("/", 1)[-1]

-            components.append({
-                'id': component_id,
-                'type': text[0],
-                'brand': text[1],
-                'model': text[2],
-                'added': self._parse_date(text[3]),
-                'removed': self._parse_date(text[4]),
-                'distance': distance
-            })
-        return components
+            ret["components"].append(ScrapedBikeComponent(
+                id=component_id,
+                type=text[0],
+                brand_name=text[1],
+                model_name=text[2],
+                added=text[3],
+                removed=text[4],
+                distance=distance
+            ))
+        return ret

From 99d0b775daeee30f88682ce211c95d169fd8d82d Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Tue, 10 Nov 2020 23:03:07 -0500
Subject: [PATCH 12/23] Pull unicode_escapes out
---
 stravaweblib/model.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/stravaweblib/model.py b/stravaweblib/model.py
index 47a37af..6cab1c0 100644
--- a/stravaweblib/model.py
+++ b/stravaweblib/model.py
@@ -21,6 +21,10 @@ def _parse_component_date(date_str):
     except ValueError:
         return None

+def _decode_unicode_escapes(s):
+    """Decodes unicode escapes (ie. \uFFFF) embedded in a string"""
+    return s.encode("utf-8").decode("unicode_escape")
+

 def _dict_modify(d, prev, target, overwrite=True, default=None, fcn=None):
     """Translate the prev key to target
@@ -247,7 +251,7 @@ def from_dict(self, d):
         _dict_modify(d, "owner_id", "athlete_id")

         # The caption has unicode escapes (ie. \uFFFF) embedded in the string
-        _dict_modify(d, "caption_escaped", "caption", fcn=lambda x: x.encode("utf-8").decode("unicode_escape"))
+        _dict_modify(d, "caption_escaped", "caption", fcn=_decode_unicode_escapes)

         if "dimensions" in d:
             d["urls"] = {

From 2ae1c3d69733f7127fe114d2f6abb6f77a8eb659 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Wed, 4 Nov 2020 11:34:16 -0500
Subject: [PATCH 13/23] Implement a replacement for `get_activity`
---
 stravaweblib/webclient.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index 48d4393..d13cbb2 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ -346,6 +346,19 @@ def get_activities(self, keywords=None, activity_type=None, workout_type=None,

             page += 1

+    def get_activity(self, activity_id):
+        """A scraping-based alternative to stravalib.Client.get_activity
+
+        Note that this actually performs a search for the activity using
+        `get_activities` to get most of the information. Generally, it would be
+        more efficient to use `get_activities` to find the activities directly.
+        """
+        d = self.get_extra_activity_details(activity_id)
+        for x in self.get_activities(keywords=d["name"], activity_type=d["type"]):
+            if x.id == activity_id:
+                x._do_expand(d, overwrite=False)
+                return x
+
     def delete_activity(self, activity_id):
         """
         Deletes the specified activity. 
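
With `get_activity` in place, the scraping client now covers the most common
read paths of stravalib.Client. A usage sketch (the credentials and activity id
below are made up; a real session needs a valid login):

    from stravaweblib.webclient import ScrapingClient

    client = ScrapingClient(email="athlete@example.com", password="hunter2")

    # Iterating the filtered listing directly is the efficient path
    for activity in client.get_activities(activity_type="Ride", limit=5):
        print(activity.id, activity.name, activity.start_date)

    # Single-activity lookup reuses the same search under the hood
    activity = client.get_activity(1234567890)
    print(activity.type, activity.total_photo_count)
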
From 030b3a3572e631891f9d96e592d826a2dc931c2e Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Wed, 4 Nov 2020 11:32:27 -0500 Subject: [PATCH 14/23] Add scraped components to Bikes returned from get_gear Replaces `get_bike_components` --- setup.py | 2 +- stravaweblib/webclient.py | 45 ++++++++++++++------------------------- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/setup.py b/setup.py index 81c1798..9104b99 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ packages=["stravaweblib"], python_requires=">=3.4.0", install_requires=[ - "stravalib>=0.6.6,<1.0.0", + "stravalib>=0.10.4,<1.0.0", "beautifulsoup4>=4.6.0,<5.0.0", ], ) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index d13cbb2..d594046 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -535,29 +535,6 @@ def get_bike_details(self, bike_id): )) return ret - def get_bike_components(self, bike_id, on_date=None): - """ - Get components for the specified bike - - :param bike_id: The id of the bike to retreive components for - (must start with a "b") - :type bike_id: str - - :param on_date: Only return components on the bike for this day. If - `None`, return all components regardless of date. - :type on_date: None or datetime.date or datetime.datetime - """ - components = self._get_all_bike_components(bike_id) - - # Filter by the on_date param - if on_date: - if isinstance(on_date, datetime): - on_date = on_date.date() - return [c for c in components if \ - (c['added'] or date.min) <= on_date <= (c['removed'] or date.max)] - else: - return components - def get_route_data(self, route_id, fmt=DataFormat.GPX): """ Get a file containing the provided route's data @@ -596,14 +573,15 @@ class WebClient(stravalib.Client): Requires a JWT or both of email and password """ - def __new__(cls, *args, **kwargs): + def __new__(cls, *_, **__): self = super().__new__(cls) - # Prepend __init__'s docstring with the parent classes one - cls.__init__.__doc__ = super().__init__.__doc__ + cls.__init__.__doc__ + # Prepend some docstrings with the parent classes one + for fcn in ("__init__", "get_gear"): + getattr(cls, fcn).__doc__ = getattr(super(), fcn).__doc__ + getattr(cls, fcn).__doc__ # Delegate certain methods and properties to the scraper instance - for fcn in ("delete_activity", "get_bike_components", "get_activity_data", "jwt", "csrf"): + for fcn in ("delete_activity", "get_activity_data", "jwt", "csrf"): setattr(cls, fcn, cls._delegate(ScrapingClient, fcn)) return self @@ -634,9 +612,18 @@ def __init__(self, *args, **kwargs): if self._scraper.athlete_id != self.get_athlete().id: raise ValueError("API and web credentials are for different accounts") + def get_gear(self, gear_id): + """ + Returned Bikes will have scraped attributes lazily added + """ + gear = super().get_gear(gear_id) + if isinstance(gear, _Bike): + return Bike(bind_client=self._scraper, **gear.to_dict()) + return gear + @staticmethod - def _delegate(cls, name): - func = getattr(cls, name) + def _delegate(clazz, name): + func = getattr(clazz, name) is_prop = isinstance(func, property) @functools.wraps(func) From c13b6bded16bcab9941a1b4c2eb5c9e038500c60 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Wed, 4 Nov 2020 11:55:02 -0500 Subject: [PATCH 15/23] Implement a scraping-based `get_gear` function --- stravaweblib/webclient.py | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index d594046..68d3dbe 100644 --- 
a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -563,6 +563,50 @@ def get_route_data(self, route_id, fmt=DataFormat.GPX): return self._make_export_file(resp, route_id) + def get_all_bikes(self): + """Scrape all bike information from Strava + + :yield: `ScrapedBike` objects + """ + __log__.debug("Getting all bike data") + resp = self.request_get("athletes/{}/gear/bikes".format(self.athlete_id)) + if not resp.ok: + raise stravalib.exc.Fault("Failed to get list of bikes") + try: + yield from ( + ScrapedBike( + bind_client=self, + id="b{}".format(b.pop("id")), # add "b" to gear id + **b + ) + for b in resp.json() + ) + except (TypeError, ValueError) as e: + raise ScrapingError("Failed to parse bike data") from e + + def get_all_shoes(self): + """Scrape all shoe information from Strava + + :yield: `ScrapedShoe` objects + """ + __log__.debug("Getting all shoe data") + resp = self.request_get("athletes/{}/gear/shoes".format(self.athlete_id)) + if not resp.ok: + raise stravalib.exc.Fault("Failed to get list of shoes") + try: + yield from (ScrapedShoe(**s) for s in resp.json()) + except (TypeError, ValueError) as e: + raise ScrapingError("Failed to parse shoe data") from e + + def get_gear(self, gear_id): + """A scraping-based replacement for `stravalib.Client.get_gear`""" + try: + if gear_id.startswith("b"): + return next(x for x in self.get_all_bikes() if x.id == gear_id) + else: + return next(x for x in self.get_all_shoes() if x.id == gear_id) + except StopIteration: + raise KeyError("No gear with id '{}' found".format(gear_id)) class WebClient(stravalib.Client): From 4eba61aa5fede95a2eb92222ddfd9365499925ab Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Wed, 4 Nov 2020 22:26:23 -0500 Subject: [PATCH 16/23] Use EntityCollection type for lists of entities --- stravaweblib/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stravaweblib/model.py b/stravaweblib/model.py index 6cab1c0..7654051 100644 --- a/stravaweblib/model.py +++ b/stravaweblib/model.py @@ -6,7 +6,7 @@ from stravalib.attributes import (Attribute, DateAttribute, TimestampAttribute, TimeIntervalAttribute, LocationAttribute) from stravalib.model import (BaseEntity, BoundEntity, LoadableEntity, - Bike as _Bike) + EntityCollection, Bike as _Bike) from stravalib import unithelper as uh @@ -177,7 +177,7 @@ def __repr__(self): class _BikeData(ExpandableEntity): """Mixin class to add weight and components to a Bike""" frame_type = Attribute(FrameType) - components = Attribute(list) + components = Attribute(EntityCollection(ScrapedBikeComponent)) weight = Attribute(float, units=uh.kg) _expandable = {"weight", "components"} @@ -295,7 +295,7 @@ class ScrapedActivity(ExpandableEntity): flagged = Attribute(bool) manual = Attribute(bool) - photos = Attribute(list) # list of ScrapedActivityPhoto objects + photos = Attribute(EntityCollection(ScrapedActivityPhoto)) device_name = Attribute(str) _expandable = {"photos", "manual", "device_name"} From dde930baa5be43a1328f4fdfddf810d13c687949 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Wed, 4 Nov 2020 22:40:20 -0500 Subject: [PATCH 17/23] Refactor how lazy loading works --- stravaweblib/model.py | 146 +++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 65 deletions(-) diff --git a/stravaweblib/model.py b/stravaweblib/model.py index 7654051..52dc4ac 100644 --- a/stravaweblib/model.py +++ b/stravaweblib/model.py @@ -73,42 +73,71 @@ def from_str(cls, s): return cls[s.replace(" ", "_").upper().replace("TT_", "TIME_TRIAL_")] 
-class ExpandableEntity(LoadableEntity):
-    """Allows for an object to be "expanded" on demand"""
-
-    _expanded = False
-    _expandable = set()
-
-    def __getattribute__(self, k):
-        if k != "_expandable" and k in self._expandable and not self._expanded:
-            self.expand()
-        return super().__getattribute__(k)
-
-    def _do_expand(self, d, overwrite=True):
-        if self._expanded:
-            return
-
-        if overwrite:
-            self.from_dict(d)
-            self._expanded = True
-            return
-
-        # Only set non-null attributes
-        # Mark as expanded before doing the expansion so __getatttribute__
-        # doesn't cause infinte recursion
-        try:
-            self._expanded = True
-            self.from_dict({
-                k: v for k, v in d.items()
-                if not getattr(self, k, None)
-            })
-        except Exception:
-            self._expanded = False
-            raise
-
-    def expand(self):
-        # Needs to call self._do_expand with some data
-        raise NotImplementedError()
+class MetaLazy(type):
+    """A metaclass that returns subclasses of the class of the passed in Attribute
+
+    This is used with the LazyLoaded class wrapper below to dynamically create
+    lazy-loaded subclasses.
+
+    Also, it names the returned types LazyLoaded<attr class name>
+    """
+    def __call__(cls, attr, *args, **kwargs):
+        attr_cls = attr.__class__
+        cls = cls.__class__(cls.__name__ + attr_cls.__name__, (cls, attr_cls), {})
+        return super(MetaLazy, cls).__call__(attr, *args, **kwargs)
+
+
+class LazyLoaded(metaclass=MetaLazy):
+    """Class wrapper that handles lazy-loading an Attribute as it is requested"""
+
+    def __init__(self, attr, fcn=None, key=None):
+        """Set up the LazyLoaded wrapper
+
+        Can expand attributes individually using a lambda function (fcn), or
+        multiple attributes at a time via an `expand` function defined on the
+        class that houses it (key).
+
+        Using `fcn`-based attributes is recommended when each attribute needs
+        to be retrieved separately. Using `key`-based attributes is recommended
+        when multiple attributes can be retrieved at the same time.
+
+        :param attr: The `Attribute` to wrap (ie. `Attribute(int)`)
+        :param fcn: This function will be called the first time the attribute
+                    is requested. The result will be set as the attribute value.
+        :param key: The key of the attribute in the lazyload cache. The lazyload
+                    cache is stored on the parent class. When this attribute is
+                    requested and the key is not in the cache, the `load_attribute`
+                    function on the parent class is called and the result is
+                    added to the cache. At this point, the key is poped out of
+                    the cache and set as the attribute variable.
+ """ + if not (bool(fcn) ^ bool(key)): + raise ValueError("One of fcn or key (not both) is required") + self._fcn = fcn + self._key = key + # Mimic the child Attribute's properties + super().__init__( + type_=attr.type, + resource_states=attr.resource_states, + units=attr.units + ) + + def __get__(self, obj, clazz): + if obj is not None and obj not in self.data: + if self._fcn: + # Call the provided function to load the attribute + value = self._fcn(obj) + elif self._key: + if not hasattr(obj, "_lazyload_cache"): + obj._lazyload_cache = {} + + # Use obj.load_attribute() to ensure the object is in the cache + if self._key not in obj._lazyload_cache: + obj._lazyload_cache.update(obj.load_attribute(self._key)) + value = obj._lazyload_cache.pop(self._key) + + self.__set__(obj, value) + return super().__get__(obj, clazz) class ScrapedGear(BaseEntity): @@ -174,18 +203,20 @@ def __repr__(self): ) -class _BikeData(ExpandableEntity): +class _BikeData(LoadableEntity): """Mixin class to add weight and components to a Bike""" - frame_type = Attribute(FrameType) - components = Attribute(EntityCollection(ScrapedBikeComponent)) - weight = Attribute(float, units=uh.kg) - - _expandable = {"weight", "components"} + frame_type = LazyLoaded(Attribute(FrameType), key="frame_type") + components = LazyLoaded(EntityCollection(ScrapedBikeComponent), key="components") + weight = LazyLoaded(Attribute(float, units=uh.kg), key="weight") - def expand(self): + def load_attribute(self, _): """Expand the bike with more details using scraping""" self.assert_bind_client() - self._do_expand(self.bind_client.get_bike_details(self.id)) + + d = self.bind_client.get_bike_details(self.id) + # Upgrade the frame_type to the enum + _dict_modify(d, "frame_type", "frame_type", fcn=lambda x: FrameType.from_str(x)) + return d def components_on_date(self, on_date): """Get bike components installed on the specified date @@ -204,11 +235,6 @@ def components_on_date(self, on_date): if (c.added or date.min) <= on_date <= (c.removed or date.max) ] - def from_dict(self, d): - # Upgrade the frame_type to the enum - _dict_modify(d, "frame_type", "frame_type", fcn=lambda x: FrameType(x)) - return super().from_dict(d) - class Bike(_BikeData, _Bike) : __doc__ = _Bike.__doc__ + """ @@ -222,13 +248,6 @@ class ScrapedBike(ScrapedGear, _BikeData): The attributes are compatible with stravalib.models.Bike where they exist. """ - _expandable = {'frame_type', 'brand_name', 'model_name'} - - def from_dict(self, d): - # Upgrade the scraped frame_type string to the enum - _dict_modify(d, "frame_type", "frame_type", fcn=lambda x: FrameType.from_str(x)) - return super().from_dict(d) - class ScrapedActivityPhoto(BaseEntity): """Represents a photo scraped from Strava's activity details page @@ -264,7 +283,7 @@ def from_dict(self, d): return super().from_dict(d) -class ScrapedActivity(ExpandableEntity): +class ScrapedActivity(LoadableEntity): """ Represents an Activity (ride, run, etc.) 
that was scraped from the website
@@ -294,16 +313,13 @@ class ScrapedActivity(LoadableEntity):
     private = Attribute(bool)
     flagged = Attribute(bool)
 
-    manual = Attribute(bool)
-    photos = Attribute(EntityCollection(ScrapedActivityPhoto))
-    device_name = Attribute(str)
-
-    _expandable = {"photos", "manual", "device_name"}
+    manual = LazyLoaded(Attribute(bool), key="manual")
+    photos = LazyLoaded(EntityCollection(ScrapedActivityPhoto), key="photos")
+    device_name = LazyLoaded(Attribute(str), key="device_name")
 
-    def expand(self):
-        """Expand the activity with more details using scraping"""
+    def load_attribute(self, _):
         self.assert_bind_client()
-        self._do_expand(self.bind_client.get_extra_activity_details(self.id), overwrite=False)
+        return self.bind_client.get_extra_activity_details(self.id)
 
     @property
     def total_photo_count(self):
From 6b68efd0a97fc98bcb1658f4f450bd6d3573aca9 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Mon, 9 Nov 2020 14:48:58 -0500
Subject: [PATCH 18/23] Allow LazyLoaded Attributes to behave like properties

---
 stravaweblib/model.py | 28 ++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/stravaweblib/model.py b/stravaweblib/model.py
index 52dc4ac..1559b07 100644
--- a/stravaweblib/model.py
+++ b/stravaweblib/model.py
@@ -90,7 +90,7 @@ def __call__(cls, attr, *args, **kwargs):
 class LazyLoaded(metaclass=MetaLazy):
     """Class wrapper that handles lazy-loading an Attribute as it is requested"""
 
-    def __init__(self, attr, fcn=None, key=None):
+    def __init__(self, attr, *, fcn=None, key=None, property=False):
         """Set up the LazyLoaded wrapper
 
         Can expand attributes individually using a lambda function (fcn), or
         multiple attributes at a time via an `expand` function defined on the
         class that houses it (key).
 
+        If `property` is True, the attribute will be loaded each time it is
+        requested. This makes the attribute act more like a property.
+
         :param attr: The `Attribute` to wrap (ie. `Attribute(int)`)
         :param fcn: This function will be called the first time the attribute
                     is requested. The result will be set as the attribute value.
         :param key: The key of the attribute in the lazyload cache. The lazyload
@@ -108,11 +111,14 @@ class that houses it (key).
                     cache is stored on the parent class. When this attribute is
                     requested and the key is not in the cache, the `load_attribute`
                     function on the parent class is called and the result is
-                    added to the cache. At this point, the key is poped out of
-                    the cache and set as the attribute variable.
+                    added to the cache. At this point, the key is popped out of
+                    the cache and set as the attribute variable. If the key is
+                    not in the cache, `None` is set as the value of the attribute.
+        :param property: Don't store the result of the lazy load
         """
         if not (bool(fcn) ^ bool(key)):
             raise ValueError("One of fcn or key (not both) is required")
+        self._property = property
         self._fcn = fcn
         self._key = key
         # Mimic the child Attribute's properties
         super().__init__(
             type_=attr.type,
             resource_states=attr.resource_states,
             units=attr.units
         )
 
     def __get__(self, obj, clazz):
-        if obj is not None and obj not in self.data:
+        if obj is not None and (self._property or obj not in self.data):
             if self._fcn:
                 # Call the provided function to load the attribute
                 value = self._fcn(obj)
@@ -133,12 +139,22 @@ def __get__(self, obj, clazz):
 
                 # Use obj.load_attribute() to ensure the object is in the cache
                 if self._key not in obj._lazyload_cache:
-                    obj._lazyload_cache.update(obj.load_attribute(self._key))
-                value = obj._lazyload_cache.pop(self._key)
+                    obj._lazyload_cache.update(obj.load_attribute(self._key) or {})
+                value = obj._lazyload_cache.pop(self._key, None)
+
+            if self._property:
+                return value
 
             self.__set__(obj, value)
         return super().__get__(obj, clazz)
 
+    def __set__(self, obj, val):
+        if self._property:
+            raise AttributeError(
+                "Can't set {} property on {!r}".format(self.__class__.__name__, obj)
+            )
+        super().__set__(obj, val)
+
 
 class ScrapedGear(BaseEntity):
     """Represents gear scraped from Strava
From a22ea26e8f5b19c2838318f87fe67791513be40a Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Thu, 19 Aug 2021 23:18:48 -0400
Subject: [PATCH 19/23] Add ScrapedAthlete

---
 stravaweblib/model.py     |  60 ++++++++++++++++++
 stravaweblib/webclient.py | 125 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 182 insertions(+), 3 deletions(-)

diff --git a/stravaweblib/model.py b/stravaweblib/model.py
index 1559b07..acef410 100644
--- a/stravaweblib/model.py
+++ b/stravaweblib/model.py
@@ -353,3 +353,63 @@ def from_dict(self, d):
         _dict_modify(d, "elevation_gain_raw", "elevation_gain")
 
         return super().from_dict(d)
+
+
+class ScrapedAthlete(LoadableEntity):
+    """
+    Represents Athlete data scraped from the website
+
+    The attributes are compatible with stravalib.model.Athlete where they exist
+    """
+    firstname = Attribute(str)
+    lastname = Attribute(str)
+    # Dynamically compute the display name in the same way Strava does
+    name = LazyLoaded(
+        Attribute(str),
+        fcn=lambda x: "{} {}".format(x.firstname or "", x.lastname or "").strip(),
+        property=True
+    )
+
+    profile = Attribute(str)
+    photos = EntityCollection(ScrapedActivityPhoto)
+    challenges = Attribute(list)
+
+    city = Attribute(str)
+    state = Attribute(str)
+    country = Attribute(str)
+    location = LocationAttribute()
+
+    bikes = LazyLoaded(EntityCollection(ScrapedBike), key="bikes")
+    shoes = LazyLoaded(EntityCollection(ScrapedShoe), key="shoes")
+
+    def load_attribute(self, key):
+        self.assert_bind_client()
+        if key == "bikes":
+            v = self.bind_client.get_all_bikes(self.id)
+        elif key == "shoes":
+            v = self.bind_client.get_all_shoes(self.id)
+        else:
+            return
+        return {key: v}
+
+    def from_dict(self, d):
+        # Merge geo subdict into the main dict
+        d.update(d.pop("geo", {}))
+
+        _dict_modify(d, "photo", "profile_medium")
+        _dict_modify(d, "photo_large", "profile")
+        _dict_modify(d, "first_name", "firstname")
+        _dict_modify(d, "last_name", "lastname")
+        _dict_modify(d, "gender", "sex")
+        _dict_modify(d, "lat_lng", "location")
+
+        # According to some code returned in the HTML, Strava computes the
+        # display name using "{firstname} {lastname}". Here we make an attempt
+        # to break the display name back up into its parts. This is only for
+        # compatibility with the stravalib API - you should always use obj.name
+        name = d.pop("name", None)
+        if name and "firstname" not in d and "lastname" not in d:
+            # total guess: assume more last names have spaces than first
+            d["firstname"], d["lastname"] = name.split(" ", 1)
+
+        return super().from_dict(d)
diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py
index 68d3dbe..5e38597 100644
--- a/stravaweblib/webclient.py
+++ b/stravaweblib/webclient.py
@@ -29,7 +29,9 @@
 }
 
 # Regexes for pulling information out of the activity details page
-PHOTOS_REGEX = re.compile(r"var photosJson\s*=\s*(\[.*\]);")
+PHOTOS_REGEX = re.compile(r"var\s+photosJson\s*=\s*(\[.*\]);")
+ATHLETE_REGEX = re.compile(r"var\s+currentAthlete\s*=\s*new\s+Strava.Models.CurrentAthlete\(({.*})\);")
+CHALLENGE_IDS_REGEX = re.compile(r"var\s+trophiesAnalyticsProperties\s*=\s*{.*challenge_id:\s*\[(\[[\d\s,]*\])\]")
 PAGE_VIEW_REGEX = re.compile(r"pageView\s*=\s*new\s+Strava.Labs.Activities.Pages.(\S+)PageView\([\"']?\d+[\"']?,\s*[\"']([^\"']+)")
 
 NON_NUMBERS = re.compile(r'[^\d\.]')
@@ -563,11 +565,16 @@ def get_route_data(self, route_id, fmt=DataFormat.GPX):
 
         return self._make_export_file(resp, route_id)
 
-    def get_all_bikes(self):
+    def get_all_bikes(self, athlete_id=None):
         """Scrape all bike information from Strava
 
         :yield: `ScrapedBike` objects
         """
+        # Return minimal information from the athlete page if this isn't the
+        # currently-logged in athlete.
+        if athlete_id is not None and int(athlete_id) != self.athlete_id:
+            yield from self.get_athlete(athlete_id).bikes
+            return
+
         __log__.debug("Getting all bike data")
         resp = self.request_get("athletes/{}/gear/bikes".format(self.athlete_id))
         if not resp.ok:
@@ -584,11 +591,16 @@ def get_all_bikes(self):
         except (TypeError, ValueError) as e:
             raise ScrapingError("Failed to parse bike data") from e
 
-    def get_all_shoes(self):
+    def get_all_shoes(self, athlete_id=None):
         """Scrape all shoe information from Strava
 
         :yield: `ScrapedShoe` objects
        """
+        # Return minimal information from the athlete page if this isn't the
+        # currently-logged in athlete.
+        if athlete_id is not None and int(athlete_id) != self.athlete_id:
+            yield from self.get_athlete(athlete_id).shoes
+            return
+
         __log__.debug("Getting all shoe data")
         resp = self.request_get("athletes/{}/gear/shoes".format(self.athlete_id))
         if not resp.ok:
@@ -608,6 +620,113 @@ def get_gear(self, gear_id):
         except StopIteration:
             raise KeyError("No gear with id '{}' found".format(gear_id))
 
+    def get_athlete(self, athlete_id=None):
+        """A scraping-based replacement for `stravalib.Client.get_athlete`"""
+        if athlete_id is None:
+            athlete_id = self.athlete_id
+
+        athlete_id = int(athlete_id)
+
+        __log__.debug("Getting athlete %s", athlete_id)
+        resp = self.request_get("athletes/{}".format(athlete_id))
+        if not resp.ok:
+            raise stravalib.exc.Fault("Failed to get athlete {}".format(athlete_id))
+
+        ret = {}
+        soup = BeautifulSoup(resp.text, 'html5lib')
+
+        for script in soup.find_all("script"):
+            # This method only works on the currently-logged in athlete but
+            # returns much more data.
+ if athlete_id == self.athlete_id and "Strava.Models.CurrentAthlete" in script.text: + m = ATHLETE_REGEX.search(script.text) + if not m: + __log__.error("Failed to extract detailed athlete data") + continue + try: + ret.update(json.loads(m.group(1))) + except (TypeError, ValueError) as e: + __log__.error("Failed to parse extracted athlete data", exc_info=True) + continue + + elif "var trophiesAnalyticsProperties" in script.text: + m = CHALLENGE_IDS_REGEX.search(script.text) + if not m: + __log__.error("Failed to extract completed challenges") + continue + try: + ret["challenges"] = json.loads(m.group(1)) + except (TypeError, ValueError) as e: + __log__.error("Failed to parse extracted challenge data", exc_info=True) + continue + + elif "var photosJson" in script.text: + # Exact same as activity pages + m = PHOTOS_REGEX.search(script.text) + if not m: + __log__.error("Failed to extract photo data from page") + break + try: + photos = json.loads(m.group(1)) + except (TypeError, ValueError) as e: + __log__.error("Failed to parse extracted photo data", exc_info=True) + break + ret["photos"] = [ScrapedActivityPhoto(**p) for p in photos] + + # Failed the detailed scrape or not getting the currently-logged in athlete + # (this method works for all athletes) + if "id" not in ret: + ret["id"] = athlete_id + # There are multiple headings depending on the level of access + for heading in soup.find_all("div", class_="profile-heading"): + name = heading.find("h1", class_="athlete-name") + if name: + ret["name"] = name.text.strip() + + location = heading.find("div", class_="location") + if location: + ret["city"], ret["state"], ret["country"] = [x.strip() for x in location.text.split(",")] + + profile = heading.find("img", class_="avatar-img") + if profile: + ret["profile"] = profile["src"] + + # Scrape basic gear info from the sidebar if not getting the logged + # in athlete. + # By providing minimal data for non-logged-in athletes, no more data + # will be lazy-loaded by the bikes and shoes attributes. This is what + # we want since the lazy-load would just call this function again. + # However, when getting the logged in athlete's gear, we don't want to + # set anything since the lazy-load will use the more detailed + # get_all_bikes/gear functions instead of this one. 
+        if athlete_id != self.athlete_id:
+            ret["bikes"] = []
+            ret["shoes"] = []
+            for gear in soup.select("div.section.stats.gear"):
+                if "bikes" in gear["class"]:
+                    type_ = "bikes"
+                    cls = ScrapedBike
+                elif "shoes" in gear["class"]:
+                    type_ = "shoes"
+                    cls = ScrapedShoe
+                else:
+                    continue
+
+                for row in gear.find("table").find_all("tr"):
+                    name, dist = row.find_all("td")
+                    link = name.find("a")
+                    gear_id = None
+                    if link and type_ == "bikes":
+                        gear_id = "b{}".format(link["href"].rsplit("/", 1)[-1])
+
+                    ret[type_].append(cls(
+                        id=gear_id,
+                        name=name.text.strip(),
+                        distance=int(float(NON_NUMBERS.sub('', dist.text.strip())) * 1000),
+                    ))
+
+        return ScrapedAthlete(bind_client=self, **ret)
+
 
 class WebClient(stravalib.Client):
     """
From e5c8cd67d4efb3401d74334cc8fae3a33db57af6 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Mon, 10 Jan 2022 13:46:17 -0500
Subject: [PATCH 20/23] WIP

- Tweak LazyLoaded
- Add scraping for challenges
- Tweak gear access

---
 stravaweblib/model.py     | 187 +++++++++++++++++++++++++++-----------
 stravaweblib/webclient.py | 115 ++++++++++++++++++++++-
 2 files changed, 244 insertions(+), 58 deletions(-)

diff --git a/stravaweblib/model.py b/stravaweblib/model.py
index acef410..551c3e5 100644
--- a/stravaweblib/model.py
+++ b/stravaweblib/model.py
@@ -5,8 +5,9 @@
 from stravalib.attributes import (Attribute, DateAttribute, TimestampAttribute,
                                   TimeIntervalAttribute, LocationAttribute)
-from stravalib.model import (BaseEntity, BoundEntity, LoadableEntity,
-                             EntityCollection, Bike as _Bike)
+from stravalib.model import (BaseEntity, BoundEntity, LoadableEntity as _LoadableEntity,
+                             IdentifiableEntity, EntityCollection, EntityAttribute,
+                             Athlete as _Athlete, Bike as _Bike)
 from stravalib import unithelper as uh
@@ -111,10 +112,18 @@ class that houses it (key).
                     cache is stored on the parent class. When this attribute is
                     requested and the key is not in the cache, the `load_attribute`
                     function on the parent class is called and the result is
-                    added to the cache. At this point, the key is popped out of
-                    the cache and set as the attribute variable. If the key is
-                    not in the cache, `None` is set as the value of the attribute.
+                    added to the cache. Any future accesses will return the value
+                    from the cache. If the key is not in the cache, `None` is
+                    returned.
         :param property: Don't store the result of the lazy load
+
+        Special cases:
+        - If a lazy-loaded attribute is None, lazy-loading will be attempted
+          each time it is accessed. This allows for null values to be updated
+          with new data.
+        - If the load_attribute function returns None for a property, it will
+          not be attempted again.
+
         """
         if not (bool(fcn) ^ bool(key)):
             raise ValueError("One of fcn or key (not both) is required")
@@ -129,24 +138,27 @@ class that houses it (key).
         )
 
     def __get__(self, obj, clazz):
-        if obj is not None and (self._property or obj not in self.data):
-            if self._fcn:
-                # Call the provided function to load the attribute
-                value = self._fcn(obj)
-            elif self._key:
-                if not hasattr(obj, "_lazyload_cache"):
-                    obj._lazyload_cache = {}
+        if obj is None or not (self._property or self.data.get(obj) is None):
+            return super().__get__(obj, clazz)
+
+        if self._fcn:
+            # Call the provided function to load the attribute
+            value = self._fcn(obj)
+            if value is not None and not self._property:
+                self.__set__(obj, value)
+            return value
+        elif self._key:
+            if not hasattr(obj, "_lazyload_cache"):
+                obj._lazyload_cache = {}
 
-                # Use obj.load_attribute() to ensure the object is in the cache
-                if self._key not in obj._lazyload_cache:
-                    obj._lazyload_cache.update(obj.load_attribute(self._key) or {})
-                value = obj._lazyload_cache.pop(self._key, None)
+            # Use obj.load_attribute() to ensure the object is in the cache
+            if self._key not in obj._lazyload_cache:
+                obj._lazyload_cache.update(obj.load_attribute(self._key) or {})
 
-            if self._property:
-                return value
+            # Don't set it on the object, keep accessing out of the cache
+            return obj._lazyload_cache.get(self._key, None)
 
-            self.__set__(obj, value)
-        return super().__get__(obj, clazz)
+        raise AssertionError("No fcn or key?")
 
     def __set__(self, obj, val):
         if self._property:
@@ -156,6 +168,13 @@ def __set__(self, obj, val):
         super().__set__(obj, val)
 
 
+# TODO: probably delete this
+class LoadableEntity(_LoadableEntity):
+
+    def load_attribute(self, key):
+        return {}
+
+
 class ScrapedGear(BaseEntity):
     """Represents gear scraped from Strava
 
@@ -219,20 +238,16 @@ def __repr__(self):
         )
 
 
-class _BikeData(LoadableEntity):
+class _ScrapedBikeData(LoadableEntity):
     """Mixin class to add weight and components to a Bike"""
-    frame_type = LazyLoaded(Attribute(FrameType), key="frame_type")
+
     components = LazyLoaded(EntityCollection(ScrapedBikeComponent), key="components")
     weight = LazyLoaded(Attribute(float, units=uh.kg), key="weight")
 
-    def load_attribute(self, _):
+    def load_attribute(self, key):
         """Expand the bike with more details using scraping"""
         self.assert_bind_client()
-
-        d = self.bind_client.get_bike_details(self.id)
-        # Upgrade the frame_type to the enum
-        _dict_modify(d, "frame_type", "frame_type", fcn=lambda x: FrameType.from_str(x))
-        return d
+        return self.bind_client.get_bike_details(self.id)
 
     def components_on_date(self, on_date):
         """Get bike components installed on the specified date
@@ -252,17 +267,28 @@ def components_on_date(self, on_date):
         ]
 
 
-class Bike(_BikeData, _Bike) :
+class Bike(_ScrapedBikeData, _Bike):
     __doc__ = _Bike.__doc__ + """
     Scraping adds weight and components attributes
     """
+    def from_object(self, b):
+        self.from_dict(b.to_dict())
+        return self
 
-class ScrapedBike(ScrapedGear, _BikeData):
+
+class ScrapedBike(ScrapedGear, _ScrapedBikeData):
     """Represents a bike scraped from Strava
 
     The attributes are compatible with stravalib.models.Bike where they exist.
     """
+    # NOTE: These are here to take advantage of the load_attribute function
+    #       of the _ScrapedBikeData class in case the ScrapedBike was
+    #       constructed from a regular bike without the attributes set.
+ frame_type = LazyLoaded(Attribute(FrameType), key="frame_type") + brand_name = LazyLoaded(Attribute(str), key="brand_name") + model_name = LazyLoaded(Attribute(str), key="model_name") + description = LazyLoaded(Attribute(str), key="description") class ScrapedActivityPhoto(BaseEntity): @@ -333,7 +359,10 @@ class ScrapedActivity(LoadableEntity): photos = LazyLoaded(EntityCollection(ScrapedActivityPhoto), key="photos") device_name = LazyLoaded(Attribute(str), key="device_name") - def load_attribute(self, _): + def load_attribute(self, key): + if key not in {"manual", "photos", "device_name"}: + return super().load_attribute(key) + self.assert_bind_client() return self.bind_client.get_extra_activity_details(self.id) @@ -355,14 +384,44 @@ def from_dict(self, d): return super().from_dict(d) -class ScrapedAthlete(LoadableEntity): - """ - Represents Athlete data scraped from the website +class ScrapedChallenge(IdentifiableEntity): + + url = Attribute(str) + name = Attribute(str) + subtitle = Attribute(str) + teaser = Attribute(str) + overview = Attribute(str) + badge_url = Attribute(str) + + start_date = TimestampAttribute() + end_date = TimestampAttribute() + + def trophy_url(self, percent_complete=100): + """Return a url for a trophy image for the percentage complete + + Note that not all challenges have images for all percentages. Using + 100 should always work. + """ + if not self.badge_url: + return + base, ext = self.badge_url.rsplit(".", 1) + return "{}-{}.{}".format(base, percent_complete, ext) + + def from_dict(self, d): + #_dict_modify(d, "title", "name") + _dict_modify(d, "description", "overview") + _dict_modify(d, "url", "badge_url") + _dict_modify(d, "share_url", "url") + return super().from_dict(d) + + +class _AthleteData(LoadableEntity): + """Mixin class to add photos, challenges, and a name to an Athlete""" + photos = LazyLoaded(EntityCollection(ScrapedActivityPhoto), key="photos") + challenges = LazyLoaded(Attribute(list), key="challenges") + bikes = LazyLoaded(EntityCollection(ScrapedBike), key="bikes") + shoes = LazyLoaded(EntityCollection(ScrapedShoe), key="shoes") - The attributes are compatible with stravalib.model.Athlete where they exist - """ - firstname = Attribute(str) - lastname = Attribute(str) # Dynamically compute the display name in the same way Strava does name = LazyLoaded( Attribute(str), @@ -370,27 +429,47 @@ class ScrapedAthlete(LoadableEntity): property=True ) - profile = Attribute(str) - photos = EntityCollection(ScrapedActivityPhoto) - challenges = Attribute(list) - - city = Attribute(str) - state = Attribute(str) - country = Attribute(str) - location = LocationAttribute() - - bikes = LazyLoaded(EntityCollection(ScrapedBike), key="bikes") - shoes = LazyLoaded(EntityCollection(ScrapedShoe), key="shoes") - def load_attribute(self, key): self.assert_bind_client() + + # TODO: bikes and shoes only returns scraping-based data if key == "bikes": - v = self.bind_client.get_all_bikes(self.id) + return {"bikes": self.bind_client.get_all_bikes(self.id)} elif key == "shoes": - v = self.bind_client.get_all_shoes(self.id) + return {"shoes": self.bind_client.get_all_shoes(self.id)} + elif key in {"photos", "challenges"}: + d = self.bind_client.get_athlete(self.id) + return { + "photos": d.photos, + "challenges": d.challenges, + } else: - return - return {key: v} + return super().load_attribute(key) + + +class Athlete(_AthleteData, _Athlete): + __doc__ = _Athlete.__doc__ + """ + Scraping adds photos, challenges, and name attributes + """ + def from_object(self, a): + 
self.from_dict(a.to_dict()) + return self + + +class ScrapedAthlete(_AthleteData): + """ + Represents Athlete data scraped from the website + + The attributes are compatible with stravalib.model.Athlete where they exist + """ + firstname = Attribute(str) + lastname = Attribute(str) + + profile = Attribute(str) + city = Attribute(str) + state = Attribute(str) + country = Attribute(str) + location = LocationAttribute() def from_dict(self, d): # Merge geo subdict into the main dict diff --git a/stravaweblib/webclient.py b/stravaweblib/webclient.py index 5e38597..4d89abc 100644 --- a/stravaweblib/webclient.py +++ b/stravaweblib/webclient.py @@ -4,6 +4,7 @@ from collections import namedtuple from datetime import datetime import functools +import html import json import logging import re @@ -16,7 +17,8 @@ from stravalib.model import Activity, Bike as _Bike from stravaweblib.model import (DataFormat, ScrapedShoe, Bike, ScrapedBike, ScrapedBikeComponent, ScrapedActivity, - ScrapedActivityPhoto, ScrapedAthlete) + ScrapedActivityPhoto, Athlete, ScrapedAthlete, + ScrapedChallenge, FrameType) __log__ = logging.getLogger(__name__) @@ -33,6 +35,8 @@ ATHLETE_REGEX = re.compile(r"var\s+currentAthlete\s*=\s*new\s+Strava.Models.CurrentAthlete\(({.*})\);") CHALLENGE_IDS_REGEX = re.compile(r"var\s+trophiesAnalyticsProperties\s*=\s*{.*challenge_id:\s*\[(\[[\d\s,]*\])\]") PAGE_VIEW_REGEX = re.compile(r"pageView\s*=\s*new\s+Strava.Labs.Activities.Pages.(\S+)PageView\([\"']?\d+[\"']?,\s*[\"']([^\"']+)") +CHALLENGE_REGEX = re.compile(r"var\s+challenge\s*=\s*new\s+Strava.Models.Challenge\(({.*})\);") +CHALLENGE_DATE_REGEX = re.compile(r"(\S{3} \d{2}, \d{4}) to (\S{3} \d{2}, \d{4})") NON_NUMBERS = re.compile(r'[^\d\.]') @@ -498,6 +502,8 @@ def get_bike_details(self, bike_id): # Strip non-number chars ("kg") # TODO: other units? 
v = float(NON_NUMBERS.sub('', v))
+            elif k == "frame_type":
+                v = FrameType.from_str(v)
             ret[k.lower()] = v
 
         # Get component data
@@ -610,6 +616,14 @@ def get_all_shoes(self, athlete_id=None):
         except (TypeError, ValueError) as e:
             raise ScrapingError("Failed to parse shoe data") from e
 
+    def get_all_gear(self):
+        """Scrape all gear information from Strava
+
+        :yield: `ScrapedBike` and `ScrapedShoe` objects
+        """
+        yield from self.get_all_bikes()
+        yield from self.get_all_shoes()
+
     def get_gear(self, gear_id):
         """A scraping-based replacement for `stravalib.Client.get_gear`"""
         try:
@@ -632,7 +646,10 @@ def get_athlete(self, athlete_id=None):
         if not resp.ok:
             raise stravalib.exc.Fault("Failed to get athlete {}".format(athlete_id))
 
-        ret = {}
+        ret = {
+            "photos": [],
+            "challenges": [],
+        }
         soup = BeautifulSoup(resp.text, 'html5lib')
 
         for script in soup.find_all("script"):
@@ -727,6 +744,71 @@ def get_athlete(self, athlete_id=None):
 
         return ScrapedAthlete(bind_client=self, **ret)
 
+    def get_challenge(self, challenge_id):
+        """Get data about a challenge"""
+        __log__.debug("Getting details for challenge %s", challenge_id)
+        resp = self.request_get("challenges/{}".format(challenge_id))
+        if not resp.ok:
+            raise stravalib.exc.Fault("Failed to get challenge {}".format(challenge_id))
+
+        data = {}
+        soup = BeautifulSoup(resp.text, 'html5lib')
+        react_data = soup.find("div", **{"data-react-class": "Show"})
+        if react_data:
+            # Extract data from the react version of the page
+            data_str = html.unescape(
+                react_data["data-react-props"]
+                .replace("&nbsp;", " ")
+                .replace("\n", "\\n")
+            )
+            try:
+                data = json.loads(data_str)
+            except (TypeError, ValueError) as e:
+                raise ScrapingError("Failed to parse extracted challenge data") from e
+
+            # Get the description
+            description_html = next(x for x in data["sections"] if x["title"] == "Overview")["content"][0]["text"].replace("&nbsp;", "")
+            data["description"] = BeautifulSoup(description_html, 'html5lib').text
+            data["name"] = data["header"]["name"]
+            data["subtitle"] = data["header"]["subtitle"]
+            data["teaser"] = data["summary"]["challenge"]["title"]
+            data["badge_url"] = data["header"]["challengeLogoUrl"]
+            data["share_url"] = "https://www.strava.com/challenges/{}".format(challenge_id)
+
+            m = CHALLENGE_DATE_REGEX.search(data["summary"]["calendar"]["title"])
+            if m:
+                try:
+                    data["start_date"], data["end_date"] = [
+                        datetime.strptime(x, "%b %d, %Y") for x in m.groups()
+                    ]
+                except ValueError:
+                    __log__.error("Failed to parse dates {}".format(m.groups()))
+        else:
+            # Look for the data in the older-style page
+            for script in soup.find_all("script"):
+                if "Strava.Models.Challenge" in script.text:
+                    break
+            else:
+                raise ScrapingError("Failed to scrape challenge data {}".format(challenge_id))
+
+            m = CHALLENGE_REGEX.search(script.text)
+            if not m:
+                raise ScrapingError("Failed to extract challenge data from page")
+
+            data_str = html.unescape(m.group(1))
+            try:
+                data = json.loads(data_str)
+            except (TypeError, ValueError) as e:
+                raise ScrapingError("Failed to parse extracted challenge data") from e
+
+            desc = soup.find("div", id="desc")
+            if desc:
+                data["description"] = desc.text
+
+        data["id"] = challenge_id
+
+        return ScrapedChallenge(**data)
+
 
 class WebClient(stravalib.Client):
     """
@@ -740,7 +822,7 @@ def __new__(cls, *_, **__):
         self = super().__new__(cls)
 
         # Prepend some docstrings with the parent classes one
-        for fcn in ("__init__", "get_gear"):
+        for fcn in ("__init__", "get_gear", "get_athlete"):
             getattr(cls, fcn).__doc__ = getattr(super(), fcn).__doc__ +
getattr(cls, fcn).__doc__
 
         # Delegate certain methods and properties to the scraper instance
@@ -775,15 +857,40 @@ def __init__(self, *args, **kwargs):
         if self._scraper.athlete_id != self.get_athlete().id:
             raise ValueError("API and web credentials are for different accounts")
 
+    def get_athlete(self, athlete_id=None):
+        """
+        Returned Athletes will have scraped attributes lazily added.
+        Also, when accessing the bikes attribute, more scraped data will be available
+        """
+        athlete = super().get_athlete(athlete_id)
+        # TODO: Should make the bind client this instance
+        #       That way scraping/API functions can be mixed
+        return Athlete(bind_client=self._scraper).from_object(athlete)
+
     def get_gear(self, gear_id):
         """
         Returned Bikes will have scraped attributes lazily added
         """
         gear = super().get_gear(gear_id)
         if isinstance(gear, _Bike):
-            return Bike(bind_client=self._scraper, **gear.to_dict())
+            # TODO: Should make the bind client this instance
+            #       That way scraping/API functions can be mixed
+            return Bike(bind_client=self._scraper).from_object(gear)
         return gear
 
+    def get_all_gear(self):
+        """Get all gear information from Strava
+
+        :yield: `stravalib.model.Bike` and `stravalib.model.Shoe` instances
+        """
+        athlete = self.get_athlete()
+        if athlete.bikes is None and athlete.shoes is None:
+            __log__.error("Failed to get gear data (missing profile:read_all scope?)")
+            return
+
+        for gear in athlete.bikes + athlete.shoes:
+            yield self.get_gear(gear.id)
+
     @staticmethod
     def _delegate(clazz, name):
         func = getattr(clazz, name)
From 856de245203b845f1a17a1b28472049ae5a48fd3 Mon Sep 17 00:00:00 2001
From: Carey Metcalfe
Date: Wed, 26 Jan 2022 16:01:03 -0500
Subject: [PATCH 21/23] Fix extracting data from script tags

---
BeautifulSoup v4.9.0 changed how `.text` works for `