In [3]:
import bs4
import json
import pandas as pd
import datetime

In [5]:
pip install --upgrade beautifulsoup4 lxml numpy pandas python-dateutil pytz requests setuptools slumber soupsieve urllib3

Note: you may need to restart the kernel to use updated packages.


In [7]:
pip install slumber

Note: you may need to restart the kernel to use updated packages.


In [40]:
from slumber import API

class FirstCyclingAPI(API):
    """ Wrapper for FirstCycling API """
    def __init__(self):
        super().__init__("https://firstcycling.com", append_slash=False)
        
    def __getitem__(self, key):
        return getattr(self, key)
    
    def _fix_kwargs(self, **kwargs):
        return {k: v for k, v in kwargs.items() if v}
    
    def _get_resource_response(self, resource, **kwargs):
        return self._store['session'].get(resource.url(), params=self._fix_kwargs(**kwargs)).content

    def get_rider_endpoint(self, rider_id, **kwargs):
        return self._get_resource_response(self['rider.php'], r=rider_id, **kwargs)

    def get_race_endpoint(self, race_id, **kwargs):
        return self._get_resource_response(self['race.php'], r=race_id, **kwargs)

    def get_ranking_endpoint(self, **kwargs):
        return self._get_resource_response(self['ranking.php'], **kwargs)

fc = FirstCyclingAPI()

In [42]:
class Endpoint:
	"""
	Generalized class to store endpoint responses.

	Attributes
	----------
	response : bytes
		Raw response from firstcycling.com
	"""

	def __init__(self, response):
		self.response = response
		""" Raw response from firstcycling.com. """

	def _to_json(self):
		return vars(self).copy()

	def get_json(self):
		""" Get JSON representation of endpoint response. """
		return json.dumps(self, default=ComplexHandler)


class ParsedEndpoint(Endpoint):
	def __init__(self, response):
		super().__init__(response)
		self._parse_result()

	def _parse_result(self):
		self.soup = bs4.BeautifulSoup(self.response, 'html.parser')
		self._parse_soup()
	def _parse_soup(self):
		return


def ComplexHandler(obj):
	"""
	Customized handler to convert object to JSON by recursively calling to_json() method.
	Adapted from https://stackoverflow.com/questions/5160077/encoding-nested-python-object-in-json
	""" 
	if hasattr(obj, '_to_json'):
		return obj._to_json()
	elif isinstance(obj, datetime.date):
		return str(obj)
	elif isinstance(obj, pd.DataFrame):
		return obj.to_json()
	elif isinstance(obj, bytes):
		return obj.decode("utf-8")
	else:
		raise TypeError('Object of type %s with value of %s is not JSON serializable' % (type(obj), repr(obj)))


In [46]:
fc = FirstCyclingAPI()
rider_data = fc.get_rider_endpoint(rider_id = '45992')
print(rider_data)

b'<!DOCTYPE html>\r\n<html lang="en">\r\n<head>\r\n  <title>Tadej Pogacar | FirstCycling</title>\r\n  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\r\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\r\n  <meta name="description" content="Tadej Pogacar is a 26-year-old professional cyclist from Slovenia, born September 21st 1998. He rides for UAE Team Emirates XRG, a UCI WorldTeam. He has won 92 UCI races, and is currently ranked 1st in the UCI Ranking. " />\r\n  <meta name="keywords" content="Tadej Pogacar, Pogacar, UAE Team Emirates XRG" />\r\n  <link rel="alternate" media="only screen and (max-width: 720px)" href="https://firstcycling.com/m/rider.php?r=45992" />\r\n  <link rel="alternate" hreflang="x-default" href="https://firstcycling.com/rider.php?r=45992" />\r\n  <link rel="alternate" hreflang="en" href="https://firstcycling.com/rider.php?r=45992" />\r\n  <link rel="alternate" hreflang="da" href="https://dk.firstcycling.com/rider.ph

In [50]:
class ParsedEndpoint(Endpoint):
    def __init__(self, response):
        super().__init__(response)
        self._parse_result()

    def _parse_result(self):
        if self.response:  # Make sure response isn't None
            self.soup = bs4.BeautifulSoup(self.response, 'html.parser')
            self._parse_soup()
        else:
            print("No response data to parse")

    def _parse_soup(self):
        # Assuming you're trying to extract specific data
        # This is where you can customize your parsing logic
        if self.soup:
            # Example of extracting text from a <title> tag
            self.parsed_data = self.soup.title.text if self.soup.title else 'No title'
        else:
            self.parsed_data = 'Soup parsing failed'

    def get_json(self):
        return json.dumps({'parsed_data': self.parsed_data})


In [52]:
parsed_data = ParsedEndpoint(rider_data)
print(parsed_data.get_json())

{"parsed_data": "Tadej Pogacar | FirstCycling"}


In [54]:
from first_cycling_api import RaceEdition
amstel_2019 = RaceEdition(race_id=9, year=2019) # The race_id comes from the race page URL
amstel_2019.results().results_table.head() # A pandas DataFrame of the race results



Unnamed: 0,Pos,Rider,Team,UCI,Time,Team_ID
0,1,van der Poel Mathieu,Corendon - Circus,500.0,06:28:18,13279
1,2,Clarke Simon,EF Education First,400.0,+ 00,13208
2,3,Fuglsang Jakob,Astana Pro Team,325.0,+ 00,13198
3,4,Alaphilippe Julian,Deceuninck-Quick Step,275.0,+ 00,13206
4,5,Schachmann Max,Bora - Hansgrohe,225.0,+ 00,13200


In [56]:
>>> from first_cycling_api import Rider
>>> roglic = Rider(18655) # The rider ID comes from the rider page URL
>>> roglic.year_results(2020).results_df.head() # A pandas DataFrame of Roglic's 2020 results



Unnamed: 0,Date,Pos,GC,Race,UCI,Unnamed: 6,Race_ID
0,21.06,1.0,,Slovenia RR | CN,100.0,Show more,2127.0
1,8.11,,,Vuelta a España | 2.UWT,,Show more,
2,8.11,1.0,,Vuelta a España | Overall,850.0,,23.0
3,4.1,1.0,,Liège-Bastogne-Liège | 1.UWT,500.0,Show more,11.0
4,20.09,,,Tour de France | 2.UWT,,Show more,


In [124]:
>>> from first_cycling_api import Ranking
>>> ranking = Ranking(h=1, rank=1, y=2024, page=1) # Parameters from corresponding URL
>>> ranking.table # A pandas DataFrame of the rankings table



Unnamed: 0,Pos,Rider,Nation,Team,Points,Rider_ID,Team_ID
0,1,Pogacar Tadej,Slovenia,UAE Team Emirates,11655,45992,21841
1,2,Evenepoel Remco,Belgium,Soudal Quick-Step,6062,84019,26329
2,3,Philipsen Jasper,Belgium,Alpecin-Deceuninck,479,45363,28019
3,4,O'Connor Ben,Australia,Decathlon AG2R La Mondiale,4131,42186,23295
4,5,van der Poel Mathieu,Netherlands,Alpecin-Deceuninck,4053,16672,28019
...,...,...,...,...,...,...,...
95,96,Storer Michael,Australia,Tudor Pro Cycling Team,9680,37447,34245
96,97,Baudin Alex,France,Decathlon AG2R La Mondiale,9630,100033,23295
97,98,Segaert Alec,Belgium,Lotto Dstny,9430,139003,27882
98,99,Albanese Vincenzo,Italy,ARKEA-B&B HOTELS,9350,37351,27967


In [64]:
from first_cycling_api import RaceEdition
tour_de_swiss = RaceEdition(race_id = 16, year=2024)
tour_de_swiss.stage_profiles()

<first_cycling_api.race.endpoints.RaceEndpoint at 0x26aea4e4140>

In [66]:
print(tour_de_swiss)

RaceEdition(2024 16)


In [68]:
profiles = tour_de_swiss.stage_profiles()
profiles

<first_cycling_api.race.endpoints.RaceEndpoint at 0x26a8d1c1c70>