In [20]:
# This script examines the UCI sanctioned racing calendar to look at global changes in the number of events.
# Source is https://www.uci.org/road/calendar, all categories/classes selected
# Year could be extracted from the start date, but UCI 'seasons' differ from calendar years
# for Africa, Asia and Oceania circuits, so when outputting new calendars, add the year as _2XXX before .xlsx
# The script will add the year as a new column.

In [21]:
import pandas as pd
import glob
import os
#Map the UCI Calendar excel output to the Continental circuits using the UCI's country names
continents = {'Americas': ['ANTIGUA AND BARBUDA', 'ARGENTINA', 'ARUBA', 'BELIZE', 'BERMUDA', 
                           'BOLIVARIAN REPUBLIC OF VENEZUELA', 'BOLIVIA', 'BRAZIL', 'CANADA',
                           'CHILE', 'COLOMBIA', 'CUBA', 'DOMINICAN REPUBLIC', 'ECUADOR', 
                           'EL SALVADOR', 'HONDURAS', 'PANAMA', 'PARAGUAY', 'PUERTO RICO', 
                           'SAINT VINCENT AND THE GRENADINES', 'TRINIDAD AND TOBAGO', 
                           'UNITED STATES OF AMERICA', 'URUGUAY','MEXICO', 'COSTA RICA','GUATEMALA'],
              'Africas' : ['ALBANIA', 'ALGERIA', 'ANGOLA', 'CAMEROON', 'CONGO', 'COTE D\'IVOIRE',
                           'EGYPT', 'ETHIOPIA', 'GUYANA', 'LIBYA', 'MALI', 'MAURITIUS', 'MOROCCO',
                           'NAMIBIA', 'RWANDA', 'SENEGAL', 'SOUTH AFRICA', 'SWAZILAND', 'TUNISIA', 
                           'UGANDA', 'ZIMBABWE', 'GABON','ERITREA', 'BURKINA FASO'],
              'Asia' : ['AZERBAIJAN', 'BAHRAIN', 'BRUNEI DARUSSALAM', 'CHINESE TAIPEI', 'GEORGIA',
                        'INDIA', 'ISLAMIC REPUBLIC OF IRAN', 'ISRAEL', 'KAZAKHSTAN', 'KOREA', 'KUWAIT', 
                        'KYRGYZSTAN', 'LEBANON', 'MALAYSIA', 'MONGOLIA', 'MYANMAR', 'OMAN', 'PHILIPPINES',
                        'RUSSIAN FEDERATION', 'SINGAPORE', 'SRI LANKA', 'SYRIAN ARAB REPUBLIC', 'THAILAND',
                        'TURKEY', 'UNITED ARAB EMIRATES', 'VIETNAM','HONG KONG, CHINA','QATAR', 'UZBEKISTAN',
                        'INDONESIA', 'JAPAN','PEOPLE\'S REPUBLIC OF CHINA'],
              'Europe' : ['AUSTRIA', 'BELARUS', 'BELGIUM', 'BOSNIA AND HERZEGOVINA', 'BULGARIA', 'CROATIA',
                          'CYPRUS', 'CZECH REPUBLIC', 'DENMARK', 'ESTONIA', 'FINLAND', 
                          'FORMER YUGOSLAV REPUBLIC OF MACEDONIA', 'FRANCE', 'GERMANY', 'GREAT BRITAIN', 
                          'GREECE', 'HUNGARY', 'ICELAND', 'IRELAND', 'ITALY', 'KOSOVO', 'LATVIA', 'LITHUANIA',
                          'LUXEMBOURG', 'MONTENEGRO', 'NETHERLANDS', 'NORWAY', 'POLAND', 'PORTUGAL', 
                          'REPUBLIC OF MOLDOVA', 'ROMANIA', 'SAN MARINO', 'SERBIA', 
                          'SLOVAKIA', 'SLOVENIA', 'SWEDEN', 'SWITZERLAND', 'UKRAINE','SPAIN'],
              'Oceania' : ['NEW ZEALAND','AUSTRALIA']}

In [22]:
#Pull in every excel sheet from the current folder that starts with Calendar and ends in xlsx. YMMV!

all_data = pd.DataFrame()
for f in glob.glob('Calendar*.xlsx'):
    df = pd.read_excel(f, header=1, encoding='UTF-8')
    year = os.path.basename(f).split('.')[0].split('_')[-1]
    df['Season'] = year
    all_data = all_data.append(df,ignore_index=True)

In [23]:
#Examine the head and length to make sure it's kosher
# all_data['Class'].unique()
# array(['2.2', 'CRT', '2.HC', '2.1', '1.2', '1.1', 'CN', '2.UWT', '1.UWT',
#        '2.Ncup', '1.HC', 'CC', '1.WWT', '1.Ncup', '1.2U', '2.2U', 'JR',
#        '2.WWT', 'JC', 'CM', 'JOJ', 'CDM', nan, 'JO', 'CPE', 'MNM', 'AU1',
#        '2.CH', '1.CH', 'GT2', 'GT1'], dtype=object)

# I suspect there are some classes have changed over the years. MP is ME, CPE was WT stage races during dispute
# with ASO (2.UWT), MNM = Monument and CPE were WT one-days during disputed years (1.UWT)
all_data['Category'].replace('MP', 'ME')
all_data['Class'].replace('CPE', '2.UWT')
all_data['Class'].replace(['MNM','AU1', '1.CH'], '1.UWT')

all_data['Class'].unique()

array(['2.2', 'CRT', '2.HC', '2.1', '1.2', '1.1', 'CN', '2.UWT', '1.UWT',
       '2.Ncup', '1.HC', 'CC', '1.WWT', '1.Ncup', '1.2U', '2.2U', 'JR',
       '2.WWT', 'JC', 'CM', 'JOJ', 'CDM', nan, 'JO', 'CPE', 'MNM', 'AU1',
       '2.CH', '1.CH', 'GT2', 'GT1'], dtype=object)

In [24]:
#Get rid of unwanted columns.
del all_data['EMail']
del all_data['WebSite']
del all_data['Calendar']
del all_data['Venue']
all_data.head()

Unnamed: 0,Date From,Date To,Name,Country,Category,Class,Season
0,24/10/2017,29/10/2017,Vuelta a Colombia Femenina Oro y Paz,COLOMBIA,WE,2.2,2018
1,23/10/2017,01/11/2017,Vuelta a Guatemala,GUATEMALA,ME,2.2,2018
2,04/11/2017,04/11/2017,Subaru Australian Open Criterium,AUSTRALIA,ME,CRT,2018
3,04/11/2017,04/11/2017,Le Tour De France Saitama Criterium,JAPAN,ME,CRT,2018
4,04/11/2017,04/11/2017,Subaru Australian Open Criterium,AUSTRALIA,WE,CRT,2018


In [25]:
for x in ['CC', 'CM', 'CN']:
    row = (all_data.loc[all_data['Class'] == x]).sort_values(by='Season', ascending=0).iloc[0]
label = row.name
base_df = all_data.drop(label)
base_df.head()

Unnamed: 0,Date From,Date To,Name,Country,Category,Class,Season
0,24/10/2017,29/10/2017,Vuelta a Colombia Femenina Oro y Paz,COLOMBIA,WE,2.2,2018
1,23/10/2017,01/11/2017,Vuelta a Guatemala,GUATEMALA,ME,2.2,2018
2,04/11/2017,04/11/2017,Subaru Australian Open Criterium,AUSTRALIA,ME,CRT,2018
3,04/11/2017,04/11/2017,Le Tour De France Saitama Criterium,JAPAN,ME,CRT,2018
4,04/11/2017,04/11/2017,Subaru Australian Open Criterium,AUSTRALIA,WE,CRT,2018


In [9]:
#You can extract the year from the start date, but UCI 'seasons' differ from calendar years
# for Africa, Asia and Oceania circuits, so when outputting new calendars, add the year as _2XXX before .xlsx
# If you want you can just export the combined data

# all_data['Year'] = pd.DatetimeIndex(all_data['Date From']).year

# writer = pd.ExcelWriter('combined.xlsx')
# all_data.to_excel(writer,'Sheet1')

# writer.save()

In [26]:
#We need to make the dictionary of lists one big dictionary to add the Continents

cont_dict_converted = {k: oldk for oldk, oldv in continents.items() for k in oldv}

#Note there are some events that cross borders or Continental championships that have no Continent.
#Add some code here to handle these cases.

base_df['Continent'] = base_df['Country'].map(cont_dict_converted).fillna('Stateless')


In [29]:
#Let's get rid of the national, continental championships and Olympics to remove null categories
cats = ['ME', 'WE']
base_df.Category.isin(cats)
base_df_cats = base_df[base_df.Category.isin(cats)]
group_cont = base_df_cats.groupby("Continent")
group_cont.count().head()

Unnamed: 0_level_0,Date From,Date To,Name,Country,Category,Class,Season
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Africas,351,351,351,351,351,349,351
Americas,839,839,839,839,839,839,839
Asia,807,807,807,807,807,807,807
Europe,5185,5185,5185,5185,5185,5185,5185
Oceania,179,179,179,179,179,179,179


In [33]:
#add in the coordinates for each race so we can geolocate them on a map later

uci_country_coord = pd.DataFrame()
uci_country_coord = pd.read_excel('uci_country_coord.xlsx', encoding='UTF-8')
uci_df = pd.merge(base_df_cats, uci_country_coord, right_on='uci_name', left_on='Country', how="left")

df_nulls = uci_df[uci_df.isnull().any(axis=1)]
loc_nulls = df_nulls.groupby('Country')
loc_nulls['Country'].value_counts()

Country    Country  
ERITREA    ERITREA      2
STATELESS  STATELESS    4
Name: Country, dtype: int64

In [34]:
#Look at the numbers of races per continent
grouped_df = base_df_cats.groupby(['Season','Category', 'Class'])
grouped_df.count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Date From,Date To,Name,Country,Continent
Season,Category,Class,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006,ME,1.1,98,98,98,98,98
2006,ME,1.2,89,89,89,89,89
2006,ME,1.HC,16,16,16,16,16
2006,ME,2.1,43,43,43,43,43
2006,ME,2.2,86,86,86,84,86
2006,ME,2.HC,16,16,16,16,16
2006,ME,CC,6,6,6,6,6
2006,ME,CM,2,2,2,2,2
2006,ME,CN,107,107,107,107,107
2006,ME,JR,4,4,4,4,4
