# TOUR DE FRANCE

In [1]:
import pandas as pd

## STAGES

In [2]:
# LOAD THE STAGE TABLE FROM A CSV FILE IN THE WORKING DIRECTORY
stages = pd.read_csv("k-tdf_stages.csv")

In [3]:
# PREVIEW THE FIRST FIVE ROWS (THE DEFAULT FOR head())
stages.head()

Unnamed: 0,Stage,Date,Distance,Origin,Destination,Type,Winner,Winner_Country
0,1,2017-07-01,14.0,Düsseldorf,Düsseldorf,Individual time trial,Geraint Thomas,GBR
1,2,2017-07-02,203.5,Düsseldorf,Liège,Flat stage,Marcel Kittel,GER
2,3,2017-07-03,212.5,Verviers,Longwy,Medium mountain stage,Peter Sagan,SVK
3,4,2017-07-04,207.5,Mondorf-les-Bains,Vittel,Flat stage,Arnaud Démare,FRA
4,5,2017-07-05,160.5,Vittel,La Planche des Belles Filles,Medium mountain stage,Fabio Aru,ITA


In [4]:
# DISTANCE REPRESENTS THE LENGTH OF THE STAGE IN KM
# describe() REPORTS COUNT, MEAN, STD, MIN, QUARTILES AND MAX FOR THE COLUMN

stages["Distance"].describe()

count    2236.000000
mean      196.782994
std        90.176385
min         1.000000
25%       156.000000
50%       199.000000
75%       236.000000
max       482.000000
Name: Distance, dtype: float64

In [55]:
# MOST OF THE SUPER SHORT STAGES APPEAR TO BE TIME TRIALS
# SHOWS THE FIVE SHORTEST STAGES AMONG THOSE LONGER THAN 5 KM

longer_than_5km = stages["Distance"] > 5
stages.loc[longer_than_5km].sort_values("Distance", ascending=False).tail(5)

Unnamed: 0,Stage,Date,Distance,Origin,Destination,Type,Winner,Winner_Country,Combined,Year
641,P,1987-07-01,6.0,West Berlin,West Berlin,Individual time trial,Jelle Nijdam,NED,West Berlin-West Berlin,1987
940,P,1975-06-26,6.0,Charleroi,Charleroi,Individual time trial,Francesco Moser,ITA,Charleroi-Charleroi,1975
1150,1a,1967-06-29,5.775,Angers,Angers,Individual time trial,José-Maria Errandonea,ESP,Angers-Angers,1967
399,P,1998-07-11,5.6,Dublin,Dublin,Individual time trial,Chris Boardman,GBR,Dublin-Dublin,1998
551,P,1991-07-06,5.4,Lyon,Lyon,Individual time trial,Thierry Marie,FRA,Lyon-Lyon,1991


### TYPES OF STAGES

In [6]:
# WHAT IS A PLAIN STAGE? 
# PLAIN STAGES ARE FLAT
# COUNT HOW OFTEN EACH RAW STAGE TYPE LABEL OCCURS

stages["Type"].value_counts()

Plain stage                      1053
Stage with mountain(s)            530
Individual time trial             205
Flat stage                        110
Team time trial                    87
Hilly stage                        76
High mountain stage                43
Mountain stage                     40
Medium mountain stage              33
Mountain time trial                13
Stage with mountain                11
Mountain Stage                     11
Flat Stage                          9
Half Stage                          5
Transition stage                    4
Intermediate stage                  3
Flat cobblestone stage              2
Plain stage with cobblestones       1
Name: Type, dtype: int64

In [51]:
# PARSE THE DATE COLUMN INTO TIMESTAMPS, THEN PULL OUT THE CALENDAR YEAR
stages["Date"] = pd.to_datetime(stages["Date"])
stages["Year"] = stages["Date"].dt.year

In [52]:
# DISPLAY THE FIRST ROW ONLY
stages.head(1)

Unnamed: 0,Stage,Date,Distance,Origin,Destination,Type,Winner,Winner_Country,Combined,Year
0,1,2017-07-01,14.0,Düsseldorf,Düsseldorf,Individual time trial,Geraint Thomas,GBR,Düsseldorf-Düsseldorf,2017


### STAGES BY TYPE BY YEAR

In [284]:
# COUNT OF EACH RAW STAGE TYPE WITHIN EACH YEAR
stages.groupby("Year")["Type"].value_counts()

Year  Type                  
1903  Plain stage               5
      Stage with mountain(s)    1
1904  Plain stage               5
      Stage with mountain(s)    1
1905  Plain stage               9
                               ..
2016  Mountain time trial       1
2017  Flat stage                8
      Medium mountain stage     6
      High mountain stage       5
      Individual time trial     2
Name: Type, Length: 391, dtype: int64

In [173]:
# SIMPLIFY THE VARIOUS STAGE TYPES INTO THEIR MODERN-ERA EQUIVALENTS
# ONE LOOKUP TABLE REPLACES THE REPEATED BOOLEAN-MASK ASSIGNMENTS; THE KEYS ARE THE
# EXACT LABELS FOUND IN "Type", INCLUDING THE CAPITALISATION VARIANTS
# NOTE(REVIEW): TIME TRIALS ARE GROUPED AS "Flat", EXCEPT THE MOUNTAIN TIME TRIAL - CONFIRM THIS IS INTENDED

STAGE_TYPE_GROUPS = {
    "Plain stage": "Flat",
    "Stage with mountain(s)": "Mountain",
    "Individual time trial": "Flat",
    "Flat stage": "Flat",
    "Team time trial": "Flat",
    "Hilly stage": "Hilly",
    "High mountain stage": "Mountain",
    "Mountain stage": "Mountain",
    "Medium mountain stage": "Mountain",
    "Mountain time trial": "Mountain",
    "Stage with mountain": "Mountain",
    "Mountain Stage": "Mountain",
    "Flat Stage": "Flat",
    "Half Stage": "Other",
    "Transition stage": "Other",
    "Intermediate stage": "Other",
    "Flat cobblestone stage": "Flat",
    "Plain stage with cobblestones": "Flat",
}

# map() LEAVES ANY LABEL THAT IS MISSING FROM THE TABLE AS NaN
stages["Type_Adjusted"] = stages["Type"].map(STAGE_TYPE_GROUPS)

In [246]:
# COUNT WITH THE NEW CLEANED UP TYPE NAMES
# ONE ROW PER (YEAR, SIMPLIFIED TYPE) PAIR

stages.groupby("Year")["Type_Adjusted"].value_counts()

Year  Type_Adjusted
1903  Flat              5
      Mountain          1
1904  Flat              5
      Mountain          1
1905  Flat              9
                       ..
2015  Flat              9
2016  Mountain         11
      Flat             10
2017  Mountain         11
      Flat             10
Name: Type_Adjusted, Length: 238, dtype: int64

In [324]:
# CROSS-CHECK ONE GROUP BY HAND: FLAT STAGES IN THE 1903 TOUR
in_1903 = stages["Year"] == 1903
is_flat = stages["Type_Adjusted"] == "Flat"
len(stages[in_1903 & is_flat])

5

In [275]:
# NUMBER OF STAGES BY YEAR
# size() COUNTS ROWS, SO THE RESULT DOES NOT DEPEND ON "Type_Adjusted" BEING FILLED IN
# (count() SKIPS ROWS WHERE THE COUNTED COLUMN IS NaN)

stages.groupby("Year").size()

Year
1903     6
1904     6
1905    11
1906    13
1907    14
        ..
2013    21
2014    21
2015    21
2016    21
2017    21
Name: Type_Adjusted, Length: 104, dtype: int64

In [220]:
# GROUP THE YEARS INTO DECADES
# FLOOR EACH YEAR TO THE START OF ITS DECADE AND LABEL IT, E.G. 1987 -> "1980s"
# DERIVING THE LABEL MEANS YEARS OUTSIDE 1900-2019 ARE NO LONGER LEFT UNLABELLED

stage_decade_start = stages["Year"] // 10 * 10
# na_action="ignore" PASSES MISSING YEARS THROUGH AS NaN INSTEAD OF FORMATTING THEM
stages["Decade"] = stage_decade_start.map(lambda start: str(int(start)) + "s", na_action="ignore")

### TYPES OF STAGE BY DECADE

In [313]:
# HOW MANY STAGES OF EACH SIMPLIFIED TYPE FALL IN EACH DECADE
stages.groupby("Decade")["Type_Adjusted"].value_counts()

Decade  Type_Adjusted
1900s   Flat              60
        Mountain          18
1910s   Flat              50
        Mountain          40
1920s   Flat             126
        Mountain          52
1930s   Flat             175
        Mountain          80
1940s   Flat              44
        Mountain          19
1950s   Flat             151
        Mountain          81
1960s   Flat             162
        Mountain          80
1970s   Flat             171
        Mountain          80
        Hilly              7
        Other              5
1980s   Flat             168
        Mountain          49
        Hilly             22
1990s   Flat             141
        Mountain          46
        Hilly             31
2000s   Flat             133
        Mountain          54
        Hilly             16
        Other              7
2010s   Flat              86
        Mountain          82
Name: Type_Adjusted, dtype: int64

In [140]:
# EVERY STAGE ROW WHOSE YEAR IS 1903
stages.loc[stages["Year"].eq(1903)]

Unnamed: 0,Stage,Date,Distance,Origin,Destination,Type,Winner,Winner_Country,Combined,Year
2230,1,1903-07-01,467.0,Paris,Lyon,Plain stage,Maurice Garin,FRA,Paris-Lyon,1903
2231,2,1903-07-05,374.0,Lyon,Marseille,Stage with mountain(s),Hippolyte Aucouturier,FRA,Lyon-Marseille,1903
2232,3,1903-07-08,423.0,Marseille,Toulouse,Plain stage,Hippolyte Aucouturier,FRA,Marseille-Toulouse,1903
2233,4,1903-07-12,268.0,Toulouse,Bordeaux,Plain stage,Charles Laeser,SUI,Toulouse-Bordeaux,1903
2234,5,1903-07-13,425.0,Bordeaux,Nantes,Plain stage,Maurice Garin,FRA,Bordeaux-Nantes,1903
2235,6,1903-07-18,471.0,Nantes,Paris,Plain stage,Maurice Garin,FRA,Nantes-Paris,1903


In [143]:
# ROWS (STAGES) PER YEAR, ORDERED FROM THE FEWEST TO THE MOST
stages["Year"].value_counts().sort_values(ascending=True)

1903     6
1904     6
1905    11
1906    13
1907    14
        ..
1977    28
1939    28
1938    29
1970    29
1937    31
Name: Year, Length: 104, dtype: int64

#### LOWEST NUMBER OF STAGES IN THE TOUR

In [144]:
# min() DOES NOT NEED SORTED INPUT, SO THE EXTRA sort_values() PASS WAS REMOVED
stages["Year"].value_counts().min()

6

#### HIGHEST NUMBER OF STAGES IN THE TOUR

In [145]:
# max() DOES NOT NEED SORTED INPUT, SO THE EXTRA sort_values() PASS WAS REMOVED
stages["Year"].value_counts().max()

31

### TOUR DISTANCE BY YEAR

In [56]:
# TOTAL OF ALL STAGE DISTANCES (KM) WITHIN EACH YEAR
stages.groupby("Year")["Distance"].sum()

Year
1903    2428.0
1904    2429.0
1905    3021.0
1906    4543.0
1907    4488.0
         ...  
2013    3403.5
2014    3660.5
2015    3360.3
2016    3529.0
2017    3540.0
Name: Distance, Length: 104, dtype: float64

#### SHORTEST TOUR IN KM

In [59]:
# SMALLEST YEARLY TOTAL OF STAGE DISTANCES
stages.groupby("Year")["Distance"].sum().min()

2428.0

#### LONGEST TOUR IN KM

In [60]:
# LARGEST YEARLY TOTAL OF STAGE DISTANCES
stages.groupby("Year")["Distance"].sum().max()

5745.0

#### MODERN-ERA TOUR LENGTH (1990-PRESENT)

In [156]:
# TOTAL DISTANCE PER TOUR FROM 1990 ONWARDS
# SELECT "Distance" EXPLICITLY: ON RECENT PANDAS A BARE sum() ALSO TRIES TO ADD UP
# THE TEXT AND DATE COLUMNS INSTEAD OF SILENTLY DROPPING THEM
stages[stages["Year"] >= 1990].groupby("Year")[["Distance"]].sum()

Unnamed: 0_level_0,Distance
Year,Unnamed: 1_level_1
1990,3403.8
1991,3914.4
1992,3975.0
1993,3714.3
1994,3978.7
1995,3547.3
1996,3764.9
1997,3943.8
1998,3877.1
1999,3690.8


In [157]:
# MEAN TOTAL DISTANCE OF A TOUR FROM 1990 ONWARDS
# SELECT "Distance" EXPLICITLY: ON RECENT PANDAS A BARE sum() ALSO TRIES TO ADD UP
# THE TEXT AND DATE COLUMNS INSTEAD OF SILENTLY DROPPING THEM
stages[stages["Year"] >= 1990].groupby("Year")[["Distance"]].sum().mean()

Distance    3604.446429
dtype: float64

### MODERN-ERA TOUR DE FRANCE LENGTH: MEDIAN TOUR DISTANCE AND PER-STAGE DISTANCE

In [158]:
# MEDIAN TOTAL DISTANCE OF A TOUR FROM 1990 ONWARDS
# SELECT "Distance" EXPLICITLY: ON RECENT PANDAS A BARE sum() ALSO TRIES TO ADD UP
# THE TEXT AND DATE COLUMNS INSTEAD OF SILENTLY DROPPING THEM
stages[stages["Year"] >= 1990].groupby("Year")[["Distance"]].sum().median()

Distance    3564.7
dtype: float64

In [326]:
# MEDIAN LENGTH OF A SINGLE STAGE FROM 1990 ONWARDS
stages.loc[stages["Year"] >= 1990, "Distance"].median()

187.0

In [327]:
# MEAN LENGTH OF A SINGLE STAGE FROM 1990 ONWARDS
stages.loc[stages["Year"] >= 1990, "Distance"].mean()

169.33640939597316

### STAGE START AND FINISH LOCATIONS

In [8]:
# THE NUMBER REPRESENTS THE TIMES THE LOCATION HAS BEEN FEATURED IN THE TOUR
# IT DOES NOT MEAN THAT THE VERY FIRST STAGE OF THE TOUR STARTS FROM PAU
# value_counts() ORDERS THE LOCATIONS FROM MOST TO LEAST FREQUENT

stages["Origin"].value_counts()

Pau                 62
Bordeaux            56
Luchon              51
Paris               44
Grenoble            40
                    ..
Saint-Gervais        1
Cap d'Agde           1
Girona               1
Andorra la Vella     1
Douai                1
Name: Origin, Length: 591, dtype: int64

In [9]:
# TEN MOST FREQUENT VALUES IN THE "Destination" COLUMN
stages["Destination"].value_counts().head(10)

Paris        108
Bordeaux      79
Pau           60
Luchon        43
Metz          38
Grenoble      35
Nice          35
Marseille     35
Perpignan     35
Caen          33
Name: Destination, dtype: int64

#### MOST COMMON STAGES IN THE TOUR

In [74]:
# JOIN ORIGIN AND DESTINATION INTO ONE "ORIGIN-DESTINATION" LABEL,
# THEN LIST THE FIVE MOST FREQUENT LABELS
stages["Combined"] = stages["Origin"] + "-" + stages["Destination"]
stages["Combined"].value_counts().head(5)

Pau-Bordeaux        18
Luchon-Perpignan    17
Strasbourg-Metz     13
Pau-Luchon          12
Caen-Paris          12
Name: Combined, dtype: int64

#### STAGE WINNERS

In [79]:
# FIVE NAMES THAT APPEAR MOST OFTEN IN THE "Winner" COLUMN
stages["Winner"].value_counts().head(5)

Eddy Merckx        34
Mark Cavendish     30
Bernard Hinault    28
André Leducq       24
André Darrigade    22
Name: Winner, dtype: int64

## INDIVIDUAL STAGES

In [13]:
# LOAD THE STAGE RESULTS DATA AND CHECK HOW EACH COLUMN WAS PARSED
# ONLY A CELL'S LAST EXPRESSION IS RENDERED, SO THE UNDISPLAYED tail(10) PREVIEW WAS DROPPED
stage = pd.read_csv("k-stage_data.csv")

stage.dtypes

edition               int64
year                  int64
stage_results_id     object
rank                 object
time                 object
rider                object
age                 float64
team                float64
points              float64
elapsed              object
bib_number           object
dtype: object

## WINNERS

In [14]:
# LOAD THE WINNERS TABLE FROM A CSV FILE IN THE WORKING DIRECTORY AND PREVIEW TEN ROWS
winners = pd.read_csv("k-tdf_winners.csv")
winners.head(10)

Unnamed: 0,edition,start_date,winner_name,winner_team,distance,time_overall,time_margin,stage_wins,stages_led,height,weight,age,born,died,full_name,nickname,birth_town,birth_country,nationality
0,1,1903-07-01,Maurice Garin,La Française,2428.0,94.553889,2.989167,3,6,1.62,60.0,32,1871-03-03,1957-02-19,,The Little Chimney-sweep,Arvier,Italy,France
1,2,1904-07-02,Henri Cornet,Conte,2428.0,96.098611,2.270556,1,3,,,19,1884-08-04,1941-03-18,,Le rigolo (The joker),Desvres,France,France
2,3,1905-07-09,Louis Trousselier,Peugeot–Wolber,2994.0,,,5,10,,,24,1881-06-29,1939-04-24,,Levaloy / Trou-trou,Paris,France,France
3,4,1906-07-04,René Pottier,Peugeot–Wolber,4637.0,,,5,12,,,27,1879-06-05,1907-01-25,,,Moret-sur-Loing,France,France
4,5,1907-07-08,Lucien Petit-Breton,Peugeot–Wolber,4488.0,,,2,5,,,24,1882-10-18,1917-12-20,Lucien Georges Mazan,Lucien Petit-Breton,Plessé,France,France
5,6,1908-07-13,Lucien Petit-Breton,Peugeot–Wolber,4497.0,,,5,13,,,25,1882-10-18,1917-12-20,Lucien Georges Mazan,Lucien Petit-Breton,Plessé,France,France
6,7,1909-07-05,François Faber,Alcyon–Dunlop,4498.0,,,6,13,1.78,88.0,22,1887-01-26,1915-05-09,,The giant of colombes,Aulnay-sur-Iton,France,Luxembourg
7,8,1910-07-01,Octave Lapize,Alcyon–Dunlop,4734.0,,,4,3,,,22,1887-10-24,1917-07-14,,,Paris,France,France
8,9,1911-07-02,Gustave Garrigou,Alcyon–Dunlop,5343.0,,,2,13,,,26,1884-09-24,1963-01-28,Cyprien Gustave Garrigou,,Vabres,France,France
9,10,1912-06-30,Odile Defraye,Alcyon–Dunlop,5289.0,,,3,13,,,23,1888-07-14,1965-08-21,,,Rumbeke,Belgium,Belgium


#### SPEED IN KM/H

In [84]:
# TIME OVERALL = TIME IN HOURS TO COMPLETE THE RACE
# SPEED = DISTANCE / TIME
# NO dropna() HERE: COLUMN ASSIGNMENT REALIGNS ON THE INDEX, SO ANY DROPPED ROWS
# WOULD COME BACK AS NaN ANYWAY

winners["speed"] = winners["distance"] / winners["time_overall"]

#### AVERAGE SPEED OF A WINNING RIDER

In [87]:
# mean() SKIPS NaN BY DEFAULT, SO NO dropna() IS NEEDED
winners["speed"].mean()

34.77715143473631

#### MEDIAN SPEED

In [88]:
# median() SKIPS NaN BY DEFAULT, SO NO dropna() IS NEEDED
winners["speed"].median()

35.88429203655315

#### MEDIAN WEIGHT OF RIDER IN KILOGRAMS

In [15]:
# median() SKIPS NaN BY DEFAULT, SO NO dropna() IS NEEDED
winners["weight"].median()

69.0

In [16]:
# CONVERT KILOGRAMS TO LBS BY MULTIPLYING BY 2.2
# MISSING WEIGHTS STAY NaN AFTER THE MULTIPLICATION AND median() SKIPS THEM BY DEFAULT

winners["weight_lbs"] = winners["weight"] * 2.2
winners["weight_lbs"].median()

151.8

#### MEDIAN HEIGHT OF RIDERS IN METERS

In [17]:
# MEDIAN HEIGHT OF RIDERS IN METERS
# median() SKIPS NaN BY DEFAULT, SO NO dropna() IS NEEDED

winners["height"].median()

1.77

#### RIDER BMI

In [18]:
# WEIGHT / HEIGHT^2 (KG / M^2)
# DROP MISSING VALUES AFTER THE DIVISION: DROPPING THEM FROM EACH COLUMN FIRST STILL
# LEAVES NaN FOR RIDERS WHO HAVE ONLY ONE OF THE TWO MEASUREMENTS, BECAUSE THE
# DIVISION REALIGNS THE TWO SERIES ON THEIR INDEX

(winners["weight"] / winners["height"] ** 2).dropna()

0      22.862369
6      27.774271
31     22.309356
33     23.147255
34     22.309356
         ...    
101    19.944502
102    19.944502
103    19.944502
104    21.200991
105    19.591837
Length: 67, dtype: float64

#### YOUNGEST TOUR WINNER

In [19]:
# NEED TO MERGE RECENT YEARS 2020-PRESENT FOR WINNERS NOT PRESENT IN ORIGINAL DATASET
# SORTED OLDEST FIRST, SO THE LAST FIVE ROWS ARE THE YOUNGEST WINNERS

winners.sort_values(by="age", ascending=False).tail(5)

Unnamed: 0,edition,start_date,winner_name,winner_team,distance,time_overall,time_margin,stage_wins,stages_led,height,weight,age,born,died,full_name,nickname,birth_town,birth_country,nationality,weight_lbs
7,8,1910-07-01,Octave Lapize,Alcyon–Dunlop,4734.0,,,4,3,,,22,1887-10-24,1917-07-14,,,Paris,France,France,
6,7,1909-07-05,François Faber,Alcyon–Dunlop,4498.0,,,6,13,1.78,88.0,22,1887-01-26,1915-05-09,,The giant of colombes,Aulnay-sur-Iton,France,Luxembourg,193.6
51,52,1965-06-22,Felice Gimondi,Salvarani,4188.0,116.701667,0.033889,3,18,1.81,71.0,22,1942-09-29,2019-08-16,,The Phoenix,Sedrina,Italy,Italy,156.2
105,106,2019-07-06,Egan Bernal,Team Ineos,3349.0,82.95,0.016944,0,2,1.75,60.0,22,1997-01-13,,Egan Arley Bernal Gómez,,Bogotá,Columbia,Colombia,132.0
1,2,1904-07-02,Henri Cornet,Conte,2428.0,96.098611,2.270556,1,3,,,19,1884-08-04,1941-03-18,,Le rigolo (The joker),Desvres,France,France,


In [20]:
# PARSE THE "start_date" COLUMN INTO TIMESTAMPS
winners["start_date"] = pd.to_datetime(winners["start_date"])

In [21]:
import datetime

In [95]:
# CALENDAR YEAR OF EACH EDITION'S START DATE
winners["year"] = pd.to_datetime(winners["start_date"]).dt.year

In [100]:
# DISPLAY THE DERIVED "year" COLUMN
winners["year"]

0      1903
1      1904
2      1905
3      1906
4      1907
       ... 
101    2015
102    2016
103    2017
104    2018
105    2019
Name: year, Length: 106, dtype: int64

#### WINNING SPEED IN 1903

In [124]:
# FILTER BY YEAR AND TAKE THE FIRST MATCH BY POSITION, RATHER THAN RELYING ON THE
# ROW HAPPENING TO CARRY INDEX LABEL 0
is_1903 = winners["year"] == 1903
speed_1903 = winners.loc[is_1903, "distance"].iloc[0] / winners.loc[is_1903, "time_overall"].iloc[0]

#### WINNING SPEED IN 2019

In [123]:
# FILTER BY YEAR AND TAKE THE FIRST MATCH BY POSITION, RATHER THAN RELYING ON THE
# ROW HAPPENING TO CARRY INDEX LABEL 105
is_2019 = winners["year"] == 2019
speed_2019 = winners.loc[is_2019, "distance"].iloc[0] / winners.loc[is_2019, "time_overall"].iloc[0]

In [125]:
# ROUGHLY 15 KM/H DIFFERENCE IN SPEED BETWEEN 1903 AND 2019
# ABSOLUTE DIFFERENCE: 2019 SPEED MINUS 1903 SPEED

diff = speed_2019 - speed_1903
diff

14.695240638828121

In [127]:
# CALCULATE PERCENT CHANGE USING ((V2-V1) / V1) * 100
# HERE V1 IS THE 1903 SPEED AND V2 IS THE 2019 SPEED
# PCT CHANGE IN SPEED, NEARLY 60% INCREASE

((speed_2019 - speed_1903) / speed_1903) * 100

57.227848046097144

In [289]:
# LABEL EACH EDITION WITH ITS DECADE, E.G. 1987 -> "1980s"
# DERIVING THE LABEL FROM THE YEAR MEANS EDITIONS OUTSIDE 1900-2019 ARE NO LONGER
# LEFT UNLABELLED
winner_decade_start = winners["year"] // 10 * 10
# na_action="ignore" PASSES MISSING YEARS THROUGH AS NaN INSTEAD OF FORMATTING THEM
winners["decade"] = winner_decade_start.map(lambda start: str(int(start)) + "s", na_action="ignore")

In [290]:
# LAST 50 ROWS OF THE WINNERS TABLE
winners.tail(50)

Unnamed: 0,edition,start_date,winner_name,winner_team,distance,time_overall,time_margin,stage_wins,stages_led,height,...,died,full_name,nickname,birth_town,birth_country,nationality,weight_lbs,year,decade,speed
56,57,1970-06-27,Eddy Merckx,Faemino–Faema,4254.0,119.530278,0.203333,8,20,1.82,...,,Édouard Louis Joseph Merckx,The Cannibal,Meensel-Kiezegem,Belgium,Belgium,162.8,1970,1970s,35.589309
57,58,1971-06-26,Eddy Merckx,Molteni,3608.0,96.753889,0.1525,4,17,1.82,...,,Édouard Louis Joseph Merckx,The Cannibal,Meensel-Kiezegem,Belgium,Belgium,162.8,1971,1970s,37.290491
58,59,1972-07-01,Eddy Merckx,Molteni,3846.0,108.288333,0.169444,6,15,1.82,...,,Édouard Louis Joseph Merckx,The Cannibal,Meensel-Kiezegem,Belgium,Belgium,162.8,1972,1970s,35.516291
59,60,1973-06-30,Luis Ocaña,Bic,4090.0,122.426111,0.254167,6,14,1.65,...,1994-06-19,Jesús Luis Ocaña Pernía,,Priego,Spain,Spain,114.4,1973,1970s,33.407906
60,61,1974-06-27,Eddy Merckx,Molteni,4098.0,116.282778,0.135556,8,18,1.82,...,,Édouard Louis Joseph Merckx,The Cannibal,Meensel-Kiezegem,Belgium,Belgium,162.8,1974,1970s,35.241676
61,62,1975-06-26,Bernard Thévenet,Peugeot–BP–Michelin,4000.0,114.591944,0.033889,2,8,,...,,,Nanard,Saint-Julien-de-Civry,France,France,,1975,1970s,34.906468
62,63,1976-06-24,Lucien Van Impe,Gitane–Campagnolo,4017.0,116.373056,0.067778,1,12,,...,,,de kleine van Mere,Mere,Belgium,Belgium,127.6,1976,1970s,34.5183
63,64,1977-06-30,Bernard Thévenet,Peugeot–Esso–Michelin,4096.0,115.641667,0.013333,1,8,,...,,,Nanard,Saint-Julien-de-Civry,France,France,,1977,1970s,35.419759
64,65,1978-06-29,Bernard Hinault,Renault–Gitane–Campagnolo,3908.0,108.3,0.050833,3,3,1.74,...,,,"Le Blaireau (The Badger), Le Patron (The Boss)",Yffiniac,France,France,136.4,1978,1970s,36.084949
65,66,1979-06-27,Bernard Hinault,Renault–Gitane,3765.0,103.113889,0.220278,7,17,1.74,...,,,"Le Blaireau (The Badger), Le Patron (The Boss)",Yffiniac,France,France,136.4,1979,1970s,36.513025


#### AVERAGE WINNING SPEED BY DECADE

In [293]:
# MEAN OF THE "speed" COLUMN WITHIN EACH DECADE LABEL
winners.groupby("decade")["speed"].mean()

decade
1900s    25.472096
1910s    25.869263
1920s    25.402801
1930s    30.302641
1940s    32.296398
1950s    34.302476
1960s    35.902913
1970s    35.448817
1980s    37.055978
1990s    39.370973
2000s    40.348746
2010s    40.136622
Name: speed, dtype: float64

In [25]:
# COLUMN DTYPES OF THE WINNERS TABLE
winners.dtypes

edition                   int64
start_date       datetime64[ns]
winner_name              object
winner_team              object
distance                float64
time_overall            float64
time_margin             float64
stage_wins                int64
stages_led                int64
height                  float64
weight                  float64
age                       int64
born                     object
died                     object
full_name                object
nickname                 object
birth_town               object
birth_country            object
nationality              object
weight_lbs              float64
year                      int64
dtype: object

In [26]:
# WINNER NAMES LISTED FROM THE MOST RECENT START DATE TO THE EARLIEST
winners[["winner_name", "start_date"]].sort_values(by="start_date", ascending=False)

Unnamed: 0,winner_name,start_date
105,Egan Bernal,2019-07-06
104,Geraint Thomas,2018-07-07
103,Chris Froome,2017-07-01
102,Chris Froome,2016-07-02
101,Chris Froome,2015-07-04
...,...,...
4,Lucien Petit-Breton,1907-07-08
3,René Pottier,1906-07-04
2,Louis Trousselier,1905-07-09
1,Henri Cornet,1904-07-02


In [27]:
# FIVE MOST COMMON BIRTH COUNTRIES; COUNTS ARE PER ROW, SO A REPEAT WINNER COUNTS ONCE PER WIN
winners["birth_country"].value_counts().head(5)

France     36
Belgium    19
Spain      12
Italy      11
USA        10
Name: birth_country, dtype: int64

In [28]:
# THE RAW LABELS CARRY A LEADING SPACE (E.G. " France"), SO STRIP WHITESPACE BEFORE COUNTING
winners["nationality"].str.strip().value_counts().head(5)

 France           36
 Belgium          18
 Spain            12
 Italy            10
 United States    10
Name: nationality, dtype: int64