In [1]:
# %load prepareStats.py
import pandas as pd
import matplotlib
import os

# Text values of the combined year/round key that mark a row without a usable
# race reference ("nan0n" appears to be what a missing year plus a missing
# round produce once they are concatenated as strings).
unknownResult = [
    "nan0n",
    "NaN",
    "NA",
    "inf",
]

# Labels counted as crashes (presumably matched against the status table;
# not referenced in the cells visible here).
crashes = [
    "Accident",
    "Collision",
    "Fatal accident",
    "Collision damage",
    "Spun off",
]

# Nationality labels grouped by continent.
Europe = [
    "Austrian",
    "Belgian",
    "British",
    "Czech",
    "Danish",
    "Dutch",
    "East German",
    "Finnish",
    "French",
    "German",
    "Hungarian",
    "Irish",
    "Italian",
    "Liechtensteiner",
    "Monegasque",
    "Polish",
    "Portuguese",
    "Russian",
    "Spanish",
    "Swedish",
    "Swiss",
]
NAmerica = [
    "American",
    "Canadian",
    "Mexican",
]
SAmerica = [
    "Argentine",
    "Brazilian",
    "Chilean",
    "Colombian",
    "Uruguayan",
    "Venezuelan",
]
Asia = [
    "Chinese",
    "Indian",
    "Indonesian",
    "Japanese",
    "Malaysian",
    "Thai",
    "Hong Kong",
]
Africa = [
    "Rhodesian",
    "South African",
]
Oceania = [
    "Australian",
    "New Zealander",
]

# Dual nationalities that span two continents.
multiple = [
    "American-Italian",
    "Argentine-Italian",
]

def _read_table(filename):
    """Read one CSV file from the local ``data`` directory into a DataFrame."""
    return pd.read_csv(os.path.join("data", filename))


# Raw input tables, one DataFrame per CSV file.
drivers = _read_table("drivers.csv")
results = _read_table("results.csv")
races = _read_table("races.csv")
circuits = _read_table("circuits.csv")
status = _read_table("status.csv")
constructors = _read_table("constructors.csv")

# Attach circuit details to every race. This touches only `races`, so it can
# run before the driver join without changing the outcome.
races = races.merge(circuits, on="circuitId", how="right")

# Fold each lookup table into `results`, one key at a time. Every join is a
# right join, so lookup rows without a matching result are kept as well.
for lookup, key in (
    (drivers, "driverId"),
    (races, "raceId"),
    (status, "statusId"),
    (constructors, "constructorId"),
):
    results = results.merge(lookup, on=key, how="right")

# Driver display name.
results["fullname"] = results["forename"] + " " + results["surname"]

# Rebuild the date text from its first six characters plus the four-digit
# season year, then parse it as day/month/year.
results["date"] = results["date"].str.slice(0, 6) + results["year"].astype(str).str.slice(0, 4)
results["date"] = pd.to_datetime(results["date"], format="%d/%m/%Y")

results["year"] = results["year"].astype(float)
results["round"] = results["round"].astype(float)

# Non-numeric finishing positions become NaN; the column stays aligned with
# the frame, so no rows are dropped here.
results["position"] = pd.to_numeric(results["position"], errors="coerce").astype(float)

# Sortable race key: four-digit year followed by the zero-padded round,
# e.g. 1950.0 and 1.0 -> "195001".
results["year_round"] = (
    results["year"].astype(str).str.slice(0, 4)
    + results["round"].astype(str).str.slice(0, -2).str.zfill(2)
)

# Drop rows without a usable race key. The explicit copy makes the assignment
# below write to an independent frame instead of a slice of the old one.
results = results[~results.year_round.isin(unknownResult)].copy()
results["year_round"] = pd.to_numeric(results["year_round"], errors="coerce").dropna().astype(int)

# Tag every row with the continent of the driver's nationality
# (`nationality_x`); nationalities found in none of the lists stay unset.
continent_members = (
    ("Europe", Europe),
    ("NAmerica", NAmerica),
    ("SAmerica", SAmerica),
    ("Asia", Asia),
    ("Africa", Africa),
    ("Oceania", Oceania),
    ("multiple", multiple),
)
for continent_label, nationalities in continent_members:
    in_group = results.nationality_x.isin(nationalities)
    results.loc[in_group, "driverContinent"] = continent_label

# Subsets by finishing position: first place, top three and top six.
# Rows with a missing position satisfy none of the comparisons.
# The constructors table is already merged into `results` earlier in this
# cell, so it is not merged again here.
wins = results[results["position"] == 1]
podiums = results[results["position"] < 4]
top6 = results[results["position"] < 7]


In [2]:
# Keep only the rows whose race key is not one of the unknown markers.
has_known_key = ~results["year_round"].isin(unknownResult)
results = results[has_known_key]

In [3]:
# Number of result rows per driver name.
entries = results.groupby(["fullname"]).size().rename("entries")

In [4]:
# Count the distinct locations each driver appears at, then attach that count
# to every result row of the driver.
locations_per_driver = results.groupby(["fullname"])["location"].nunique()
totalLocations = locations_per_driver.rename("totalLocations")
results = results.merge(totalLocations, on="fullname", how="right")

In [5]:
# Drivers whose only location is Indianapolis.
single_venue = results["totalLocations"] == 1
at_indianapolis = results["location"] == "Indianapolis"
driversIndyOnly = results.loc[single_venue & at_indianapolis, "fullname"].drop_duplicates().tolist()

In [6]:
# Earliest and latest race keys in the data, and the drivers listed for each.
race_keys = results["year_round"]
firstGP = race_keys.min()
lastGP = race_keys.max()
firstDrivers = results.loc[race_keys == firstGP, "fullname"].tolist()
lastDrivers = results.loc[race_keys == lastGP, "fullname"].tolist()

## The shiniest debuts

(Excluding drivers who took part in the very first F1 race, and drivers who only ever raced in the Indianapolis 500.)

In [7]:
# Order the rows chronologically and renumber them 0..n-1. The old index is
# discarded (drop=True) rather than kept as an extra "index" column, so the
# cell can be re-run safely.
results = results.sort_values(by="year_round").reset_index(drop=True)

In [8]:
# Row label of each driver's earliest race key. idxmin yields index labels,
# so the rows are fetched with label-based .loc.
debut_labels = results.groupby(["driverId"])["year_round"].agg(pd.Series.idxmin)
firstRaces = results.loc[debut_labels]

# Leave out everyone listed for the very first race in the data.
firstRaces = firstRaces[~firstRaces.fullname.isin(firstDrivers)]

# Leave out drivers whose only location is Indianapolis.
firstRaces = firstRaces[~firstRaces.fullname.isin(driversIndyOnly)]

firstRaces = firstRaces[["fullname", "year", "location", "position"]].sort_values(by=["year", "position"])

In [22]:
# Best debut results first; ties are ordered by year.
debut_columns = ["fullname", "year", "location", "position"]
firstRaces[debut_columns].sort_values(by=["position", "year"]).head(50)

Unnamed: 0,fullname,year,location,position
2319,Giancarlo Baghetti,1961.0,Reims,1.0
35,Alberto Ascari,1950.0,Monte-Carlo,2.0
135,Dorino Serafini,1950.0,Monza,2.0
893,Karl Kling,1954.0,Reims,2.0
15283,Jacques Villeneuve,1996.0,Melbourne,2.0
22151,Kevin Magnussen,2014.0,Melbourne,2.0
345,Jean Behra,1952.0,Bern,3.0
1083,Cesare Perdisa,1955.0,Monte-Carlo,3.0
1420,Masten Gregory,1957.0,Monte-Carlo,3.0
4384,Reine Wisell,1970.0,New York State,3.0


## The most successful farewells

(Excluding drivers who took part in the most recent race in the data, and drivers who only ever raced in the Indianapolis 500.)

In [11]:
# Chronological order with a fresh 0..n-1 index.
chronological = results.sort_values("year_round")
results = chronological.reset_index(drop=True)

In [12]:
# Row label of each driver's latest race key, then the matching rows.
farewell_labels = results.groupby(["fullname"])["year_round"].agg(pd.Series.idxmax)
lastRaces = results.loc[farewell_labels]

In [13]:
# Remove drivers listed for the latest race in the data and drivers whose
# only location is Indianapolis.
in_latest_race = lastRaces.fullname.isin(lastDrivers)
indy_only = lastRaces.fullname.isin(driversIndyOnly)
lastRaces = lastRaces[~(in_latest_race | indy_only)]

In [23]:
# Best farewell results first; ties are ordered by year.
farewell_columns = ["fullname", "year", "location", "position"]
lastRaces[farewell_columns].sort_values(by=["position", "year"]).head(20)

Unnamed: 0,fullname,year,location,position
238,Luigi Fagioli,1951.0,Reims,1.0
3724,Jim Clark,1968.0,Midrand,1.0
151,Dorino Serafini,1950.0,Monza,2.0
1283,Paul Frère,1956.0,Spa,2.0
1789,Mike Hawthorn,1958.0,Casablanca,2.0
4675,Jo Siffert,1971.0,New York State,2.0
14382,Alain Prost,1993.0,Adelaide,2.0
22114,Mark Webber,2013.0,São Paulo,2.0
23366,Nico Rosberg,2016.0,Abu Dhabi,2.0
2430,Tony Brooks,1961.0,New York State,3.0


It's safe to say that not a single driver has ever ended his F1 career with a satisfying victory. 

Luigi Fagioli's first and last win was a shared drive, with [Fagioli finishing his part 11th](https://en.wikipedia.org/wiki/Luigi_Fagioli):

> His only Grand Prix of 1951 was his last, but he nevertheless won the French Grand Prix with Juan-Manuel Fangio, earning the distinction of being the oldest person to ever win a Formula One race. During the race, the Alfa Romeo team manager ordered him to hand over his healthy car to Fangio while Fagioli would drive Fangio's car, which was plagued with engine problems. Ferrari had done the same, ordering José Froilán González to hand over to the quicker and more experienced Alberto Ascari; this was common practice in Grand Prix racing before 1957. Fangio battled hard with Ascari and took victory while Fagioli finished 11th and last in Fangio's original car, 22 laps down. Fagioli was so incensed by this that he retired from Grand Prix racing after this race.

Jim Clark was killed in an F2 race between the 1968 South African Grand Prix and the 1968 Spanish Grand Prix; little did he know that his South African victory would be his farewell win.

## Careers crowned with the best ever result in the very last race

In [15]:
# Work on a chronologically ascending copy, so that a tie for a driver's best
# position resolves to the first such row in that order.
oldest_first = results.sort_values(by="year_round", ascending=True)

# One row label per driver for the numerically lowest position; drivers
# without any numeric position are dropped.
best_labels = oldest_first.groupby(["fullname"])["position"].agg(pd.Series.idxmin).dropna()
bestRaces = oldest_first.loc[best_labels]

bestRaces = bestRaces[["fullname", "year", "location", "position"]].sort_values(by=["position", "year"])

# Last race (columns suffixed _x) next to best race (suffixed _y), plus the
# number of entries per driver.
bestLastRaces = lastRaces.merge(bestRaces, on="fullname", how="right")
bestLastRaces = bestLastRaces.merge(entries, on="fullname", how="right")

In [16]:
# Keep drivers whose last race (_x) is the same race as their best one (_y).
# The year is compared as well as position and location, so that the same
# result at the same venue in a different season does not count as a match.
same_race = (
    (bestLastRaces["position_x"] == bestLastRaces["position_y"])
    & (bestLastRaces["location_x"] == bestLastRaces["location_y"])
    & (bestLastRaces["year_x"] == bestLastRaces["year_y"])
)

# Single-entry drivers are excluded.
more_than_one_entry = bestLastRaces["entries"] > 1

bestLastRaces = bestLastRaces[same_race & more_than_one_entry]

In [17]:
# Drivers with the most entries first.
shown_columns = ["fullname", "location_x", "year_x", "position_x", "entries"]
bestLastRaces[shown_columns].dropna().sort_values(by="entries", ascending=False).head(10)

Unnamed: 0,fullname,location_x,year_x,position_x,entries
394,Jan Magnussen,Montreal,1998.0,6.0,25
481,Jérôme d'Ambrosio,Monza,2012.0,13.0,20
164,Corrado Fabi,Dallas,1984.0,7.0,18
570,Michael Andretti,Monza,1993.0,3.0,13
647,Paul Frère,Spa,1956.0,2.0,11
111,Bruce Halford,Reims,1960.0,8.0,9
532,Luigi Fagioli,Reims,1951.0,1.0,8
137,Chico Landi,Buenos Aires,1956.0,4.0,6
150,Chuck Daigh,California,1960.0,10.0,6
766,Skip Barber,New York State,1972.0,16.0,6


## Careers launched with the best result, never to be achieved again 

This is kind of funny: Jan Magnussen ended his career with his best result (6th place), then 14 years later his son Kevin started his own F1 career with his own best result (2nd place). Both have the longest careers among the drivers with similar fates.

In [18]:
# Work on a chronologically descending copy, so that a tie for a driver's
# best position resolves to the first such row in that order.
newest_first = results.sort_values(by="year_round", ascending=False)

# One row label per driver for the numerically lowest position; drivers
# without any numeric position are dropped.
best_labels = newest_first.groupby(["fullname"])["position"].agg(pd.Series.idxmin).dropna()
bestRaces = newest_first.loc[best_labels]

bestRaces = bestRaces[["fullname", "year", "location", "position"]].sort_values(by=["position", "year"])

# First race (columns suffixed _x) next to best race (suffixed _y), plus the
# number of entries per driver.
bestFirstRaces = firstRaces.merge(bestRaces, on="fullname", how="right")
bestFirstRaces = bestFirstRaces.merge(entries, on="fullname", how="right")

In [19]:
# Keep drivers whose first race (_x) is the same race as their best one (_y).
# The year is compared as well as position and location, so that the same
# result at the same venue in a different season does not count as a match.
same_race = (
    (bestFirstRaces["position_x"] == bestFirstRaces["position_y"])
    & (bestFirstRaces["location_x"] == bestFirstRaces["location_y"])
    & (bestFirstRaces["year_x"] == bestFirstRaces["year_y"])
)

# Single-entry drivers are excluded.
more_than_one_entry = bestFirstRaces["entries"] > 1

bestFirstRaces = bestFirstRaces[same_race & more_than_one_entry]

In [20]:
# Drivers with the most entries first.
shown_columns = ["fullname", "location_x", "year_x", "position_x", "entries"]
bestFirstRaces[shown_columns].dropna().sort_values(by="entries", ascending=False).head(10)

Unnamed: 0,fullname,location_x,year_x,position_x,entries
499,Kevin Magnussen,Melbourne,2014.0,2.0,123
255,Felipe Nasr,Melbourne,2015.0,5.0,40
300,Giancarlo Baghetti,Reims,1961.0,1.0,26
701,Reine Wisell,New York State,1970.0,3.0,23
495,Ken Wharton,Bern,1952.0,4.0,16
551,Mark Donohue,Ontario,1971.0,3.0,15
826,Vic Elford,Rouen,1968.0,4.0,13
483,Karl Kling,Reims,1954.0,2.0,12
363,Ian Ashley,Nürburg,1974.0,14.0,11
10,Alan Brown,Bern,1952.0,5.0,9
