In [35]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import wikipedia

# Constants

In [58]:
# Score adjustment keyed by the non-digit part of a race outcome string.
# map_outcome_to_score adds this value to the position points; codes that
# are not listed here contribute 0 there.
MODIFIER_SCORE_MAPPINGS = dict(
    DNS=0,
    NC=0,
    Ret=0,
    DNQ=-5,
    DNPQ=-5,
    DSQ=-10,
    C=0,
    DNP=0,
    EX=0,
    DNA=0,
    WD=0,
    P=10,
    F=5,
    PF=15,
)

# Points per finishing position, keyed by the position written as a string
# ('1' .. '10'). Positions outside this table score 0 in map_outcome_to_score.
POSITION_POINT_MAPPING = {
    str(place): points
    for place, points in enumerate((25, 18, 15, 12, 10, 8, 6, 4, 2, 1), start=1)
}

In [84]:
def parse_table(raw_table, scores_or_teams="scores"):
    """
    Parses HTML table into a reasonable/interpretable
    pandas dataframe.

    INPUT
        raw_table: parsed table element exposing ``find_all`` (e.g. a
            BeautifulSoup ``<table>`` tag)
        scores_or_teams: "scores" reads the column names from the ``<th>``
            cells of the first row; any other value uses the fixed
            team-table column names
    OUTPUT
        parsed_df: one dataframe row per ``<tr>``, holding the raw text of
            its ``<td>`` cells. Rows without ``<td>`` cells (header rows)
            come out as all-missing rows.

    If the number of column names does not match the data, one fallback is
    tried: the seven-column team header (with 'Constructor') for team
    tables, or the score header without its last label for score tables.
    An error raised by the fallback propagates to the caller.
    """
    table_rows = raw_table.find_all('tr')

    # One list of cell texts per table row.
    rows = [[cell.text for cell in tr.find_all('td')] for tr in table_rows]

    if scores_or_teams == "scores":
        # header's first col is treated as index, so we skip that
        header = [th.text.rstrip() for th in table_rows[0].find_all('th')][1:]
    else:
        header = ['Entrant', 'Chassis', 'Power unit',
                  'No.', 'Driver name', 'Rounds']

    try:
        return pd.DataFrame(rows, columns=header)
    except (ValueError, AssertionError):
        # Only a column-count mismatch should trigger the fallback header
        # (pandas raises ValueError; some older releases raised
        # AssertionError). Anything else is a real error and must surface.
        if scores_or_teams != 'scores':
            header = ['Entrant', 'Constructor', 'Chassis', 'Power unit',
                      'No.', 'Driver name', 'Rounds']
        else:
            header = header[:-1]
        return pd.DataFrame(rows, columns=header)


def map_outcome_to_score(race_outcome: str) -> int:
    '''
    Crunches Fantasy points from race outcome.
    INPUT
        race_outcome: representation of race outcome (e.g. "4P")
    OUTPUT
        race_score: score for racer for race

    The digits of race_outcome form the finishing position and every other
    character forms the modifier code. Each part is looked up in its mapping
    (POSITION_POINT_MAPPING / MODIFIER_SCORE_MAPPINGS); a part without an
    entry scores 0. The two values are added.
    '''
    position = "".join(char for char in race_outcome if char.isdigit())
    modifiers = "".join(char for char in race_outcome if not char.isdigit())

    # No mapping key contains whitespace, so stray spaces (including
    # non-breaking ones) inside the cell text, e.g. "1P F", would otherwise
    # make a known code miss the lookup and silently score 0.
    modifiers = "".join(modifiers.split())

    # This just ignores modifiers it doesn't have handling for
    modifier_score = MODIFIER_SCORE_MAPPINGS.get(modifiers, 0)
    position_score = POSITION_POINT_MAPPING.get(position, 0)

    return modifier_score + position_score


def map_outcome_to_position(race_outcome: str) -> int:
    '''
    Extracts the finishing position from a race outcome.
    INPUT
        race_outcome: representation of race outcome (e.g. "4P")
    OUTPUT
        race_position: the digits of race_outcome read as one integer,
            or 10000 when race_outcome contains no digit (e.g. "Ret")
    '''
    digits = "".join(filter(str.isdigit, race_outcome))
    # 10000 is a sentinel that compares worse than any real position.
    return int(digits) if digits else 10000


def score_dataframe_cleanup(dirty_df):
    '''
    Cleans up the text in the dataframe
    and removes e.g. empty rows.

    INPUT
        dirty_df: dataframe of raw cell text with a 'Driver' column, one
            column per race and optionally a 'Points' column
    OUTPUT
        score_df: rows with missing values and the 'Points' column removed,
            newlines stripped, every column except 'Driver' passed through
            map_outcome_to_score
        position_df: same rows and columns, with every column except
            'Driver' passed through map_outcome_to_position

    dirty_df itself is not modified.
    '''
    # errors='ignore' makes the 'Points' column optional without a
    # catch-all except that would also hide unrelated failures.
    cleaned_df = (
        dirty_df
        .dropna()
        .drop('Points', axis=1, errors='ignore')
        .copy()
    )

    # Strip newlines once; both outputs start from the same cleaned text.
    for col in cleaned_df.columns:
        cleaned_df[col] = cleaned_df[col].apply(lambda x: x.replace("\n", ""))

    score_df = cleaned_df.copy()
    position_df = cleaned_df.copy()

    for col in cleaned_df.columns:
        if col != 'Driver':
            score_df[col] = cleaned_df[col].map(map_outcome_to_score)
            position_df[col] = cleaned_df[col].map(map_outcome_to_position)

    return score_df, position_df


def team_dataframe_cleanup(dirty_df):
    '''
    Reduces the parsed team table to its driver and entrant columns.

    Rows containing any missing value are dropped, only the
    "Driver name" and "Entrant" columns are kept (in that order),
    and newlines are removed from the remaining text.
    '''
    wanted_columns = ["Driver name", "Entrant"]
    teams = dirty_df.dropna(axis=0).loc[:, wanted_columns]

    def strip_newlines(text):
        return text.replace("\n", "")

    for name in teams.columns:
        teams[name] = teams[name].apply(strip_newlines)

    return teams


def get_data_by_year(year, score_table_index=-4, team_table_index=0):
    """
    Fetches a season's Wikipedia page and returns its cleaned tables.

    INPUT
        year: season, used to build the page title
            "<year> Formula One World Championship"
        score_table_index: position of the results table among the page's
            'wikitable' tables (default -4, i.e. fourth from the end)
        team_table_index: position of the entries table among the page's
            'wikitable' tables (default 0, i.e. the first one)
    OUTPUT
        score_df, position_df: outputs of score_dataframe_cleanup
        team_df: output of team_dataframe_cleanup
    RAISES
        IndexError: if the page has too few 'wikitable' tables for the
            requested indices

    The table positions depend on the page layout, which is why they can be
    overridden per call.
    """
    wiki = wikipedia.WikipediaPage(f"{year} Formula One World Championship")
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(wiki.html(), "html.parser")
    tables = soup.find_all('table', {'class': 'wikitable'})

    try:
        raw_score_table = tables[score_table_index]
        raw_team_table = tables[team_table_index]
    except IndexError as exc:
        raise IndexError(
            f"{year} page has {len(tables)} wikitable(s); cannot select "
            f"tables {score_table_index} and {team_table_index}"
        ) from exc

    score_table = parse_table(raw_score_table, "scores")
    score_df, position_df = score_dataframe_cleanup(score_table)

    team_table = parse_table(raw_team_table, "teams")
    team_df = team_dataframe_cleanup(team_table)

    return score_df, position_df, team_df


def split_drivers(team_members, drivers):
    '''
    Breaks a string of run-together driver names into a list of names.

    INPUT
        team_members: text containing the names of a team's drivers
        drivers: known driver names to look for
    OUTPUT
        list of the names from drivers found in team_members, in the order
        they appear in drivers. A found name is removed from the text
        before the next name is checked.
    '''
    remaining = team_members
    found = []
    for name in drivers:
        if name not in remaining:
            continue
        found.append(name)
        remaining = remaining.replace(name, "")
    return found

def get_teammate(driver, members):
    '''
    Finds a teammate of driver.

    INPUT
        driver: driver name
        members: iterable of teams, each a collection of driver names
    OUTPUT
        the first name other than driver in the first team that contains
        driver and has such a name; None when there is no such team
    '''
    for team in members:
        if driver in team:
            # A team listing only this driver has no teammate to offer;
            # keep looking instead of failing on an empty list.
            teammate = next((person for person in team if person != driver), None)
            if teammate is not None:
                return teammate
    return None
    
def get_teammate_mappings(score_df, team_df):
    '''
    Adds a "teammate" column to a copy of score_df.

    INPUT
        score_df: dataframe with a "Driver" column
        team_df: dataframe whose "Driver name" column holds each team's
            drivers as one string
    OUTPUT
        copy of score_df with an extra "teammate" column holding the result
        of get_teammate for each driver (missing when none is found)

    Neither input is modified.
    '''
    # Break string of members into separate drivers
    drivers = score_df.Driver.values
    team_rosters = [
        split_drivers(team_members, drivers)
        for team_members in team_df["Driver name"]
    ]

    score_teammate_df = score_df.copy()
    # Assign the whole column at once, by row position, instead of writing
    # label by label inside an iterrows loop.
    score_teammate_df["teammate"] = [
        get_teammate(driver, team_rosters)
        for driver in score_teammate_df.Driver
    ]
    return score_teammate_df


def update_scores_by_comparison(position_teammates_df, score_df, teammate_bonus=3):
    '''
    Adds a bonus to every race in which a driver finished ahead of their teammate.

    INPUT
        position_teammates_df: dataframe with "Driver" and "teammate"
            columns plus one finishing-position column per race
        score_df: per-race scores. The bonus is written by row/column
            position, so score_df must have the same row order and the same
            column positions as position_teammates_df
        teammate_bonus: points added per race won against the teammate
            (default 3)
    OUTPUT
        final_score_df: copy of score_df with the bonuses applied

    Drivers whose teammate is missing or does not appear in the "Driver"
    column are left unchanged. score_df is not modified.
    '''
    final_score_df = score_df.copy()

    for row_index, (_, row) in enumerate(position_teammates_df.iterrows()):
        teammate_row = position_teammates_df.loc[
            position_teammates_df.Driver == row.teammate
        ]
        if teammate_row.empty:
            # No teammate to compare against: nothing to award.
            continue

        for col_index, column in enumerate(position_teammates_df.columns):
            if column in ('Driver', 'teammate'):
                continue
            racer_position = row[column]
            teammate_position = teammate_row[column].iloc[0]
            if racer_position < teammate_position:
                try:
                    final_score_df.iloc[row_index, col_index] += teammate_bonus
                except (IndexError, TypeError):
                    # Best effort: report cells that do not line up with
                    # score_df or are not numeric, and carry on.
                    print(row_index, col_index)

    return final_score_df

In [93]:
# Load the cleaned score, position and team frames for each season.
# Every call fetches that season's Wikipedia page through get_data_by_year,
# so this cell needs network access.
score_df_17, position_df_17, team_df_17 = get_data_by_year(2017)
score_df_18, position_df_18, team_df_18 = get_data_by_year(2018)
score_df_19, position_df_19, team_df_19 = get_data_by_year(2019)
score_df_20, position_df_20, team_df_20 = get_data_by_year(2020)

In [89]:
# Attach each driver's teammate to the finishing positions.
# NOTE(review): no cell defines `position_df` / `team_df`; the output shown
# below lists the 2017 calendar, so the 2017 frames are used - confirm.
position_teammates_df = get_teammate_mappings(position_df_17, team_df_17)
position_teammates_df

Unnamed: 0,Driver,AUS,CHN,BHR,RUS,ESP,MON,CAN,AZE,AUT,...,BEL,ITA,SIN,MAL,JPN,USA,MEX,BRA,ABU,teammate
1,Lewis Hamilton,2,1,2,4,1,7,1,5,4,...,1,1,1,2,1,1,9,4,2,Valtteri Bottas
2,Sebastian Vettel,1,2,1,2,2,1,4,4,2,...,2,3,10000,4,10000,2,4,1,3,Kimi Räikkönen
3,Valtteri Bottas,3,6,3,1,10000,4,2,2,1,...,5,2,3,5,4,5,2,2,1,Lewis Hamilton
4,Kimi Räikkönen,4,5,4,3,10000,2,7,14,5,...,4,5,10000,10000,5,3,3,3,4,Sebastian Vettel
5,Daniel Ricciardo,10000,4,5,10000,3,3,3,1,3,...,3,4,2,3,3,10000,10000,6,10000,Max Verstappen
6,Max Verstappen,5,3,10000,5,10000,5,10000,10000,10000,...,10000,10,10000,1,2,4,1,5,5,Daniel Ricciardo
7,Sergio Pérez,7,9,7,6,4,13,5,10000,7,...,17,9,5,6,7,8,7,9,7,Esteban Ocon
8,Esteban Ocon,10,10,10,7,5,12,6,6,8,...,9,6,10,10,6,6,5,10000,8,Sergio Pérez
9,Carlos Sainz Jr.,8,7,10000,10,7,6,10000,8,10000,...,10,14,4,10000,10000,7,10000,11,10000,Nico Hülkenberg
10,Nico Hülkenberg,11,12,9,8,6,10000,8,10000,13,...,6,13,10000,16,10000,10000,10000,10,6,Carlos Sainz Jr.


In [90]:
# Apply the teammate bonus to the 2017 scores.
# NOTE(review): no cell defines a bare `score_df`; the 2017 frame matches
# the positions used for position_teammates_df - confirm.
score_teammates_final_df = update_scores_by_comparison(position_teammates_df, score_df_17)

In [91]:
# Display the per-race scores returned by update_scores_by_comparison.
score_teammates_final_df

Unnamed: 0,Driver,AUS,CHN,BHR,RUS,ESP,MON,CAN,AZE,AUT,...,HUN,BEL,ITA,SIN,MAL,JPN,USA,MEX,BRA,ABU
1,Lewis Hamilton,21,28,21,12,28,6,28,10,12,...,12,28,28,28,21,28,28,2,12,18
2,Sebastian Vettel,28,21,28,21,21,28,15,15,21,...,28,21,18,0,15,0,21,12,28,18
3,Valtteri Bottas,15,8,15,28,0,15,18,21,28,...,18,10,18,15,10,12,10,21,21,28
4,Kimi Räikkönen,12,10,12,15,0,18,6,0,10,...,18,12,10,0,0,13,15,18,15,12
5,Daniel Ricciardo,0,12,13,0,18,18,18,28,18,...,0,18,15,21,15,15,0,0,8,0
6,Max Verstappen,13,18,0,13,0,10,0,0,0,...,13,0,1,0,28,21,15,28,13,13
7,Sergio Pérez,9,5,9,11,15,0,13,0,9,...,7,0,2,13,11,6,4,6,5,9
8,Esteban Ocon,1,1,1,6,10,3,8,11,4,...,2,5,11,1,1,11,11,13,0,4
9,Carlos Sainz Jr.,7,9,0,1,6,11,0,7,0,...,9,1,0,15,0,0,9,0,0,0
10,Nico Hülkenberg,0,0,5,7,11,0,7,0,3,...,0,11,3,0,3,0,0,0,4,11
