In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import wikipedia

# Constants

In [2]:
# Points adjustment keyed by the non-digit part of a race outcome
# (e.g. the "P" in "4P"). Codes are grouped by the value they carry.
MODIFIER_SCORE_MAPPINGS = {
    # Codes that neither add nor remove points.
    **dict.fromkeys(('DNS', 'NC', 'Ret', 'C', 'DNP', 'EX', 'DNA', 'WD'), 0),
    # Codes that cost 5 points.
    **dict.fromkeys(('DNQ', 'DNPQ'), -5),
    # Code that costs 10 points.
    'DSQ': -10,
    # Bonus codes; 'PF' is worth the sum of 'P' and 'F'.
    'P': 10,
    'F': 5,
    'PF': 15,
}

# Points per finishing position, keyed by the position as a string
# ('1' through '10'). Positions outside this range have no entry.
POSITION_POINT_MAPPING = {
    str(place): points
    for place, points in enumerate((25, 18, 15, 12, 10, 8, 6, 4, 2, 1), start=1)
}

In [5]:
def parse_table(raw_table, scores_or_teams="scores"):
    """
    Turns a parsed HTML table into a pandas dataframe of cell texts.

    Every `tr` becomes one dataframe row built from the text of its `td`
    cells; a row without any `td` (such as a header row) therefore yields
    an all-missing row that callers are expected to drop.

    raw_table: object exposing `find_all`, e.g. a BeautifulSoup table tag.
    scores_or_teams: "scores" reads the column names from the `th` cells
        of the first row; any other value uses the fixed entrant-table
        column names.
    """
    rows = raw_table.find_all('tr')
    body = [[cell.text for cell in row.find_all('td')] for row in rows]

    if scores_or_teams != "scores":
        columns = ['Entrant', 'Chassis', 'Power unit',
                   'No.', 'Driver name', 'Rounds']
    else:
        # The first header cell is skipped: its column is treated as the
        # index, so it has no counterpart among the `td` cells.
        header_cells = rows[0].find_all('th')
        columns = [th.text.rstrip() for th in header_cells][1:]

    return pd.DataFrame(body, columns=columns)


def map_outcome_to_score(race_outcome: str) -> int:
    '''
    Converts a single race outcome into Fantasy points.
    INPUT
        race_outcome: representation of race outcome (e.g. "4P"); the
            digits are read as the position, everything else as the
            modifier code
    OUTPUT
        race_score: position points plus modifier points
    '''
    position = "".join(ch for ch in race_outcome if ch.isdigit())
    modifiers = "".join(ch for ch in race_outcome if not ch.isdigit())

    # Positions and modifier codes without an entry contribute 0 points.
    position_score = POSITION_POINT_MAPPING.get(position, 0)
    modifier_score = MODIFIER_SCORE_MAPPINGS.get(modifiers, 0)
    return modifier_score + position_score


def map_outcome_to_position(race_outcome: str) -> int:
    '''
    Extracts the finishing position from a race outcome.
    INPUT
        race_outcome: representation of race outcome (e.g. "4P")
    OUTPUT
        race_position: the digits of the outcome read as one integer,
            or 10000 when the outcome contains no digits (e.g. "Ret")
    '''
    digits = "".join(filter(str.isdigit, race_outcome))
    # 10000 is a sentinel for "no position in this outcome".
    return int(digits) if digits else 10000


def score_dataframe_cleanup(dirty_df):
    '''
    Builds the score and position dataframes from the parsed results table.

    Rows with missing values and the 'Points' column are removed and
    newlines are stripped from every cell. Every column except 'Driver'
    is then converted with map_outcome_to_score (first result) and
    map_outcome_to_position (second result).

    Returns (score_df, position_df); the input dataframe is not modified.
    '''
    cleaned = dirty_df.dropna().drop(columns='Points')
    for col in cleaned.columns:
        cleaned[col] = cleaned[col].apply(lambda cell: cell.replace("\n", ""))

    score_df = cleaned.copy()
    position_df = cleaned.copy()
    for col in cleaned.columns:
        if col == 'Driver':
            # Driver names stay as text in both outputs.
            continue
        score_df[col] = cleaned[col].map(map_outcome_to_score)
        position_df[col] = cleaned[col].map(map_outcome_to_position)

    return score_df, position_df


def team_dataframe_cleanup(dirty_df):
    '''
    Reduces the parsed entrant table to driver names and entrant.

    Rows with any missing value are dropped, only the "Driver name" and
    "Entrant" columns are kept (in that order), and newlines are removed
    from their text.
    '''
    wanted = ["Driver name", "Entrant"]

    def strip_newlines(cell):
        return cell.replace("\n", "")

    team_df = dirty_df.dropna(axis=0).loc[:, wanted]
    for name in wanted:
        team_df[name] = team_df[name].apply(strip_newlines)

    return team_df


def get_data_by_year(year):
    """
    Fetches one season's results page from Wikipedia and returns the
    cleaned score, position and team dataframes.

    year: season used to build the page title
        "<year> Formula One World Championship".

    Returns (score_df, position_df, team_df) as produced by
    score_dataframe_cleanup and team_dataframe_cleanup.

    Raises IndexError when the page has fewer 'wikitable' tables than the
    positional lookups below require.
    """
    wiki = wikipedia.WikipediaPage(f"{year} Formula One World Championship")
    # Name the parser explicitly so parsing does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's
    # "guessed at parser" warning). html.parser ships with Python.
    soup = BeautifulSoup(wiki.html(), "html.parser")
    tables = soup.find_all('table', {'class': 'wikitable'})

    # Tables are picked by position on the page: the results table is
    # taken as the fourth-from-last wikitable and the entrant table as the
    # first one.
    # NOTE(review): this depends on the page layout - confirm per season.
    raw_score_table = tables[-4]
    raw_team_table = tables[0]

    score_table = parse_table(raw_score_table, "scores")
    score_df, position_df = score_dataframe_cleanup(score_table)

    team_table = parse_table(raw_team_table, "teams")
    team_df = team_dataframe_cleanup(team_table)

    return score_df, position_df, team_df

In [6]:
# Load the 2019 season tables (this fetches the page from Wikipedia).
score_df, position_df, team_df = get_data_by_year(2019)

In [7]:
# Show the cleaned entrant table (driver names and entrant only).
team_df

Unnamed: 0,Driver name,Entrant
2,Kimi Räikkönen Antonio Giovinazzi,Alfa Romeo Racing
3,Sebastian Vettel Charles Leclerc,Scuderia Ferrari[a]
4,Romain Grosjean Kevin Magnussen,Haas F1 Team[b]
5,Lando Norris Carlos Sainz Jr.,McLaren F1 Team
6,Lewis Hamilton Valtteri Bottas,Mercedes AMG Petronas Motorsport
7,Sergio Pérez Lance Stroll,SportPesa Racing Point F1 Team
8,Pierre Gasly Alexander Albon Max Verstappen,Aston Martin Red Bull Racing
9,Daniel Ricciardo Nico Hülkenberg,Renault F1 Team
10,Alexander Albon Pierre Gasly Daniil Kvyat,Red Bull Toro Rosso Honda
11,George Russell Robert Kubica,ROKiT Williams Racing


In [8]:
# Show the per-race points computed by map_outcome_to_score.
score_df

Unnamed: 0,Driver,AUS,BHR,CHN,AZE,ESP,MON,CAN,FRA,AUT,...,HUN,BEL,ITA,SIN,RUS,JPN,MEX,USA,BRA,ABU
1,Lewis Hamilton,28,25,25,18,30,35,25,35,10,...,25,18,20,12,30,20,25,18,6,40
2,Valtteri Bottas,30,18,28,35,28,15,17,18,15,...,4,15,18,10,18,25,15,35,0,12
3,Max Verstappen,15,12,12,12,15,12,10,12,30,...,33,0,4,15,12,0,8,15,35,18
4,Charles Leclerc,10,30,10,15,10,0,15,15,28,...,12,35,35,28,25,8,27,17,0,15
5,Sebastian Vettel,12,10,15,15,12,18,28,15,12,...,15,17,0,25,0,28,18,0,0,10
6,Carlos Sainz Jr.,0,0,0,6,4,8,0,8,4,...,10,0,0,0,8,10,0,4,15,1
7,Pierre Gasly,0,4,13,0,8,15,4,1,6,...,8,2,0,4,0,6,2,0,18,0
8,Alexander Albon,0,2,1,0,0,4,0,0,0,...,1,10,8,8,10,12,10,10,0,8
9,Daniel Ricciardo,0,0,6,0,0,2,8,0,0,...,0,0,12,0,0,-10,4,8,8,0
10,Sergio Pérez,0,1,4,8,0,0,0,0,0,...,0,8,6,0,6,4,6,1,2,6


In [9]:
# Show the per-race positions; 10000 marks outcomes without a position.
position_df

Unnamed: 0,Driver,AUS,BHR,CHN,AZE,ESP,MON,CAN,FRA,AUT,...,HUN,BEL,ITA,SIN,RUS,JPN,MEX,USA,BRA,ABU
1,Lewis Hamilton,2,1,1,2,1,1,1,1,5,...,1,2,3,4,1,3,1,2,7,1
2,Valtteri Bottas,1,2,2,1,2,3,4,2,3,...,8,3,2,5,2,1,3,1,10000,4
3,Max Verstappen,3,4,4,4,3,4,5,4,1,...,2,10000,8,3,4,10000,6,3,1,2
4,Charles Leclerc,5,3,5,5,5,10000,3,3,2,...,4,1,1,2,3,6,4,4,18,3
5,Sebastian Vettel,4,5,3,3,4,2,2,5,4,...,3,4,13,1,10000,2,2,10000,17,5
6,Carlos Sainz Jr.,10000,19,14,7,8,6,11,6,8,...,5,10000,10000,12,6,5,13,8,3,10
7,Pierre Gasly,11,8,6,10000,6,5,8,10,7,...,6,9,11,8,14,7,9,16,2,18
8,Alexander Albon,14,9,10,11,11,8,10000,15,15,...,10,5,6,6,5,4,5,5,14,6
9,Daniel Ricciardo,10000,18,7,10000,12,9,6,11,12,...,14,14,4,14,10000,10000,8,6,6,11
10,Sergio Pérez,13,10,8,6,15,12,12,12,11,...,11,6,7,10000,7,8,7,10,9,7


In [13]:
def split_drivers(team_members, drivers):
    """
    Splits a team's concatenated driver string into individual names.

    team_members: string containing one or more driver names.
    drivers: iterable of known driver names to look for.

    Returns the names from `drivers` found in `team_members`, in the order
    they appear in `drivers` (not their order within the string). Names
    not present in `drivers` are ignored.
    """
    results = []
    for d in drivers:
        if d in team_members:
            results.append(d)
            # Remove the matched name so later candidates cannot match
            # against text that has already been claimed.
            team_members = team_members.replace(d, "")
    return results

def get_teammate(driver, members):
    """
    Finds a teammate of `driver`.

    driver: name of the driver to look up.
    members: iterable of rosters, each a collection of driver names.

    Returns the first other name in the first roster that contains
    `driver` and has at least one other member. Returns None when the
    driver is in no roster or only appears alone, instead of raising
    IndexError on a single-driver roster.
    """
    for roster in members:
        if driver in roster:
            teammate = next(
                (person for person in roster if person != driver), None)
            if teammate is not None:
                return teammate
    return None
    
def get_teammate_mappings(score_df, team_df):
    """
    Returns a copy of `score_df` with a "teammate" column added.

    score_df: dataframe with a "Driver" column; despite the name, any
        per-driver frame works (the notebook passes the position table).
    team_df: dataframe whose "Driver name" column holds each team's
        drivers as one concatenated string.

    The teammate of each driver is looked up with get_teammate after
    splitting every team string with split_drivers. Neither input
    dataframe is modified.
    """
    # Break each team's string of members into separate drivers.
    drivers = score_df.Driver.values
    rosters = team_df["Driver name"].map(
        lambda names: split_drivers(names, drivers)).values

    score_teammate_df = score_df.copy()
    # Mapping the column assigns one value per row by position, so the
    # result does not depend on index labels being unique.
    score_teammate_df["teammate"] = score_teammate_df["Driver"].map(
        lambda driver: get_teammate(driver, rosters))
    return score_teammate_df


# Attach each driver's teammate to the position table, then display it.
position_teammates_df = get_teammate_mappings(position_df, team_df)
position_teammates_df

Unnamed: 0,Driver,AUS,BHR,CHN,AZE,ESP,MON,CAN,FRA,AUT,...,BEL,ITA,SIN,RUS,JPN,MEX,USA,BRA,ABU,teammate
1,Lewis Hamilton,2,1,1,2,1,1,1,1,5,...,2,3,4,1,3,1,2,7,1,Valtteri Bottas
2,Valtteri Bottas,1,2,2,1,2,3,4,2,3,...,3,2,5,2,1,3,1,10000,4,Lewis Hamilton
3,Max Verstappen,3,4,4,4,3,4,5,4,1,...,10000,8,3,4,10000,6,3,1,2,Pierre Gasly
4,Charles Leclerc,5,3,5,5,5,10000,3,3,2,...,1,1,2,3,6,4,4,18,3,Sebastian Vettel
5,Sebastian Vettel,4,5,3,3,4,2,2,5,4,...,4,13,1,10000,2,2,10000,17,5,Charles Leclerc
6,Carlos Sainz Jr.,10000,19,14,7,8,6,11,6,8,...,10000,10000,12,6,5,13,8,3,10,Lando Norris
7,Pierre Gasly,11,8,6,10000,6,5,8,10,7,...,9,11,8,14,7,9,16,2,18,Max Verstappen
8,Alexander Albon,14,9,10,11,11,8,10000,15,15,...,5,6,6,5,4,5,5,14,6,Max Verstappen
9,Daniel Ricciardo,10000,18,7,10000,12,9,6,11,12,...,14,4,14,10000,10000,8,6,6,11,Nico Hülkenberg
10,Sergio Pérez,13,10,8,6,15,12,12,12,11,...,6,7,10000,7,8,7,10,9,7,Lance Stroll


In [None]:
def update_scores_by_comparison(position_teammates_df, score_df):
    """
    Adjusts driver scores based on a comparison with their teammate.

    position_teammates_df: position table with a "teammate" column, as
        returned by get_teammate_mappings.
    score_df: per-race score table.

    Not implemented yet: the comparison rule has not been written, so
    calling this raises NotImplementedError rather than returning
    unadjusted scores.
    """
    # TODO: implement the teammate comparison once the rule is decided.
    raise NotImplementedError(
        "update_scores_by_comparison has not been implemented yet")


# Raises NotImplementedError until the function above is implemented.
score_teammates_final_df = update_scores_by_comparison(position_teammates_df, score_df)