## Indy 500 Race Statistics

The data is scraped from the Indianapolis Motor Speedway [website](https://www.indianapolismotorspeedway.com/events/indy500/history/historical-stats/race-stats/), one race-results page per year from 1911 through 2019.

In [1]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import time

In [2]:
# Candidate race years: 1911 through 2019 inclusive (range's stop is exclusive).
years = list(range(1911, 2020))

In [3]:
# Column names for the ten consecutive <td> cells that make up one result row.
_RACE_FIELDS = (
    'finish',
    'start',
    'car_number',
    'driver',
    'car_name/entrant',
    'make_model',
    'status',
    'laps',
    'LED',
    'winnings',
)


def get_race(year, timeout=30):
    """Scrape the Indianapolis 500 results table for a single year.

    Parameters
    ----------
    year : int
        Race year; interpolated into the results-page URL and stored in the
        ``year`` column of every returned row.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up, so one
        stalled request cannot hang the whole scrape.

    Returns
    -------
    pandas.DataFrame
        One row per complete group of ten ``<td>`` cells, holding the ten
        result fields plus ``year``. The frame is empty when the page does
        not answer with HTTP 200 or contains no complete row.
    """
    # Base url with a variable of year
    url = f'https://www.indianapolismotorspeedway.com/events/indy500/history/historical-stats/race-stats/race-results/{year}'
    res = requests.get(url, timeout=timeout)

    # Initialised before the status check so a non-200 response (some years
    # there was no race) yields an empty frame instead of an UnboundLocalError.
    races = []

    if res.status_code == 200:
        soup = BeautifulSoup(res.text, 'lxml')
        # The first <td> on the page is skipped; the rest are read as a flat
        # sequence of cells, ten per result row.
        data = [x.text for x in soup.find_all('td')[1:]]

        width = len(_RACE_FIELDS)
        # Integer division drops any trailing partial row.
        for n in range(len(data) // width):
            base = n * width
            race = dict(zip(_RACE_FIELDS, data[base:base + width]))
            race['year'] = year
            races.append(race)

    return pd.DataFrame(races)

In [4]:
all_races = []

# For every year 1911-2019, try to gather the data
for year in years:
    try:
        df = get_race(year)
    except Exception as exc:
        # Best-effort scrape: a failing year is skipped, but reported so the
        # gap is visible. Catching Exception rather than using a bare except
        # lets KeyboardInterrupt still stop the loop.
        print(f'{year}: skipped ({exc!r})')
    else:
        # Add data to the list; years that produced no rows are left out.
        if not df.empty:
            all_races.append(df)

    # Pause two seconds between requests so the site is not hit in a burst.
    time.sleep(2)

In [5]:
# Stack the per-year frames row-wise. ignore_index=True gives the combined
# frame one continuous 0..n-1 index instead of repeating each yearly frame's
# own 0-based labels, which would make label-based lookups ambiguous.
indy = pd.concat(all_races, axis=0, ignore_index=True)

In [6]:
# Preview the first rows of the combined results frame.
indy.head()

Unnamed: 0,finish,start,car_number,driver,car_name/entrant,make_model,status,laps,LED,winnings,year
0,1,28,32,Ray Harroun,"Marmon ""Wasp""",Marmon/Marmon,74.602,200,88,"$14,250",1911
1,2,29,33,Ralph Mulford,Lozier,Lozier/Lozier,74.285,200,10,"$5,200",1911
2,3,25,28,David Bruce-Brown,Fiat,Fiat/Fiat,72.73,200,81,"$3,250",1911
3,4,11,11,Spencer Wishart,Mercedes,Mercedes/Mercedes,72.648,200,5,"$2,350",1911
4,5,27,31,Joe Dawson,Marmon,Marmon/Marmon,72.365,200,0,"$1,500",1911


In [7]:
# (rows, columns) of the combined results frame.
indy.shape

(3330, 11)

In [8]:
# Write the combined results to races.csv in the working directory;
# index=False leaves the row index out of the file.
indy.to_csv('./races.csv', index = False)