In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from meteostat import Point, Daily, Hourly
import warnings

from sys import platform

# Root folder of the f1-analytics checkout; every data file below is addressed
# relative to it. The two absolute paths are machine-specific defaults.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'
else:
    # Any other OS (e.g. Linux) used to leave `path` undefined, which only
    # surfaced later as a NameError at the first read_csv. Fall back to the
    # current working directory so the notebook can run from the repo root.
    path = './'

# Silence RuntimeWarnings for the whole session.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Turn off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [2]:
# Read the race calendar and the race results from the project's data folder.
races_csv = path + 'data/races.csv'
results_csv = path + 'data/results.csv'
races = pd.read_csv(races_csv)
results = pd.read_csv(results_csv)

# Keep only the columns (selected by position) used for the weather lookup.
weather_columns = [0, 1, 2, 3, 4, 5, 7]
weather = races.iloc[:, weather_columns]
# weather = weather.query('season == 2021')
weather.shape

(160, 7)

In [3]:
def format_datetime(dt):
    """Parse a ``'YYYY-MM-DD HH:MM:SS'`` style string into a naive ``datetime``.

    Parameters
    ----------
    dt : str
        Dash-separated date fields, optionally followed by a space and
        colon-separated time fields. Leading/trailing whitespace is ignored.
        When the time part is missing, the result is midnight of that date.

    Returns
    -------
    datetime.datetime
        Built by passing the integer date fields followed by the integer
        time fields positionally to ``datetime``.

    Raises
    ------
    ValueError
        If any field is not an integer or is out of range for ``datetime``.
    """
    # partition() never fails on a missing separator, unlike a two-way unpack
    # of split(' '), so date-only strings are accepted.
    date_part, _, time_part = dt.strip().partition(' ')

    date_fields = [int(field) for field in date_part.split('-')]
    time_fields = [int(field) for field in time_part.split(':')] if time_part else []

    return datetime(*date_fields, *time_fields)

In [None]:
# weather.date = weather.date.apply(lambda x: format_datetime(x))

In [17]:
def get_hourly_weather(dt, lat, long):
    """Summarise hourly weather for the three hours starting at ``dt``.

    Parameters
    ----------
    dt : datetime-like
        Start of the window; the end is ``dt + pd.Timedelta(hours=3)``.
    lat, long :
        Passed straight to ``Point(lat, long)`` -- presumably decimal degrees
        (TODO confirm against the races data).

    Returns
    -------
    tuple
        ``(temp_mean, rhum_mean, prcp_sum, coco)`` where the first three are
        the mean of the ``temp`` column, the mean of the ``rhum`` column and
        the sum of the ``prcp`` column of the fetched frame. ``coco`` is the
        first mode of the ``coco`` column, or ``None`` when that column is
        missing or holds no non-null values.
    """
    start = dt
    end = dt + pd.Timedelta(hours=3)
    coord = Point(lat, long)

    data = Hourly(coord, start, end).fetch()

    # Explicit checks instead of a bare `except: pass`, which also swallowed
    # KeyboardInterrupt and hid unrelated failures.
    coco = None
    if 'coco' in data.columns:
        modes = data['coco'].mode()
        # mode() ignores NaN, so an all-NaN column yields an empty Series.
        if not modes.empty:
            coco = modes.values[0]

    return data.temp.mean(), data.rhum.mean(), data.prcp.sum(), coco

def get_daily_weather(dt, lat, long):
    """Return the mode(s) of the ``coco`` column of the daily record for ``dt``.

    Parameters
    ----------
    dt : datetime-like
        Used as both the start and the end of the ``Daily`` request.
    lat, long :
        Passed straight to ``Point(lat, long)``.

    Returns
    -------
    numpy.ndarray
        Values of ``data.coco.mode()``; empty when no non-null code exists.
    """
    start = dt
    end = dt
    coord = Point(lat, long)

    data = Daily(coord, start, end).fetch()

    # NOTE(review): confirm that Daily data exposes a `coco` column at all --
    # if it does not, this attribute access raises AttributeError.
    # `mode` must be called: `.values` on the bound method itself always
    # raised AttributeError.
    return data.coco.mode().values

In [5]:
# One [season, [round, round, ...]] entry per distinct season, in order of
# first appearance in `weather`.
seasons = np.array(weather.season.unique())
rounds = [
    [season, list(weather.loc[weather.season == season, 'round'])]
    for season in seasons
]

In [18]:
# Column name -> list of values; every list gains one entry per race so the
# dict can be turned into a DataFrame in one go afterwards.
collected = {
    column: []
    for column in ('season', 'round', 'circuit_id', 'temp', 'rhum', 'coco', 'prcp')
}

for n, (_, season_rounds) in enumerate(rounds):
    for i in season_rounds:
        # Select the row(s) of this season/round; only the first match is used.
        race = weather.query('season ==@rounds[@n][0] & round == @i')
        temp, rhum, prcp, coco = get_hourly_weather(
            format_datetime(race.date.values[0]),
            race.lat.values[0],
            race.long.values[0],
        )

        collected['season'].append(race.season.values[0])
        collected['round'].append(i)
        collected['circuit_id'].append(race.circuit_id.values[0])
        collected['coco'].append(coco)
        collected['temp'].append(round(float(temp), 2))
        collected['rhum'].append(round(float(rhum), 1))
        collected['prcp'].append(float(prcp))

weather_conditions = pd.DataFrame(collected)
weather_conditions.head()

Unnamed: 0,season,round,circuit_id,temp,rhum,coco,prcp
0,2014,1,albert_park,17.93,44.0,,0.0
1,2014,2,sepang,31.3,65.0,,0.0
2,2014,3,bahrain,22.85,44.5,,0.0
3,2014,4,shanghai,16.5,66.5,,0.0
4,2014,5,catalunya,21.75,60.5,,0.0


In [30]:
# Lookup table from numeric weather condition code to its text label.
# Built once at definition time instead of on every call.
_COCO_LABELS = {
    1.0: 'Clear',
    2.0: 'Fair',
    3.0: 'Cloudy',
    4.0: 'Overcast',
    5.0: 'Fog',
    6.0: 'Freezing Fog',
    7.0: 'Light Rain',
    8.0: 'Rain',
    9.0: 'Heavy Rain',
    10.0: 'Freezing Rain',
    11.0: 'Heavy Freezing Rain',
    12.0: 'Sleet',
    13.0: 'Heavy Sleet',
    14.0: 'Light Snowfall',
    15.0: 'Snowfall',
    16.0: 'Heavy Snowfall',
    17.0: 'Rain Shower',
    18.0: 'Heavy Rain Shower',
    19.0: 'Sleet Shower',
    20.0: 'Heavy Sleet Shower',
    21.0: 'Snow Shower',
    22.0: 'Heavy Snow Shower',
    23.0: 'Lightning',
    24.0: 'Hail',
    25.0: 'Thunderstorm',
    26.0: 'Heavy Thunderstorm',
    27.0: 'Storm',
}


def format_coco(coco):
    """Translate a numeric weather condition code into its text label.

    Parameters
    ----------
    coco : float, int or None
        Condition code between 1 and 27. ``None`` and ``NaN`` both mean
        "no code available".

    Returns
    -------
    str or None
        The label for ``coco``, or ``None`` when the code is missing.

    Raises
    ------
    KeyError
        If ``coco`` is present but is not one of the known codes.
    """
    # pd.isna covers None as well as NaN; np.isnan(None) raises TypeError.
    if pd.isna(coco):
        return None
    return _COCO_LABELS[coco]

In [37]:
# Turn every numeric condition code into its text label.
condition_labels = weather_conditions.coco.apply(format_coco)
weather_conditions['conditions'] = condition_labels

weather_conditions.conditions.tail(10)

150    Overcast
151       Clear
152        None
153        None
154        Fair
155        Fair
156        Fair
157       Clear
158       Clear
159        Fair
Name: conditions, dtype: object

In [41]:
# Write the assembled table to the data folder without the index column.
weather_csv = path + 'data/weather.csv'
weather_conditions.to_csv(weather_csv, index=False)

In [38]:
# start = datetime(2021, 6, 27)
# end = datetime(2021, 6, 27,23)
# coord = Point(47.2197, 14.7647)

# data = Hourly(coord, start, end).fetch()

# data.head()

Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-06-27 00:00:00,17.0,12.9,77.0,,,280.0,16.6,9.3,1018.7,,2.0
2021-06-27 01:00:00,15.0,12.3,84.0,,,308.0,3.7,9.3,1019.0,,2.0
2021-06-27 02:00:00,14.0,12.0,88.0,,,298.0,4.0,9.3,1019.0,,2.0
2021-06-27 03:00:00,14.0,12.0,88.0,,,289.0,3.6,9.3,1020.0,,2.0
2021-06-27 04:00:00,14.0,12.0,88.0,,,327.0,3.6,9.3,1020.0,,2.0
