In [1]:
import sqlite3
import pandas as pd

In [2]:
from make_db import make_example_tables

In [3]:
# Build the example data with the make_db helper (its source is not shown here);
# the call prints the Weather and Game rows listed below.
# NOTE(review): presumably it also writes the WEATHER and GAME tables to
# nearest_date.sqlite, which later cells query — confirm in make_db.
make_example_tables()

Weather Data
       day  temp
2022-07-13  80.0
2022-07-17  90.0
2022-07-20  85.0
2022-07-30  80.0
2022-08-01  77.0
2022-08-04  70.0
2022-08-10  68.0
2022-08-12  74.0
2022-08-17  65.0
2022-08-22  71.0
2000-01-01   NaN
2022-08-25   NaN

Game Data: W is a Win, L is a Loss
       day outcome
2022-07-11       W
2022-07-16       W
2022-07-20       L
2022-07-27       L
2022-07-31       L
2022-08-05       W
2022-08-09       L
2022-08-13       L
2022-08-14       W
2022-08-20       W
2022-08-22       L


Create pandas DataFrames from the tables, convert `day` to datetime, and use it as the index of the weather table.

In [5]:
# Open the SQLite database file that the WEATHER and GAME queries below read from.
conn = sqlite3.connect("nearest_date.sqlite")
# NOTE(review): `cursor` is not used by any cell visible here (pd.read_sql is given
# `conn` directly), and the connection is not closed in the visible cells —
# confirm whether later cells rely on either.
cursor = conn.cursor()

In [6]:
# Load the WEATHER table and parse its `day` column into datetimes.
weather_df = (
    pd.read_sql(sql="SELECT * FROM WEATHER", con=conn)
    .assign(day=lambda frame: pd.to_datetime(frame['day']))
)

# Chronological order with a fresh 0..n-1 index, so a row's index label equals
# its position in the sorted table.
weather_sorted = weather_df.sort_values('day', ignore_index=True)

# Same rows keyed by date; this index is what the nearest-date lookup searches.
weather_index_df = weather_sorted.set_index('day')

# Rename the date column so it stays distinct from the game table's `day`
# column after merging.
# NOTE(review): rows with a missing temp are kept, so a game can be matched to a
# weather date that has no temperature — confirm that this is intended.
weather_sorted = weather_sorted.rename(columns={'day': 'weather_day'})
weather_sorted

Unnamed: 0,weather_day,temp
0,2000-01-01,
1,2022-07-13,80.0
2,2022-07-17,90.0
3,2022-07-20,85.0
4,2022-07-30,80.0
5,2022-08-01,77.0
6,2022-08-04,70.0
7,2022-08-10,68.0
8,2022-08-12,74.0
9,2022-08-17,65.0


In [7]:
# Show the date-indexed weather frame whose index is searched for the nearest date.
weather_index_df

Unnamed: 0_level_0,temp
day,Unnamed: 1_level_1
2000-01-01,
2022-07-13,80.0
2022-07-17,90.0
2022-07-20,85.0
2022-07-30,80.0
2022-08-01,77.0
2022-08-04,70.0
2022-08-10,68.0
2022-08-12,74.0
2022-08-17,65.0


In [8]:
# Load the GAME table and parse its `day` column into datetimes so it can be
# compared against the weather dates.
game_df = (
    pd.read_sql(sql="SELECT * FROM GAME", con=conn)
    .assign(day=lambda frame: pd.to_datetime(frame['day']))
)
game_df

Unnamed: 0,day,outcome
0,2022-07-11,W
1,2022-07-16,W
2,2022-07-20,L
3,2022-07-27,L
4,2022-07-31,L
5,2022-08-05,W
6,2022-08-09,L
7,2022-08-13,L
8,2022-08-14,W
9,2022-08-20,W


Create a function that uses the pandas `Index.get_indexer` method to find the position of the weather date nearest to a given game date

In [9]:
def get_closest_weather_index(game_date, weather_dates=None):
    '''
    Find the position of the weather date closest to a game date.

    input:
        game_date - date of game (e.g. a pandas Timestamp)
        weather_dates - optional pandas Index of weather dates to search;
            when omitted, the index of the notebook-level `weather_index_df`
            is used (the original behaviour)
    output: weather_index - integer position of the nearest date within the
        weather date index

    requirements
    -------------
    The weather dates must form a sorted datetime index; pandas also requires
    the index to be unique when `method='nearest'` is used.

    notes
    ------
    Tied distances are broken by preferring the larger index value.
    '''
    if weather_dates is None:
        # Fall back to the notebook-level frame so existing one-argument calls
        # keep working unchanged.
        weather_dates = weather_index_df.index
    # get_indexer takes a list of targets and returns an array of positions;
    # a single date is looked up, so take the only element.
    weather_index = weather_dates.get_indexer([game_date], method='nearest')[0]
    return weather_index

In [10]:
# Re-index every game by the position of its nearest weather date.
# A single get_indexer call over the whole `day` column gives the same positions
# as calling get_closest_weather_index once per row, without the per-row overhead.
# The index keeps the name 'day', matching what assigning the `day`-named Series
# produced before.
game_df.index = pd.Index(
    weather_index_df.index.get_indexer(game_df['day'], method='nearest'),
    name='day',
)

In [11]:
# game_df's index holds positions into weather_sorted, so an index-on-index
# left join attaches the nearest weather row to every game (games are all kept;
# a weather row may be reused by several games).
pd.merge(
    game_df,
    weather_sorted,
    how='left',
    left_index=True,
    right_index=True,
)

Unnamed: 0,day,outcome,weather_day,temp
1,2022-07-11,W,2022-07-13,80.0
2,2022-07-16,W,2022-07-17,90.0
3,2022-07-20,L,2022-07-20,85.0
4,2022-07-27,L,2022-07-30,80.0
5,2022-07-31,L,2022-08-01,77.0
6,2022-08-05,W,2022-08-04,70.0
7,2022-08-09,L,2022-08-10,68.0
8,2022-08-13,L,2022-08-12,74.0
8,2022-08-14,W,2022-08-12,74.0
10,2022-08-20,W,2022-08-22,71.0
