In [3]:
import pandas as pd
from datetime import datetime

def fetch_over_under_runline(oddshark_id, game_date, gamelog_table=None):
    """Look up the 'Total' value for one team's game on a given date.

    The team's season game log is read from oddsshark.com (first HTML table
    on the page) unless an already-loaded table is supplied, and the row
    whose 'Date' equals the calendar day of ``game_date`` is selected.

    Args:
        oddshark_id: Team identifier used in the Odds Shark game-log URL.
        game_date: ``datetime``/``date``-like object for the game. Only its
            year, month and day are used; any time-of-day is ignored.
        gamelog_table: Optional DataFrame with 'Date' and 'Total' columns to
            use instead of downloading the page (e.g. a table the caller has
            already fetched for this team and season). It is not modified.

    Returns:
        A 4-tuple ``(over_under, duplicate_id, duplicate_year, duplicate_date)``:

        * single matching row: ``(<Total value>, None, None, None)``
        * several rows on that date (doubleheader):
          ``('', oddshark_id, year, game_date)``
        * any failure (download error, empty table, missing columns,
          no matching date): ``('unknown', None, None, None)``
    """
    year = game_date.year

    if gamelog_table is None:
        url = f"https://www.oddsshark.com/stats/gamelog/baseball/mlb/{oddshark_id}?season={year}"
        try:
            # Best-effort scrape: any network/parsing failure (including a
            # page with no tables) is reported and mapped to 'unknown'.
            df = pd.read_html(url)[0]
        except Exception as e:
            print(f"BAD - error for team {oddshark_id} on date {game_date}: {e}")
            return 'unknown', None, None, None
    else:
        df = gamelog_table

    if df.empty:
        print(f"BAD - No data in table for team {oddshark_id} on date {game_date}")
        return 'unknown', None, None, None

    # A changed page layout should be reported like any other failure rather
    # than surfacing as a KeyError.
    missing_columns = [col for col in ('Date', 'Total') if col not in df.columns]
    if missing_columns:
        print(f"BAD - Missing column(s) {missing_columns} for team {oddshark_id} on date {game_date}")
        return 'unknown', None, None, None

    # Parse into a separate Series so the table itself is left untouched;
    # rows whose date cannot be parsed become NaT and simply never match.
    parsed_dates = pd.to_datetime(df['Date'], format='%b %d, %Y', errors='coerce')

    # Compare on the calendar day only, so a game_date carrying a start time
    # (or a plain date object) still matches the midnight timestamps above.
    target_day = pd.Timestamp(year=year, month=game_date.month, day=game_date.day)
    matching_rows = df[parsed_dates == target_day]

    if len(matching_rows) > 1:
        # Two games on one day cannot be told apart by date alone; hand the
        # details back so the caller can list them for manual follow-up.
        print(f"DOUBLEHEADER on {game_date}")
        return '', oddshark_id, year, game_date

    if matching_rows.empty:
        print(f"BAD - No matching date found for team {oddshark_id} on date {game_date}")
        return 'unknown', None, None, None

    over_under = matching_rows.iloc[0]['Total']
    return over_under, None, None, None

def update_gamelogs_with_over_under(game_pks_file, gamelogs_folder, num_recent_games=50):
    """Write an 'over_under_runline' column into the most recent gamelog CSVs.

    For each selected game id, ``{gamelogs_folder}/game_{game_id}.csv`` is
    loaded, the home team's Odds Shark id and the game date are read from its
    first row, the value is looked up with ``fetch_over_under_runline`` and
    the file is rewritten in place with that value in every row. Whatever the
    lookup returns is written, including its '' / 'unknown' markers.

    Args:
        game_pks_file: CSV file with a 'game_id' column.
        gamelogs_folder: Folder containing the per-game ``game_<id>.csv`` files.
        num_recent_games: How many rows from the end of ``game_pks_file`` to
            process. Defaults to 50; pass ``None`` to process every game.

    Returns:
        None. Progress, per-game errors and a closing list of doubleheader
        dates are printed to stdout. A failure on one game is reported and
        does not stop the remaining games.
    """
    game_pks_df = pd.read_csv(game_pks_file)
    game_pks = game_pks_df['game_id']
    if num_recent_games is not None:
        game_pks = game_pks.tail(num_recent_games)
    duplicates = []

    for game_id in game_pks:
        # Best-effort per game: a missing/malformed file is reported and skipped.
        try:
            gamelog_file = f'{gamelogs_folder}/game_{game_id}.csv'
            gamelog_df = pd.read_csv(gamelog_file)

            # Team id and date are taken from the first row of the gamelog.
            home_oddshark_id = gamelog_df.loc[0, 'home_oddshark_id']
            game_date_str = gamelog_df.loc[0, 'game_date']
            game_date = datetime.strptime(game_date_str, '%Y-%m-%d')

            over_under_runline, duplicate_id, duplicate_year, duplicate_date = fetch_over_under_runline(home_oddshark_id, game_date)

            # Explicit None check so a team id of 0 would still be recorded.
            if duplicate_id is not None:
                duplicates.append((duplicate_id, duplicate_year, duplicate_date))

            # Scalar assignment: the same value is broadcast to every row.
            gamelog_df['over_under_runline'] = over_under_runline

            gamelog_df.to_csv(gamelog_file, index=False)
            print(f"Updated {gamelog_file} with over/under runline.")
        except Exception as e:
            print(f"Error updating gamelog for game_id {game_id}: {e}")

    print("\nGames with duplicate dates:")
    for dup_id, dup_year, dup_date in duplicates:
        print(f"Team Oddshark ID: {dup_id}, Year: {dup_year}, Date: {dup_date.strftime('%Y-%m-%d')}")

# Input locations: the CSV listing game ids and the folder holding the
# per-game gamelog CSVs. Adjust both to match the local layout.
game_pks_file, gamelogs_folder = 'game_pks.csv', 'gamelogs'

# Process the gamelogs, adding the over/under value to each file.
update_gamelogs_with_over_under(
    game_pks_file,
    gamelogs_folder,
)

Updated gamelogs/game_744844.csv with over/under runline.
Updated gamelogs/game_745738.csv with over/under runline.
Updated gamelogs/game_746707.csv with over/under runline.
Updated gamelogs/game_745174.csv with over/under runline.
Updated gamelogs/game_746784.csv with over/under runline.
Updated gamelogs/game_745408.csv with over/under runline.
Updated gamelogs/game_745496.csv with over/under runline.
Updated gamelogs/game_746466.csv with over/under runline.
Updated gamelogs/game_744845.csv with over/under runline.
Updated gamelogs/game_745083.csv with over/under runline.
Updated gamelogs/game_745737.csv with over/under runline.
Updated gamelogs/game_746704.csv with over/under runline.
Updated gamelogs/game_746053.csv with over/under runline.
Updated gamelogs/game_745008.csv with over/under runline.
Updated gamelogs/game_746302.csv with over/under runline.
Updated gamelogs/game_746785.csv with over/under runline.
Updated gamelogs/game_745169.csv with over/under runline.
Updated gamelo