<a href="https://colab.research.google.com/github/ola-seni/Claude-Hr/blob/main/untitled6_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Daily MLB Home Run Prediction System with Real API Data
# Predicts which players are most likely to hit home runs in today's games

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import requests
import time
import re
import os
import json
import warnings
warnings.filterwarnings('ignore')

# Install required packages if not already installed
# The import is attempted first so that environments which already ship
# pybaseball skip the (slow) pip invocation entirely.
try:
    import pybaseball
except ImportError:
    print("Installing pybaseball...")
    import subprocess
    import sys
    # sys.executable is the interpreter running this code, so "-m pip" installs
    # into that interpreter's environment. check_call raises
    # CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pybaseball"])

# Import after installation
# These imports run unconditionally; if the install above did not succeed
# the ImportError surfaces here.
import pybaseball
from pybaseball import statcast_batter, playerid_lookup
# NOTE(review): batting_stats is not referenced by the code visible in this
# file -- confirm before removing.
from pybaseball import batting_stats
from pybaseball import cache

# Enable cache for pybaseball to speed up repeated queries
cache.enable()

class MLBStatsAPI:
    """
    Wrapper for MLB Stats API

    All methods are static. Each fetch method issues one HTTP GET and returns
    the decoded JSON payload, or None when the request fails, times out, or
    is answered with anything other than HTTP 200. Failures are reported with
    print() rather than raised, so callers only need to test for None.
    """

    BASE_URL = "https://statsapi.mlb.com/api"

    # Seconds to wait for the server. requests.get() has no default timeout,
    # so without this a stalled connection would block the caller forever.
    REQUEST_TIMEOUT = 15

    @staticmethod
    def _get_json(url, description):
        """
        Fetch a URL and decode its JSON body

        Parameters:
        -----------
        url : str
            Fully built request URL
        description : str
            Short label used in the error message ("schedule", "game data", ...)

        Returns:
        --------
        dict or None
            Decoded payload, or None on any network, HTTP or decoding failure
        """
        try:
            response = requests.get(url, timeout=MLBStatsAPI.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Connection errors, DNS failures and timeouts all land here.
            print(f"Error fetching {description}: {str(e)}")
            return None

        if response.status_code != 200:
            print(f"Error fetching {description}: {response.status_code}")
            return None

        try:
            return response.json()
        except ValueError as e:
            # A 200 response whose body is not valid JSON.
            print(f"Error fetching {description}: {str(e)}")
            return None

    @staticmethod
    def get_schedule(date=None):
        """
        Get MLB schedule for a given date

        Parameters:
        -----------
        date : str
            Date in format 'YYYY-MM-DD', defaults to today

        Returns:
        --------
        dict
            Schedule data, or None if the request failed
        """
        if date is None:
            date = datetime.now().strftime('%Y-%m-%d')

        url = f"{MLBStatsAPI.BASE_URL}/v1/schedule/games?sportId=1&date={date}"
        return MLBStatsAPI._get_json(url, "schedule")

    @staticmethod
    def get_game(game_id):
        """
        Get detailed game data

        Parameters:
        -----------
        game_id : str
            MLB game ID

        Returns:
        --------
        dict
            Game data, or None if the request failed
        """
        url = f"{MLBStatsAPI.BASE_URL}/v1.1/game/{game_id}/feed/live"
        return MLBStatsAPI._get_json(url, "game data")

    @staticmethod
    def get_player(player_id):
        """
        Get player data

        Parameters:
        -----------
        player_id : str
            MLB player ID

        Returns:
        --------
        dict
            Player data, or None if the request failed
        """
        url = f"{MLBStatsAPI.BASE_URL}/v1/people/{player_id}"
        return MLBStatsAPI._get_json(url, "player data")

    @staticmethod
    def get_team(team_id):
        """
        Get team data

        Parameters:
        -----------
        team_id : str
            MLB team ID

        Returns:
        --------
        dict
            Team data, or None if the request failed
        """
        url = f"{MLBStatsAPI.BASE_URL}/v1/teams/{team_id}"
        return MLBStatsAPI._get_json(url, "team data")

    @staticmethod
    def _batting_slot(raw_order):
        """
        Convert a battingOrder value to a lineup slot

        The slot is the first character of the value ("100" -> 1,
        "401" -> 4). Anything that cannot be parsed, including a missing
        value, yields 0.
        """
        try:
            return int(str(raw_order)[0])
        except (ValueError, IndexError):
            return 0

    @staticmethod
    def _parse_lineup(team_box, people):
        """
        Build the batting lineup for one side of a boxscore

        Parameters:
        -----------
        team_box : dict
            boxscore['teams']['away'] or boxscore['teams']['home']
        people : dict
            gameData['players'] mapping ("ID<n>" -> person record); used only
            to look up the batter's hitting side and may be empty

        Returns:
        --------
        list
            Lineup entries sorted by batting order
        """
        if 'players' not in team_box or 'batters' not in team_box:
            return []

        roster = team_box['players']
        lineup = []

        for batter_id in team_box['batters']:
            player_key = f"ID{batter_id}"
            player = roster.get(player_key)
            if player is None:
                continue

            slot = MLBStatsAPI._batting_slot(player.get('battingOrder'))
            if slot == 0:
                # No batting-order slot means the player is not in the
                # batting lineup, so there is nothing to predict for them.
                continue

            person = player.get('person', {})
            position = player.get('position', {})
            bat_side = people.get(player_key, {}).get('batSide') or {}

            lineup.append({
                'id': person.get('id'),
                'name': person.get('fullName'),
                'position': position.get('abbreviation'),
                'batting_order': slot,
                # 'L', 'R' or 'S' when the feed provides it, otherwise None
                'bats': bat_side.get('code')
            })

        # list.sort is stable, so players sharing a slot keep feed order.
        lineup.sort(key=lambda entry: entry['batting_order'])
        return lineup

    @staticmethod
    def get_lineups(game_id, game_data=None):
        """
        Get lineups for a game

        Parameters:
        -----------
        game_id : str
            MLB game ID
        game_data : dict
            Already-downloaded live feed for this game. When omitted the feed
            is fetched with get_game(); pass it to avoid a second download.

        Returns:
        --------
        tuple
            (away_lineup, home_lineup). Both are None when no game data is
            available and empty lists when the feed has no usable boxscore.
        """
        if game_data is None:
            game_data = MLBStatsAPI.get_game(game_id)

        if not game_data:
            return None, None

        try:
            boxscore = game_data.get('liveData', {}).get('boxscore')
            if boxscore is None:
                return [], []

            people = game_data.get('gameData', {}).get('players') or {}
            teams = boxscore['teams']

            away_lineup = MLBStatsAPI._parse_lineup(teams['away'], people)
            home_lineup = MLBStatsAPI._parse_lineup(teams['home'], people)

            return away_lineup, home_lineup

        except (KeyError, TypeError, AttributeError) as e:
            # The feed did not have the expected shape.
            print(f"Error parsing lineups: {str(e)}")
            return [], []

    @staticmethod
    def _parse_pitcher(pitcher, people):
        """
        Build the pitcher record for one side

        Parameters:
        -----------
        pitcher : dict
            Entry from gameData['probablePitchers'] (may be empty or None)
        people : dict
            gameData['players'] mapping, consulted for the throwing hand when
            the probable-pitcher entry itself does not carry 'pitchHand'

        Returns:
        --------
        dict
            Pitcher record, or {} when no pitcher is listed
        """
        if not pitcher:
            return {}

        hand = (
            pitcher.get('pitchHand')
            or people.get(f"ID{pitcher.get('id')}", {}).get('pitchHand')
            or {}
        )

        return {
            'id': pitcher.get('id'),
            'name': pitcher.get('fullName'),
            'throws': hand.get('code', 'R'),
            # Placeholder: real pitcher stats are not fetched yet, so every
            # pitcher gets the same default value.
            'hr_per_nine': 1.0
        }

    @staticmethod
    def get_probable_pitchers(game_id, game_data=None):
        """
        Get probable pitchers for a game

        Parameters:
        -----------
        game_id : str
            MLB game ID
        game_data : dict
            Already-downloaded live feed for this game. When omitted the feed
            is fetched with get_game().

        Returns:
        --------
        tuple
            (away_pitcher, home_pitcher). Both are None when no game data is
            available; a side with no listed pitcher is an empty dict.
        """
        if game_data is None:
            game_data = MLBStatsAPI.get_game(game_id)

        if not game_data:
            return None, None

        try:
            game_info = game_data.get('gameData', {})
            probable_pitchers = game_info.get('probablePitchers')
            if probable_pitchers is None:
                return {}, {}

            people = game_info.get('players') or {}

            away_pitcher = MLBStatsAPI._parse_pitcher(probable_pitchers.get('away'), people)
            home_pitcher = MLBStatsAPI._parse_pitcher(probable_pitchers.get('home'), people)

            return away_pitcher, home_pitcher

        except (KeyError, TypeError, AttributeError) as e:
            print(f"Error parsing pitchers: {str(e)}")
            return {}, {}

class DailyHRPredictor:
    """
    A system for predicting which MLB players are most likely to hit
    home runs in today's games

    The predictor pulls today's schedule, lineups and probable pitchers from
    MLBStatsAPI, summarises each hitter's recent Statcast data, and combines
    the hitter's home-run rate with park, pitcher, platoon and batting-order
    multipliers into a single score per hitter.
    """

    # HR/9 value around which the pitcher factor is normalised (factor 1.0).
    LEAGUE_AVG_HR_PER_NINE = 1.1

    # Upper bound applied to every computed probability, in percent.
    MAX_HR_PROBABILITY = 25.0

    # Multiplier for a hitter who has the platoon advantage.
    PLATOON_BOOST = 1.1

    # Trailing name tokens that are not part of the surname.
    _NAME_SUFFIXES = frozenset({'jr', 'jr.', 'sr', 'sr.', 'ii', 'iii', 'iv'})

    # Simplified fallback lineups as (name, position) in batting order.
    # Teams not listed here have no projection.
    _PROJECTED_LINEUPS = {
        'NYY': (
            ('DJ LeMahieu', '3B'),
            ('Aaron Judge', 'RF'),
            ('Juan Soto', 'LF'),
            ('Giancarlo Stanton', 'DH'),
            ('Anthony Rizzo', '1B'),
            ('Anthony Volpe', 'SS'),
            ('Alex Verdugo', 'LF'),
            ('Jose Trevino', 'C'),
            ('Gleyber Torres', '2B'),
        ),
        'BOS': (
            ('Jarren Duran', 'CF'),
            ("Tyler O'Neill", 'LF'),
            ('Rafael Devers', '3B'),
            ('Triston Casas', '1B'),
            ('Masataka Yoshida', 'LF'),
            ('Connor Wong', 'C'),
            ('Wilyer Abreu', 'RF'),
            ('Enmanuel Valdez', '2B'),
            ('Ceddanne Rafaela', 'SS'),
        ),
    }

    def __init__(self, output_dir="hr_predictions"):
        """
        Initialize the HR predictor

        Parameters:
        -----------
        output_dir : str
            Directory to save prediction outputs
        """
        self.output_dir = output_dir

        # exist_ok avoids the check-then-create race of testing for the
        # directory first, and also creates missing parent directories.
        os.makedirs(output_dir, exist_ok=True)

        # Today's date
        # NOTE(review): this is the local clock of the machine running the
        # code; on a host in another timezone it may not match the MLB
        # schedule date -- confirm.
        self.today = datetime.now().strftime('%Y-%m-%d')

        # Days to look back for player stats
        self.days_back = 60

        # Per-session caches keyed by player name
        self.player_stats = {}
        self.pitcher_stats = {}

        # Store ballpark factors
        self.park_factors = self._load_park_factors()

        # MLB team IDs (used for mapping)
        self.team_ids = {
            108: 'LAA', 109: 'ARI', 110: 'BAL', 111: 'BOS', 112: 'CHC',
            113: 'CIN', 114: 'CLE', 115: 'COL', 116: 'DET', 117: 'HOU',
            118: 'KC', 119: 'LAD', 120: 'WSH', 121: 'NYM', 133: 'OAK',
            134: 'PIT', 135: 'SD', 136: 'SEA', 137: 'SF', 138: 'STL',
            139: 'TB', 140: 'TEX', 141: 'TOR', 142: 'MIN', 143: 'PHI',
            144: 'ATL', 145: 'CWS', 146: 'MIA', 147: 'NYY', 158: 'MIL'
        }

        print(f"Daily HR Predictor initialized for {self.today}")

    def _load_park_factors(self):
        """
        Load MLB ballpark home run factors

        Returns:
        --------
        dict
            Dictionary of ballpark HR factors keyed by the home team's
            abbreviation; values above 1.0 favour home runs
        """
        park_factors = {
            'COL': 1.35,  # Coors Field - very HR friendly
            'CIN': 1.25,  # Great American Ball Park
            'HOU': 1.18,  # Minute Maid Park
            'NYY': 1.15,  # Yankee Stadium
            'PHI': 1.12,  # Citizens Bank Park
            'MIL': 1.10,  # American Family Field
            'CHC': 1.08,  # Wrigley Field
            'LAA': 1.05,  # Angel Stadium
            'ATL': 1.05,  # Truist Park
            'BOS': 1.03,  # Fenway Park
            'TEX': 1.02,  # Globe Life Field
            'TOR': 1.02,  # Rogers Centre
            'BAL': 1.02,  # Camden Yards
            'ARI': 1.00,  # Chase Field
            'CLE': 0.98,  # Progressive Field
            'MIN': 0.98,  # Target Field
            'LAD': 0.98,  # Dodger Stadium
            'CHW': 0.97,  # Guaranteed Rate Field
            'WSH': 0.97,  # Nationals Park
            'DET': 0.95,  # Comerica Park
            'STL': 0.95,  # Busch Stadium
            'TB': 0.95,   # Tropicana Field
            'KC': 0.94,   # Kauffman Stadium
            'SEA': 0.94,  # T-Mobile Park
            'OAK': 0.93,  # Oakland Coliseum
            'MIA': 0.92,  # loanDepot Park
            'SF': 0.90,   # Oracle Park
            'PIT': 0.90,  # PNC Park
            'SD': 0.90,   # Petco Park
            'NYM': 0.88,  # Citi Field
        }

        # team_ids maps the White Sox to 'CWS'; without this alias their
        # lookup misses the 'CHW' entry and silently falls back to 1.0.
        park_factors['CWS'] = park_factors['CHW']

        return park_factors

    def get_todays_games(self):
        """
        Get today's MLB games using MLB Stats API

        Returns:
        --------
        pandas.DataFrame
            Today's games that are in the 'Preview' or 'Live' state, with
            columns game_id, game_date, away_team, home_team and status.
            Empty when nothing is scheduled or the lookup fails.
        """
        try:
            schedule_data = MLBStatsAPI.get_schedule(self.today)

            if not schedule_data:
                print("No schedule data found")
                return pd.DataFrame()

            games = []

            for date in schedule_data.get('dates') or []:
                for game in date.get('games', []):
                    state = game['status']['abstractGameState']

                    # Only include games that are scheduled or live
                    if state not in ('Preview', 'Live'):
                        continue

                    teams = game['teams']
                    away_team_id = teams['away']['team']['id']
                    home_team_id = teams['home']['team']['id']

                    games.append({
                        'game_id': game['gamePk'],
                        'game_date': self.today,
                        'away_team': self.team_ids.get(away_team_id, 'Unknown'),
                        'home_team': self.team_ids.get(home_team_id, 'Unknown'),
                        'status': state
                    })

            games_df = pd.DataFrame(games)

            print(f"Retrieved {len(games_df)} games scheduled for today")
            return games_df

        except Exception as e:
            # Best-effort boundary: any network or payload problem is
            # reported and treated as "no games".
            print(f"Error retrieving today's games: {str(e)}")
            return pd.DataFrame()

    def get_todays_lineups(self, games=None):
        """
        Get today's MLB lineups using MLB Stats API

        Parameters:
        -----------
        games : pandas.DataFrame
            Result of get_todays_games(). When omitted the schedule is
            fetched here; pass it to avoid a repeated schedule request.

        Returns:
        --------
        dict
            Dictionary of lineups by team. Teams without a posted lineup get
            the projected lineup, which may be an empty list.
        """
        if games is None:
            games = self.get_todays_games()

        if games.empty:
            print("No games found")
            return {}

        lineups = {}

        for _, game in games.iterrows():
            away_lineup, home_lineup = MLBStatsAPI.get_lineups(game['game_id'])

            sides = (
                (game['away_team'], away_lineup),
                (game['home_team'], home_lineup),
            )
            for team, lineup in sides:
                if not lineup:
                    # If no lineup available, use projected lineup
                    print(f"No lineup available for {team}, using projected lineup")
                    lineup = self._get_projected_lineup(team)
                lineups[team] = lineup

        return lineups

    def _get_projected_lineup(self, team):
        """
        Get a projected lineup for a team when official lineup is not available

        Parameters:
        -----------
        team : str
            Team abbreviation

        Returns:
        --------
        list
            Projected lineup as a fresh list of dicts with 'name',
            'position' and 'batting_order'; empty for teams without a
            projection
        """
        hitters = DailyHRPredictor._PROJECTED_LINEUPS.get(team, ())

        # New dicts are built on every call so callers can modify the
        # result without affecting later calls.
        return [
            {'name': name, 'position': position, 'batting_order': slot}
            for slot, (name, position) in enumerate(hitters, start=1)
        ]

    def get_todays_pitchers(self, games=None):
        """
        Get today's scheduled starting pitchers

        Parameters:
        -----------
        games : pandas.DataFrame
            Result of get_todays_games(). When omitted the schedule is
            fetched here.

        Returns:
        --------
        dict
            Dictionary of pitchers by team; teams without a listed probable
            pitcher are absent
        """
        if games is None:
            games = self.get_todays_games()

        if games.empty:
            print("No games found")
            return {}

        pitchers = {}

        for _, game in games.iterrows():
            away_pitcher, home_pitcher = MLBStatsAPI.get_probable_pitchers(game['game_id'])

            if away_pitcher:
                pitchers[game['away_team']] = away_pitcher

            if home_pitcher:
                pitchers[game['home_team']] = home_pitcher

        return pitchers

    @staticmethod
    def _default_player_stats(player_name):
        """Return the fallback stats used when a player cannot be evaluated."""
        return {
            'name': player_name,
            'exit_velocity_avg': 85.0,
            'launch_angle_avg': 15.0,
            'barrel_pct': 1.0,
            'hard_hit_pct': 8.0,
            'hr_per_event': 1.0,
            'iso': 0.150,
            'events': 100,
            'home_runs': 5
        }

    @staticmethod
    def _lookup_player_id(player_name):
        """
        Resolve a player's MLBAM id from their name via playerid_lookup

        Returns:
        --------
        int or None
            The id of the best match, or None when nothing matches
        """
        parts = player_name.split()

        # "Ronald Acuna Jr." -> surname "Acuna", not "Jr."
        if len(parts) > 2 and parts[-1].lower() in DailyHRPredictor._NAME_SUFFIXES:
            parts = parts[:-1]

        if not parts:
            return None

        last_name = parts[-1]
        first_name = ' '.join(parts[:-1]) or None

        matches = playerid_lookup(last_name, first_name)

        if matches is None or matches.empty:
            return None

        matches = matches.dropna(subset=['key_mlbam'])

        # Several players can share a name; prefer the one who played most
        # recently when the lookup result says so.
        if 'mlb_played_last' in matches.columns:
            matches = matches.sort_values(
                'mlb_played_last', ascending=False, na_position='last'
            )

        if matches.empty:
            return None

        return int(matches.iloc[0]['key_mlbam'])

    @staticmethod
    def _summarize_statcast(player_name, player_data):
        """
        Reduce pitch-level Statcast rows to the hitter's summary stats

        Statcast returns one row per pitch. Rates are therefore computed
        against the rows that matter for each metric rather than against
        every pitch: plate appearances (rows with an 'events' outcome) for
        the home-run rate, and batted balls (plate appearances that also
        have a measured exit velocity) for the contact-quality metrics.

        Parameters:
        -----------
        player_name : str
            Player's name
        player_data : pandas.DataFrame
            Rows with at least 'events', 'launch_speed' and 'launch_angle'

        Returns:
        --------
        dict or None
            Player's stats, or None when there is no completed plate
            appearance in the data
        """
        events = player_data['events']
        outcome_text = events.astype(str)
        has_outcome = events.notna() & outcome_text.str.strip().ne('')

        plate_appearances = int(has_outcome.sum())
        if plate_appearances == 0:
            return None

        hr_count = int((has_outcome & outcome_text.eq('home_run')).sum())

        batted = player_data[has_outcome & player_data['launch_speed'].notna()]
        batted_balls = len(batted)

        hard_hit = batted['launch_speed'] >= 95
        # Rough stand-in for a "barrel": hard contact at a 25-35 degree
        # launch angle.
        barrels = hard_hit & batted['launch_angle'].between(25, 35)

        def pct(count, total):
            return count / total * 100 if total > 0 else 0

        # Placeholder, not a measured ISO: a simulated value derived from
        # the home run count.
        iso = 0.200 + (hr_count / 20) * 0.100

        return {
            'name': player_name,
            # Both averages are NaN when there are no batted balls.
            'exit_velocity_avg': float(batted['launch_speed'].mean()),
            'launch_angle_avg': float(batted['launch_angle'].mean()),
            'barrel_pct': pct(int(barrels.sum()), batted_balls),
            'hard_hit_pct': pct(int(hard_hit.sum()), batted_balls),
            'hr_per_event': pct(hr_count, plate_appearances),
            'iso': iso,
            'events': plate_appearances,
            'home_runs': hr_count
        }

    def collect_player_stats(self, player_name, days_back=None, player_id=None):
        """
        Collect recent stats for a specific player

        Results, including fallback defaults, are cached per player name for
        the lifetime of this object.

        Parameters:
        -----------
        player_name : str
            Player's name
        days_back : int
            Number of days to look back for stats
        player_id : int
            MLBAM player id. When given, the name lookup is skipped, which
            avoids mismatches for shared or multi-word names.

        Returns:
        --------
        dict
            Player's stats; default values when the player cannot be found,
            has no data, or an error occurs
        """
        if player_name in self.player_stats:
            return self.player_stats[player_name]

        if days_back is None:
            days_back = self.days_back

        stats = None

        try:
            if player_id is None:
                player_id = self._lookup_player_id(player_name)

            if player_id is None:
                print(f"Could not find player ID for {player_name}")
            else:
                # Calculate date range
                end_date = datetime.now()
                start_date = end_date - timedelta(days=days_back)

                player_data = statcast_batter(
                    start_dt=start_date.strftime('%Y-%m-%d'),
                    end_dt=end_date.strftime('%Y-%m-%d'),
                    player_id=int(player_id)
                )

                if player_data is not None and not player_data.empty:
                    stats = self._summarize_statcast(player_name, player_data)

                if stats is None:
                    print(f"No Statcast data found for {player_name}")

        except Exception as e:
            # Best-effort boundary: one player's lookup must not abort the
            # whole prediction run.
            print(f"Error collecting stats for {player_name}: {str(e)}")
            stats = None

        if stats is None:
            stats = self._default_player_stats(player_name)

        # Cache the stats
        self.player_stats[player_name] = stats

        return stats

    @staticmethod
    def _platoon_factor(bats, throws):
        """
        Return the handedness multiplier for a hitter/pitcher pairing

        A switch hitter ('S') or a hitter batting from the opposite side to
        the pitcher's throwing hand gets PLATOON_BOOST. Same-side matchups
        and matchups where either hand is unknown are neutral (1.0).
        """
        if not bats or not throws:
            return 1.0
        if bats == 'S' or bats != throws:
            return DailyHRPredictor.PLATOON_BOOST
        return 1.0

    def calculate_hr_probability(self, player, team, opponent_team, pitcher, park_team=None):
        """
        Calculate probability of a player hitting a home run today

        Parameters:
        -----------
        player : dict
            Player information; 'name' is required, 'batting_order', 'id'
            and 'bats' are used when present
        team : str
            Player's team
        opponent_team : str
            Opponent team
        pitcher : dict
            Pitcher information ('hr_per_nine', 'throws')
        park_team : str
            Abbreviation of the team whose ballpark hosts the game. Defaults
            to opponent_team, which is only right for a visiting hitter;
            pass the home team for hitters playing at home.

        Returns:
        --------
        float
            Home run probability (0-100%), capped at MAX_HR_PROBABILITY
        """
        player_stats = self.collect_player_stats(player['name'], player_id=player.get('id'))

        # Base probability from player's HR per event rate
        base_prob = player_stats['hr_per_event']

        # Factor in the ballpark where the game is played
        venue = opponent_team if park_team is None else park_team
        park_factor = self.park_factors.get(venue, 1.0)

        # Factor in the pitcher's HR rate: a pitcher who allows more home
        # runs than average raises the probability, one who allows fewer
        # lowers it.
        hr_per_nine = pitcher.get('hr_per_nine')
        if hr_per_nine is None:
            hr_per_nine = self.LEAGUE_AVG_HR_PER_NINE
        pitcher_factor = max(hr_per_nine, 0.0) / self.LEAGUE_AVG_HR_PER_NINE

        # Factor in lefty/righty matchup
        handedness_factor = self._platoon_factor(player.get('bats'), pitcher.get('throws'))

        # Adjustments for batting order position (middle of order gets a boost)
        batting_order = player.get('batting_order')
        if batting_order in (3, 4):
            order_factor = 1.2
        elif batting_order in (2, 5):
            order_factor = 1.1
        else:
            order_factor = 1.0

        hr_probability = base_prob * park_factor * pitcher_factor * handedness_factor * order_factor

        # Keep the result inside [0, MAX_HR_PROBABILITY]
        return max(0.0, min(hr_probability, self.MAX_HR_PROBABILITY))

    def predict_todays_hr_hitters(self, games=None, lineups=None, pitchers=None):
        """
        Predict which players are most likely to hit home runs today

        Parameters:
        -----------
        games : pandas.DataFrame
            Games to evaluate, in the shape returned by get_todays_games().
            Fetched when omitted.
        lineups : dict
            Lineups by team, as returned by get_todays_lineups(). Fetched
            when omitted.
        pitchers : dict
            Pitchers by team, as returned by get_todays_pitchers(). Fetched
            when omitted.

        Returns:
        --------
        pandas.DataFrame
            Ranked list of players and their HR probabilities, highest
            first; HR_Probability is formatted as a string such as "5.40%"
        """
        print("Predicting today's home run hitters...")

        if games is None:
            games = self.get_todays_games()

        if games.empty:
            print("No games scheduled for today")
            return pd.DataFrame()

        # The schedule fetched above is reused for both lookups.
        if lineups is None:
            lineups = self.get_todays_lineups(games)
        if pitchers is None:
            pitchers = self.get_todays_pitchers(games)

        player_probs = []

        for _, game in games.iterrows():
            away_team = game['away_team']
            home_team = game['home_team']

            # (batting team, pitching team, location of the batting team)
            matchups = (
                (away_team, home_team, 'Away'),
                (home_team, away_team, 'Home'),
            )

            for batting_team, pitching_team, location in matchups:
                if batting_team not in lineups or pitching_team not in pitchers:
                    continue

                opposing_pitcher = pitchers[pitching_team]

                for player in lineups[batting_team]:
                    # Both sides play in the home team's park.
                    hr_prob = self.calculate_hr_probability(
                        player, batting_team, pitching_team, opposing_pitcher,
                        park_team=home_team
                    )

                    player_probs.append({
                        'Player': player['name'],
                        'Team': batting_team,
                        'Position': player.get('position'),
                        'Batting_Order': player.get('batting_order'),
                        'Opponent': pitching_team,
                        'Opponent_Pitcher': opposing_pitcher.get('name', 'Unknown'),
                        'Pitcher_Throws': opposing_pitcher.get('throws', 'R'),
                        'HR_Probability': hr_prob,
                        'Game_Location': location
                    })

        predictions_df = pd.DataFrame(player_probs)

        if not predictions_df.empty:
            # Sort while the column is still numeric, then format it.
            predictions_df = predictions_df.sort_values(
                by='HR_Probability', ascending=False
            ).reset_index(drop=True)

            predictions_df['HR_Probability'] = predictions_df['HR_Probability'].apply(
                lambda x: f"{x:.2f}%"
            )

            print(f"Found {len(predictions_df)} players with HR probabilities")
        else:
            print("No player data available to make predictions")

        return predictions_df

    def get_top_hr_picks(self, n=10, predictions=None):
        """
        Get top n players most likely to hit home runs today

        Parameters:
        -----------
        n : int
            Number of players to return
        predictions : pandas.DataFrame
            Result of predict_todays_hr_hitters(). Computed when omitted.

        Returns:
        --------
        pandas.DataFrame
            Top n players with highest HR probabilities
        """
        if predictions is None:
            predictions = self.predict_todays_hr_hitters()

        if predictions.empty:
            return pd.DataFrame()

        return predictions.head(n)

    def export_predictions(self, filename='hr_predictions.csv', predictions=None):
        """
        Export today's HR predictions to CSV file

        Parameters:
        -----------
        filename : str
            Name of the CSV file to export; used as given, so a bare name is
            written to the current working directory
        predictions : pandas.DataFrame
            Result of predict_todays_hr_hitters(). Computed when omitted.
        """
        if predictions is None:
            predictions = self.predict_todays_hr_hitters()

        if not predictions.empty:
            predictions.to_csv(filename, index=False)
            print(f"Predictions exported to {filename}")
        else:
            print("No predictions to export")

    def visualize_top_hr_probabilities(self, n=15, predictions=None):
        """
        Create a bar chart of top n players with highest HR probabilities

        The chart is saved as hr_predictions.png and then shown.

        Parameters:
        -----------
        n : int
            Number of players to visualize
        predictions : pandas.DataFrame
            Result of predict_todays_hr_hitters(). Computed when omitted.
            The frame passed in is not modified.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns

        if predictions is None:
            predictions = self.predict_todays_hr_hitters()

        if predictions.empty:
            print("No predictions available to visualize")
            return

        chart_data = predictions.copy()

        # Convert probability back to float for plotting
        chart_data['HR_Probability'] = (
            chart_data['HR_Probability'].astype(str).str.rstrip('%').astype(float)
        )

        # Plot against a label that includes the matchup. Using it as the
        # category axis keeps bars and tick labels in step, which relabelling
        # ticks after plotting on 'Player' does not when names repeat.
        chart_data['Label'] = [
            f"{row['Player']} ({row['Team']} vs {row['Opponent']})"
            for _, row in chart_data.iterrows()
        ]
        top_n = chart_data.drop_duplicates(subset='Label').head(n)

        # Set up the plot
        plt.figure(figsize=(12, 8))
        sns.set_style("whitegrid")

        # Create the bar chart
        ax = sns.barplot(x='HR_Probability', y='Label', data=top_n,
                         palette='viridis', orient='h')

        # The title reports how many players are actually shown.
        plt.title(f"Top {len(top_n)} Home Run Probabilities for Today's Games", fontsize=16)
        plt.xlabel("Home Run Probability (%)", fontsize=12)
        plt.ylabel("Player", fontsize=12)

        # Add pitcher information as annotations
        for i, (_, row) in enumerate(top_n.iterrows()):
            pitcher_info = f"vs. {row['Opponent_Pitcher']} ({row['Pitcher_Throws']})"
            ax.text(row['HR_Probability'] + 0.2, i, pitcher_info, va='center')

        plt.tight_layout()
        plt.savefig('hr_predictions.png', dpi=300, bbox_inches='tight')
        plt.show()

        print(f"Visualization saved as hr_predictions.png")


# Script entry point: build the predictor, show the ten best picks, then
# chart and export the full prediction set.
if __name__ == "__main__":
    hr_predictor = DailyHRPredictor()

    # Ranked table of the ten most likely home run hitters
    top_picks = hr_predictor.get_top_hr_picks(n=10)

    if top_picks.empty:
        print("No predictions available.")
    else:
        print("\nTop 10 Players Most Likely to Hit Home Runs Today:")
        print(top_picks[['Player', 'Team', 'Opponent', 'Opponent_Pitcher', 'HR_Probability']])

        # Bar chart of the fifteen highest probabilities
        hr_predictor.visualize_top_hr_probabilities(n=15)

        # CSV with every player's prediction
        hr_predictor.export_predictions()

In [None]:
# Notebook cell: run the same workflow as the script entry point.
hr_predictor = DailyHRPredictor()

# Ranked table of the ten most likely home run hitters
top_picks = hr_predictor.get_top_hr_picks(n=10)

if top_picks.empty:
    print("No predictions available.")
else:
    print("\nTop 10 Players Most Likely to Hit Home Runs Today:")
    print(top_picks[['Player', 'Team', 'Opponent', 'Opponent_Pitcher', 'HR_Probability']])

    # Bar chart of the fifteen highest probabilities
    hr_predictor.visualize_top_hr_probabilities(n=15)

    # CSV with every player's prediction
    hr_predictor.export_predictions()

In [None]:
!pip install matplotlib seaborn