# **NBA PREDICTION MODEL**


### **INPUT TEAMS**

In [7]:
# Matchup to predict. Both values are used as keys into TEAM_TO_ABBR and the
# per-team rating tables, so they must be spelled exactly like a TEAM_TO_ABBR key.
HOME = "Boston"
AWAY = "Oklahoma City"

In [8]:
# Team display name -> abbreviation. The abbreviation is used to build team-page
# URLs and TEAMS/<ABBR>.html cache paths, and is compared against the team link
# text in the box-score "four factors" table.
TEAM_TO_ABBR = {
    "Atlanta": "ATL",
    "Boston": "BOS",
    "Brooklyn": "BRK",
    "Charlotte": "CHO",
    "Chicago": "CHI",
    "Cleveland": "CLE",
    "Dallas": "DAL",
    "Denver": "DEN",
    "Detroit": "DET",
    "Golden State": "GSW",
    "Houston": "HOU",
    "Indiana": "IND",
    "LA Clippers": "LAC",
    "LA Lakers": "LAL",
    "Memphis": "MEM",
    "Miami": "MIA",
    "Milwaukee": "MIL",
    "Minnesota": "MIN",
    "New Orleans": "NOP",
    "New York": "NYK",
    "Oklahoma City": "OKC",
    "Orlando": "ORL",
    "Philadelphia": "PHI",
    "Phoenix": "PHO",
    "Portland": "POR",
    "Sacramento": "SAC",
    "San Antonio": "SAS",
    "Toronto": "TOR",
    "Utah": "UTA",
    "Washington": "WAS"
}

### **INSTALL**

In [13]:
!pip3 install pandas requests scikit-learn fake_useragent selenium webdriver-manager tensorflow



### **IMPORTS**

In [9]:
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np
from bs4 import BeautifulSoup, Comment
import traceback
from pathlib import Path
import random
from fake_useragent import UserAgent

### **BASKETBALL REFERENCE LINKS**


In [10]:
# Base URL for box-score pages; save_games appends a game's page path to it.
BOX_SCORES = "https://www.basketball-reference.com/boxscores/"
# Scoreboard URL for one date; filled with .format(month, day, year) in save_score.
SCORES_BY_DATE = BOX_SCORES + "?month={}&day={}&year={}"
# Base URL for team pages; save_team appends the team abbreviation and season page.
TEAMS = "https://www.basketball-reference.com/teams/"

### **SAVE DATA**

##### **GET DATES**

In [12]:
def get_last_7_days():
    """Return the 7 dates before today as 'YYYY-MM-DD' strings, most recent first.

    Today itself is excluded: the first entry is yesterday.
    """
    reference = datetime.today()
    dates = []
    for offset in range(1, 8):
        dates.append((reference - timedelta(days=offset)).strftime('%Y-%m-%d'))
    return dates

def get_last_15_days():
    """Return the 15 dates before today as 'YYYY-MM-DD' strings, most recent first.

    Today itself is excluded: the first entry is yesterday.
    """
    anchor = datetime.today()
    return [(anchor - timedelta(days=back)).strftime('%Y-%m-%d') for back in range(1, 16)]

def get_last_30_days():
    """Return the 30 dates before today as 'YYYY-MM-DD' strings, most recent first.

    Today itself is excluded: the first entry is yesterday.
    """
    now = datetime.today()
    offsets = (timedelta(days=n) for n in range(1, 31))
    return [(now - delta).strftime('%Y-%m-%d') for delta in offsets]

def get_last_60_days():
    """Return the 60 dates before today as 'YYYY-MM-DD' strings, most recent first.

    Mirrors the 7/15/30-day helpers: today is excluded and the first entry is
    yesterday. The original stub had no body, which made the cell fail to parse.
    """
    today = datetime.today()
    last_60_days = [(today - timedelta(days=i+1)).strftime('%Y-%m-%d') for i in range(60)]
    return last_60_days

print(get_last_30_days())

['2024-11-26', '2024-11-25', '2024-11-24', '2024-11-23', '2024-11-22', '2024-11-21', '2024-11-20', '2024-11-19', '2024-11-18', '2024-11-17', '2024-11-16', '2024-11-15', '2024-11-14', '2024-11-13', '2024-11-12', '2024-11-11', '2024-11-10', '2024-11-09', '2024-11-08', '2024-11-07', '2024-11-06', '2024-11-05', '2024-11-04', '2024-11-03', '2024-11-02', '2024-11-01', '2024-10-31', '2024-10-30', '2024-10-29', '2024-10-28']


##### **SCORES BY DATES**

In [None]:
def save_score(date):
    """Download the scoreboard page for one date and cache it under SCORES/.

    Args:
        date: Date string in 'YYYY-MM-DD' form.

    The page is fetched with a random User-Agent, stripped of <link>/<script>
    tags that point at "pub.network", prettified, and written to
    SCORES/<month>-<day>-<year>.html. Any failure is reported on stdout and
    swallowed so that a batch download can continue with the next date.
    """
    try:
        year, month, day = date.split("-")
        url = SCORES_BY_DATE.format(month, day, year)

        ua = UserAgent()
        headers = {'User-Agent': ua.random}
        # A timeout keeps a stalled connection from hanging the whole batch.
        data = requests.get(url, headers=headers, timeout=30)
        # Without this, an error/rate-limit page would be cached as if it were a scoreboard.
        data.raise_for_status()

        soup = BeautifulSoup(data.text, "html.parser")

        for ad in soup.find_all("link", href=lambda href: href and "pub.network" in href):
            ad.decompose()
        for ad_script in soup.find_all("script", src=lambda src: src and "pub.network" in src):
            ad_script.decompose()

        content = soup.prettify()

        # Create the cache folder on first use instead of failing with FileNotFoundError.
        out_dir = Path("SCORES")
        out_dir.mkdir(parents=True, exist_ok=True)
        with open(out_dir / "{}-{}-{}.html".format(month, day, year), "w+") as f:
            f.write(content)

    except Exception as e:
        tb = traceback.extract_tb(e.__traceback__)
        line_number = tb[-1].lineno
        print(f"Exception occurred on line {line_number}: {e}")

def save_last_30_days_scores():
    """Cache the scoreboard page for each of the last 30 days, pausing 3.1 s after each request."""
    for day_string in get_last_30_days():
        save_score(day_string)
        time.sleep(3.1)

save_last_30_days_scores()

##### **GAMES BY DATES**

In [None]:
def save_games(date):
    """Download every box score listed on a cached scoreboard page into GAMES/.

    Args:
        date: Date string in 'YYYY-MM-DD' form; SCORES/<month>-<day>-<year>.html
            must already have been cached.

    Each game is written to GAMES/<month>-<day>-<year>-<home>-<away>.html, where
    the away team is read from the first row of the game's "teams" table and the
    home team from the second row. Failures are reported on stdout; a failure on
    one game no longer prevents the remaining games of that date from being saved.
    """
    def _report(exc):
        # Same message format as the other download helpers.
        tb = traceback.extract_tb(exc.__traceback__)
        line_number = tb[-1].lineno
        print(f"Exception occurred on line {line_number}: {exc}")

    try:
        year, month, day = date.split("-")
        file = "SCORES/{}-{}-{}.html".format(month, day, year)

        with open(file) as f:
            page = f.read()

        soup = BeautifulSoup(page, "html.parser")
        games = soup.find_all('div', class_="game_summary expanded nohover")

        out_dir = Path("GAMES")
        out_dir.mkdir(parents=True, exist_ok=True)
        # Send a browser-like User-Agent, consistent with save_score.
        headers = {'User-Agent': UserAgent().random}
    except Exception as e:
        _report(e)
        return

    for game in games:
        try:
            team_rows = game.find('table', class_="teams").find_all('tr')
            away_team = team_rows[0].find_all('td')[0].find('a').get_text().strip()
            home_team = team_rows[1].find_all('td')[0].find('a').get_text().strip()

            # Drop the leading path segment so the remainder can be appended to BOX_SCORES
            # (presumably hrefs start with "/boxscores/" — same 11-character offset as before).
            game_url = game.find('td', class_="right gamelink").find('a')['href'][len("/boxscores/"):]

            data = requests.get(BOX_SCORES + game_url, headers=headers, timeout=30)
            # Do not cache error/rate-limit pages as box scores.
            data.raise_for_status()

            with open(out_dir / "{}-{}-{}-{}-{}.html".format(month, day, year, home_team, away_team), "w+") as f:
                f.write(data.text)
        except Exception as e:
            _report(e)
        finally:
            # Keep the pause between requests even when a game fails.
            time.sleep(3.1)

def save_last_30_days_games():
    """Download the box scores for every game found on the last 30 days of cached scoreboards."""
    for day_string in get_last_30_days():
        save_games(day_string)


save_last_30_days_games()

##### **CURRENT TEAM INFORMATION**

In [19]:
def save_team(team, season=2025):
    """Download a team's season page and cache it as TEAMS/<ABBR>.html.

    Args:
        team: Team display name; must be a key of TEAM_TO_ABBR.
        season: Season page to fetch (the "<season>.html" part of the URL).
            Defaults to 2025, the value that was previously hard-coded.

    Raises:
        KeyError: if `team` is not in TEAM_TO_ABBR.
        requests.HTTPError: if the server answers with an error status, so an
            error page is never cached as team data.
    """
    abbr = TEAM_TO_ABBR[team]
    url = TEAMS + abbr + "/{}.html".format(season)
    data = requests.get(url, timeout=30)
    data.raise_for_status()

    # Create the cache folder on first use instead of failing with FileNotFoundError.
    out_dir = Path("TEAMS")
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / "{}.html".format(abbr), "w+") as f:
        f.write(data.text)

# Cache every team's page, pausing 3.1 s between requests.
for team in TEAM_TO_ABBR:
    save_team(team)
    time.sleep(3.1)

### **MAPPING DATA FOR RANDOM FOREST**

##### **OFFENSIVE AND DEFENSIVE RATING**

In [20]:
# Returns [Offensive Rating, Defensive Rating]
def scrape_team_ratings_from_game(date, team):
    """Read a team's offensive and defensive rating from its cached box score for one date.

    Scans GAMES/ for a file whose name starts with '<month>-<day>-<year>' and
    contains `team`, then looks for the 'div_four_factors' table inside the
    page's HTML comments. The team's own 'off_rtg' cell is its offensive rating;
    the other row's 'off_rtg' cell is used as its defensive rating.

    Returns:
        [offensive_rating, defensive_rating] as floats, or None when no matching
        file/table is found.
    """
    year, month, day = date.split("-")
    date_prefix = "{}-{}-{}".format(month, day, year)

    def _off_rtg(row):
        return float(row.find('td', attrs={'data-stat': 'off_rtg'}).get_text())

    for game_file in Path('GAMES').iterdir():
        if not (game_file.is_file() and game_file.name[:10] == date_prefix and team in game_file.name):
            continue

        with open(game_file, 'r') as handle:
            html = handle.read()

        page_soup = BeautifulSoup(html, 'html.parser')
        for comment in page_soup.find_all(string=lambda text: isinstance(text, Comment)):
            four_factors = BeautifulSoup(comment, 'html.parser').find('div', id='div_four_factors')
            if not four_factors:
                continue

            first_abbr = four_factors.find('tbody').find('th').find('a').get_text().strip()
            # The first row belongs to `team` only when its link text matches the team's abbreviation.
            own_row, opp_row = (0, 1) if first_abbr == TEAM_TO_ABBR[team] else (1, 0)
            offensive_rating = _off_rtg(four_factors.find('tbody').find_all('tr')[own_row])
            defensive_rating = _off_rtg(four_factors.find('tbody').find_all('tr')[opp_row])
            return [offensive_rating, defensive_rating]

def get_ratings(dates, team):
    """Collect the team's scraped ratings for each date, in the order given.

    Each entry is whatever scrape_team_ratings_from_game returns for that date,
    so dates without a matching game contribute None.
    """
    return [scrape_team_ratings_from_game(day_string, team) for day_string in dates]

def get_avg_ratings(dates, team):
    """Average a team's offensive and defensive ratings over the given dates.

    Args:
        dates: Iterable of 'YYYY-MM-DD' strings.
        team: Team display name.

    Returns:
        [avg_offensive_rating, avg_defensive_rating]. Dates with no rating are
        skipped. When no date in the window has a rating, [nan, nan] is returned
        instead of raising ZeroDivisionError, so the caller keeps a two-element
        list and the missing data stays visible downstream.
    """
    ortg = 0
    drtg = 0
    count = 0

    for rating in get_ratings(dates, team):
        if rating:
            ortg += rating[0]
            drtg += rating[1]
            count += 1

    if count == 0:
        return [float('nan'), float('nan')]

    return [ortg / count, drtg / count]

In [21]:
# Returns the team's effective field goal percentage (efg_pct)
def scrape_team_fgp_from_game(date, team):
    """Read a team's 'efg_pct' value from its cached box score for one date.

    Scans GAMES/ for a file whose name starts with '<month>-<day>-<year>' and
    contains `team`, then looks for the 'div_four_factors' table inside the
    page's HTML comments and reads the 'efg_pct' cell of the team's row.

    Returns:
        The value as a float, or None when no matching file/table is found.
    """
    year, month, day = date.split("-")
    date_prefix = "{}-{}-{}".format(month, day, year)

    for game_file in Path('GAMES').iterdir():
        if not (game_file.is_file() and game_file.name[:10] == date_prefix and team in game_file.name):
            continue

        with open(game_file, 'r') as handle:
            html = handle.read()

        page_soup = BeautifulSoup(html, 'html.parser')
        for comment in page_soup.find_all(string=lambda text: isinstance(text, Comment)):
            four_factors = BeautifulSoup(comment, 'html.parser').find('div', id='div_four_factors')
            if not four_factors:
                continue

            first_abbr = four_factors.find('tbody').find('th').find('a').get_text().strip()
            # Row 0 is the team's own row only when its link text matches the team's abbreviation.
            own_row = 0 if first_abbr == TEAM_TO_ABBR[team] else 1
            own_cells = four_factors.find('tbody').find_all('tr')[own_row]
            return float(own_cells.find('td', attrs={'data-stat': 'efg_pct'}).get_text())
                        
                 

def get_fgps(dates, team):
    """Collect the team's scraped eFG% for each date, in the order given.

    Each entry is whatever scrape_team_fgp_from_game returns for that date,
    so dates without a matching game contribute None.
    """
    return [scrape_team_fgp_from_game(day_string, team) for day_string in dates]

def get_avg_fgp(dates, team):
    """Average a team's eFG% over the given dates.

    Args:
        dates: Iterable of 'YYYY-MM-DD' strings.
        team: Team display name.

    Returns:
        The mean of the values found. Dates with no value are skipped. When no
        date in the window has a value, nan is returned instead of raising
        ZeroDivisionError.
    """
    total = 0
    count = 0

    for fgp in get_fgps(dates, team):
        if fgp:
            total += fgp
            count += 1

    if count == 0:
        return float('nan')

    return total / count

##### **INJURY RATIO**

In [22]:
def scrape_team_roster(team):
    """Return the player names listed in the roster table of the cached team page.

    Reads TEAMS/<ABBR>.html, takes every row of 'div_roster' that has a
    'player' cell, and returns the text of the first link in that cell.
    """
    roster_path = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(roster_path) as handle:
        html = handle.read()

    rows = BeautifulSoup(html, "html.parser").find('div', id='div_roster').find_all('tr')

    names = []
    for row in rows:
        player_cell = row.find('td', attrs={'data-stat': 'player'})
        if not player_cell:
            # Rows without a player cell (e.g. header rows) are skipped.
            continue
        names.append(player_cell.find_all('a')[0].get_text())

    return names

def scrape_team_injuries(team):
    """Return names from the injury table of the cached team page.

    Reads TEAMS/<ABBR>.html and searches its HTML comments for the
    'div_injuries' table. Of the links in the table body, only those at even
    positions (0, 2, 4, ...) are kept — presumably the player links; confirm
    against the page markup.

    Returns:
        A list of link texts, or None when the page has no injury table.
    """
    team_page = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(team_page) as handle:
        html = handle.read()

    page_soup = BeautifulSoup(html, "html.parser")

    for comment in page_soup.find_all(string=lambda text: isinstance(text, Comment)):
        injury_div = BeautifulSoup(comment, 'html.parser').find('div', class_='table_container', id='div_injuries')
        if not injury_div:
            continue
        links = injury_div.find('tbody').find_all('a')
        return [link.get_text() for position, link in enumerate(links) if link and position % 2 == 0]

def scrape_injuries_from_date(date, home, away):
    """Return the names linked in the "Inactive:" section of a cached box score.

    Args:
        date: Date string in 'YYYY-MM-DD' form.
        home: Home team display name, as used in the cached file name.
        away: Away team display name, as used in the cached file name.

    Returns:
        List of link texts found in the <div> that contains the "Inactive:"
        label. The list is not split by team. An empty list is returned when
        the page has no such label, rather than failing with AttributeError.
    """
    year, month, day = date.split("-")
    file = "GAMES/{}-{}-{}-{}-{}.html".format(month, day, year, home, away)

    with open(file) as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    # `string=` replaces the deprecated `text=` keyword, which triggers a DeprecationWarning.
    label = soup.find('strong', string='Inactive:\xa0')
    if label is None:
        return []

    return [inactive.get_text() for inactive in label.find_parent('div').find_all('a')]

In [23]:
def get_player_value(ppg, rpg, apg, spg, bpg):
    """Combine per-game stats into one weighted score.

    Weights: points x1, rebounds x1.2, assists x1.5, steals x2, blocks x2.
    """
    rebound_part = 1.2 * rpg
    assist_part = 1.5 * apg
    steal_part = 2 * spg
    block_part = 2 * bpg
    return ppg + rebound_part + assist_part + steal_part + block_part

def scrape_team_value(team):
    """Sum get_player_value over every row of the team's per-game stats table.

    Reads TEAMS/<ABBR>.html and uses the points, rebounds, assists, steals and
    blocks per-game cells of each body row in the 'per_game_stats' table.
    """
    team_page = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(team_page) as handle:
        html = handle.read()

    rows = BeautifulSoup(html, "html.parser").find('table', id='per_game_stats').find('tbody').find_all('tr')

    def _stat(row, key):
        return float(row.find('td', attrs={'data-stat': key}).get_text())

    team_total = 0
    for row in rows:
        team_total += get_player_value(
            _stat(row, 'pts_per_g'),
            _stat(row, 'trb_per_g'),
            _stat(row, 'ast_per_g'),
            _stat(row, 'stl_per_g'),
            _stat(row, 'blk_per_g'),
        )

    return team_total

def scrape_player_value(team, player_name):
    """Return get_player_value for one named player from the team's per-game stats table.

    Reads TEAMS/<ABBR>.html and matches `player_name` against the link text in
    each row's 'name_display' cell.

    Returns:
        The player's weighted score, or None when no row matches the name.
    """
    team_page = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(team_page) as handle:
        html = handle.read()

    rows = BeautifulSoup(html, "html.parser").find('table', id='per_game_stats').find('tbody').find_all('tr')

    def _stat(row, key):
        return float(row.find('td', attrs={'data-stat': key}).get_text())

    for row in rows:
        row_name = row.find('td', attrs={'data-stat': 'name_display'}).find('a').get_text()
        if row_name != player_name:
            continue
        return get_player_value(
            _stat(row, 'pts_per_g'),
            _stat(row, 'trb_per_g'),
            _stat(row, 'ast_per_g'),
            _stat(row, 'stl_per_g'),
            _stat(row, 'blk_per_g'),
        )

def get_injury_value(injuries, team):
    """Return the team's total player value minus the value of its listed injured players.

    Names in `injuries` that are not found on this team (or whose value is 0)
    contribute nothing, so a list mixing both teams' players can be passed.
    `injuries` may be None or empty.
    """
    total_value = scrape_team_value(team)

    sidelined_value = 0
    for name in injuries or ():
        contribution = scrape_player_value(team, name)
        if contribution:
            sidelined_value += contribution

    return total_value - sidelined_value

In [24]:
def scrape_team_advanced(team):
    """Sum the 'vorp' cell over every row of the team's 'advanced' table.

    Reads TEAMS/<ABBR>.html and searches its HTML comments for the table.

    Returns:
        The summed value, or None when no 'advanced' table is found.
    """
    team_page = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(team_page) as handle:
        html = handle.read()

    page_soup = BeautifulSoup(html, "html.parser")

    for comment in page_soup.find_all(string=lambda text: isinstance(text, Comment)):
        table = BeautifulSoup(comment, 'html.parser').find('table', id='advanced')
        if not table:
            continue

        vorp_total = 0
        for row in table.find('tbody').find_all('tr'):
            vorp_total += float(row.find('td', attrs={'data-stat': 'vorp'}).get_text())
        return vorp_total
    
def scrape_player_advanced(team, player_name):
    """Return the 'vorp' value of one named player from the team's 'advanced' table.

    Reads TEAMS/<ABBR>.html, searches its HTML comments for the table, and
    matches `player_name` against the link text in each row's 'name_display' cell.

    Returns:
        The value as a float, or None when the table or the player is not found.
    """
    team_page = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(team_page) as handle:
        html = handle.read()

    page_soup = BeautifulSoup(html, "html.parser")

    for comment in page_soup.find_all(string=lambda text: isinstance(text, Comment)):
        table = BeautifulSoup(comment, 'html.parser').find('table', id='advanced')
        if not table:
            continue

        for row in table.find('tbody').find_all('tr'):
            row_name = row.find('td', attrs={'data-stat': 'name_display'}).find('a').get_text()
            if row_name == player_name:
                return float(row.find('td', attrs={'data-stat': 'vorp'}).get_text())

def get_injury_advanced(injuries, team):
    """Return the team's summed 'vorp' minus the 'vorp' of its listed injured players.

    Names in `injuries` that are not found on this team (or whose value is 0)
    contribute nothing. `injuries` may be None or empty.
    """
    total_advanced = scrape_team_advanced(team)

    sidelined_advanced = 0
    for name in injuries or ():
        contribution = scrape_player_advanced(team, name)
        if contribution:
            sidelined_advanced += contribution

    return total_advanced - sidelined_advanced

In [25]:
def scrape_home_win(date, home, away):
    """Return 1 if the home team outscored the away team in the cached box score, else 0.

    Reads GAMES/<month>-<day>-<year>-<home>-<away>.html; the first 'scores'
    block of the scorebox is read as the away score and the second as the home score.
    """
    year, month, day = date.split("-")
    game_path = "GAMES/{}-{}-{}-{}-{}.html".format(month, day, year, home, away)

    with open(game_path) as handle:
        html = handle.read()

    score_blocks = BeautifulSoup(html, "html.parser").find('div', class_='scorebox').find_all('div', class_='scores')

    def _points(block):
        return float(block.find('div', class_='score').get_text())

    away_score = _points(score_blocks[0])
    home_score = _points(score_blocks[1])

    return 1 if home_score > away_score else 0

### **ORGANIZE DATA**

In [29]:
# Per-team rolling averages, keyed by team display name.
# RATINGS_* hold [offensive, defensive] averages; FGP_* hold the average eFG%.
RATINGS_30_DAY, RATINGS_15_DAY, RATINGS_7_DAY = {}, {}, {}
FGP_30_DAY, FGP_15_DAY, FGP_7_DAY = {}, {}, {}

for team in TEAM_TO_ABBR:
    for ratings_store, window in (
        (RATINGS_30_DAY, get_last_30_days),
        (RATINGS_15_DAY, get_last_15_days),
        (RATINGS_7_DAY, get_last_7_days),
    ):
        ratings_store[team] = get_avg_ratings(window(), team)

    for fgp_store, window in (
        (FGP_30_DAY, get_last_30_days),
        (FGP_15_DAY, get_last_15_days),
        (FGP_7_DAY, get_last_7_days),
    ):
        fgp_store[team] = get_avg_fgp(window(), team)



  comment_soup = BeautifulSoup(comment, 'html.parser')


In [30]:
def get_game_data_from_date(date, home, away):
    """Build one training row for a past game.

    Returns a list in this order: home, away; home/away net rating for the 30-,
    15- and 7-day windows; home/away eFG% for the same windows; home/away
    injury-adjusted value; home/away injury-adjusted 'vorp' total; home-win flag.

    NOTE(review): the rating and eFG% features come from the module-level
    RATINGS_*/FGP_* tables, which are keyed by team only — they do not depend
    on `date`. Only the inactive list and the result are read from the game's file.
    """
    def _net(store, team):
        # Net rating = offensive average minus defensive average.
        return store[team][0] - store[team][1]

    row = [home, away]

    for ratings_store in (RATINGS_30_DAY, RATINGS_15_DAY, RATINGS_7_DAY):
        row.append(_net(ratings_store, home))
        row.append(_net(ratings_store, away))

    for fgp_store in (FGP_30_DAY, FGP_15_DAY, FGP_7_DAY):
        row.append(fgp_store[home])
        row.append(fgp_store[away])

    # One combined inactive list is used for both teams; names not on a team are ignored downstream.
    inactive_names = scrape_injuries_from_date(date, home, away)

    row.append(get_injury_value(inactive_names, home))
    row.append(get_injury_value(inactive_names, away))

    row.append(get_injury_advanced(inactive_names, home))
    row.append(get_injury_advanced(inactive_names, away))

    row.append(scrape_home_win(date, home, away))

    return row

def get_all_data():
    """Build one feature row per cached box score in GAMES/.

    File names are expected to look like '<month>-<day>-<year>-<home>-<away>.html'.
    Files are processed in sorted path order and each row is printed as it is built.

    Returns:
        A list of rows (possibly empty). A file that cannot be parsed or
        processed is reported on stdout and skipped, so one bad file no longer
        discards every row collected so far, and the function never returns None.
    """
    game_data = []
    folder_path = Path('GAMES')

    if not folder_path.is_dir():
        print(f"Exception occurred: folder {folder_path} does not exist")
        return game_data

    for file_path in sorted(folder_path.iterdir()):
        # Only cached box scores are considered; this also skips .DS_Store and other stray files.
        if not file_path.is_file() or file_path.suffix != ".html":
            continue

        try:
            month, day, year, home, away = file_path.stem.split("-")
            date = year + "-" + month + "-" + day
            game = get_game_data_from_date(date, home, away)
        except Exception as e:
            tb = traceback.extract_tb(e.__traceback__)
            line_number = tb[-1].lineno
            print(f"Skipping {file_path.name}: exception occurred on line {line_number}: {e}")
            continue

        print(game)
        game_data.append(game)

    return game_data
        
# Column names, in the same order as the rows returned by get_game_data_from_date.
columns = [
    "Home",
    "Away",
    "30 Day Home Net Rating",
    "30 Day Away Net Rating",
    "15 Day Home Net Rating",
    "15 Day Away Net Rating",
    "7 Day Home Net Rating",
    "7 Day Away Net Rating",
    "30 Day Home eFG%",
    "30 Day Away eFG%",
    "15 Day Home eFG%",
    "15 Day Away eFG%",
    "7 Day Home eFG%",
    "7 Day Away eFG%",
    "Home Injury Value",
    "Away Injury Value",
    "Home Injury Advanced",
    "Away Injury Advanced",
    "Home Win",
]

# One-hot encode the team names, then cast everything to float for the model.
games_table = pd.DataFrame(get_all_data(), columns=columns)
df = pd.get_dummies(games_table, columns=['Home', 'Away']).astype(float)

file_path = 'DATA/30_days_data.xlsx'
df.to_excel(file_path, index=False)

  inactives = soup.find('strong', text='Inactive:\xa0').find_parent('div').find_all('a')
  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')


['Atlanta', 'Washington', -5.093333333333334, -13.900000000000006, -4.871428571428567, -15.771428571428586, -14.266666666666666, -13.933333333333337, 0.5280666666666666, 0.5164285714285716, 0.5131428571428571, 0.5204285714285714, 0.4716666666666667, 0.49366666666666664, 262.52000000000004, 246.06999999999994, 0.9, -1.0, 0]
['Boston', 'Milwaukee', 9.573333333333323, 1.0933333333333337, 11.114285714285728, 7.262500000000003, 15.533333333333331, 9.375, 0.5636, 0.5596, 0.592, 0.57625, 0.5566666666666666, 0.6100000000000001, 253.14999999999992, 258.07, 4.4, 2.3000000000000007, 1]
['Dallas', 'Utah', 6.4375, -7.746666666666698, 7.887500000000017, -4.2250000000000085, 2.566666666666663, -1.8333333333333286, 0.5560624999999999, 0.5381999999999999, 0.55325, 0.569, 0.5373333333333333, 0.5630000000000001, 274.63, 251.26000000000002, 3.5999999999999996, 0.6, 1]
['Memphis', 'Chicago', 9.146666666666661, -4.606250000000003, 5.285714285714292, -4.162499999999994, 13.766666666666666, 0.7999999999999972

### **RANDOM FOREST MODEL**

In [31]:

# Define features and target
features = [col for col in df.columns if col != 'Home Win' and col != 'Home' and col != 'Away']
target = 'Home Win'

X = df[features]
y = df[target]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Features for the game to predict, built the same way as the training rows
home_net_30_days = RATINGS_30_DAY[HOME][0] - RATINGS_30_DAY[HOME][1]
away_net_30_days = RATINGS_30_DAY[AWAY][0] - RATINGS_30_DAY[AWAY][1]

home_net_15_days = RATINGS_15_DAY[HOME][0] - RATINGS_15_DAY[HOME][1]
away_net_15_days = RATINGS_15_DAY[AWAY][0] - RATINGS_15_DAY[AWAY][1]

home_net_7_days = RATINGS_7_DAY[HOME][0] - RATINGS_7_DAY[HOME][1]
away_net_7_days = RATINGS_7_DAY[AWAY][0] - RATINGS_7_DAY[AWAY][1]

home_fgp_30_days = FGP_30_DAY[HOME]
away_fgp_30_days = FGP_30_DAY[AWAY]

home_fgp_15_days = FGP_15_DAY[HOME]
away_fgp_15_days = FGP_15_DAY[AWAY]

home_fgp_7_days = FGP_7_DAY[HOME]
away_fgp_7_days = FGP_7_DAY[AWAY]

# For an upcoming game the injured players come from each team's current injury table
home_injuries = scrape_team_injuries(HOME)
away_injuries = scrape_team_injuries(AWAY)

home_injury_value = get_injury_value(home_injuries, HOME)
away_injury_value = get_injury_value(away_injuries, AWAY)

home_injury_advanced = get_injury_advanced(home_injuries, HOME)
away_injury_advanced = get_injury_advanced(away_injuries, AWAY)


input_data = {
    "30 Day Home Net Rating": [home_net_30_days],
    "30 Day Away Net Rating": [away_net_30_days],
    "15 Day Home Net Rating": [home_net_15_days],
    "15 Day Away Net Rating": [away_net_15_days],
    "7 Day Home Net Rating": [home_net_7_days],
    "7 Day Away Net Rating": [away_net_7_days],
    "30 Day Home eFG%": [home_fgp_30_days],
    "30 Day Away eFG%": [away_fgp_30_days],
    "15 Day Home eFG%": [home_fgp_15_days],
    "15 Day Away eFG%": [away_fgp_15_days],
    "7 Day Home eFG%": [home_fgp_7_days],
    "7 Day Away eFG%": [away_fgp_7_days],
    "Home Injury Value": [home_injury_value],
    "Away Injury Value": [away_injury_value],
    "Home Injury Advanced": [home_injury_advanced],
    "Away Injury Advanced": [away_injury_advanced],
}

# Start with every team indicator switched off
for col in X.columns:
    if col.startswith("Home_") or col.startswith("Away_"):
        input_data[col] = [0]

home = "Home_{}".format(HOME)
away = "Away_{}".format(AWAY)

# Only switch on indicators the model was trained with. Adding a column that is
# absent from the training data would make predict() fail on a feature-name mismatch.
for one_hot in (home, away):
    if one_hot in X.columns:
        input_data[one_hot] = [1]
    else:
        print("Warning: no training games with column '{}'; predicting without it".format(one_hot))

# Convert new game data into a DataFrame with exactly the training columns, in training order
prediction_df = pd.DataFrame(input_data)[list(X.columns)]

# Predict the outcome for the new game
prediction = rf_model.predict(prediction_df)

y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Model Accuracy:", accuracy)


# Output the prediction
if prediction[0] == 1:
    print(HOME + " will win")
else:
    print(AWAY + " will win")

  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')


Model Accuracy: 0.6444444444444445
Boston will win


### **NEURAL NETWORK (DISABLED)**

In [29]:
'''
# Define features and target
features = [col for col in df.columns if col != 'Home Win']
target = 'Home Win'

X = df[features]
y = df[target]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # Input and first hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(1, activation='sigmoid')  # Output layer (binary classification)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=8, verbose=1, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.2f}")

# Predict for a new game
home_ratings_30_days = RATINGS_30_DAY[HOME]
away_ratings_30_days = RATINGS_30_DAY[AWAY]

home_ratings_15_days = RATINGS_15_DAY[HOME]
away_ratings_15_days = RATINGS_15_DAY[AWAY]

home_ratings_7_days = RATINGS_7_DAY[HOME]
away_ratings_7_days = RATINGS_7_DAY[AWAY]

home_injuries = scrape_team_injuries(HOME)
away_injuries = scrape_team_injuries(AWAY)

home_injury_value = get_injury_advanced(home_injuries, HOME)
away_injury_value = get_injury_advanced(away_injuries, AWAY)


input_data = { 
    "30 Day Home Net Rating": [home_ratings_30_days[0] - home_ratings_30_days[1]], 
    "30 Day Away Net Rating": [away_ratings_30_days[0] - away_ratings_30_days[1]], 
    "15 Day Home Net Rating": [home_ratings_15_days[0] - home_ratings_15_days[1]], 
    "15 Day Away Net Rating": [away_ratings_15_days[0] - away_ratings_15_days[1]],
    "7 Day Home Net Rating": [home_ratings_7_days[0] - home_ratings_7_days[1]], 
    "7 Day Away Net Rating": [away_ratings_7_days[0] - away_ratings_7_days[1]],
    "Home Injury Advanced": [home_injury_value], 
    "Away Injury Advanced": [away_injury_value],
}


for col in X.columns:
    if col.startswith("Home_") or col.startswith("Away_"):
        input_data[col] = [0]

home = "Home_{}".format(HOME)
away = "Away_{}".format(AWAY)

input_data[home] = [1]
input_data[away] = [1]

# Set the specific teams for this game

# Convert new game data into a DataFrame
new_game_df = pd.DataFrame(input_data)

# Reorder columns to match the training data
new_game_df = new_game_df[features]

# Predict the outcome for the new game
prediction = model.predict(new_game_df)
predicted_class = (prediction > 0.5).astype(int)

# Output the prediction
if predicted_class[0][0] == 1:
    print("The model predicts the home team {} will win.".format(HOME))
else:
    print("The model predicts the away team {} will win.".format(AWAY))
'''

'\n# Define features and target\nfeatures = [col for col in df.columns if col != \'Home Win\']\ntarget = \'Home Win\'\n\nX = df[features]\ny = df[target]\n\n# Split the data\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# Define the neural network model\nmodel = Sequential([\n    Dense(64, input_dim=X_train.shape[1], activation=\'relu\'),  # Input and first hidden layer\n    Dense(32, activation=\'relu\'),  # Second hidden layer\n    Dense(1, activation=\'sigmoid\')  # Output layer (binary classification)\n])\n\n# Compile the model\nmodel.compile(optimizer=\'adam\', loss=\'binary_crossentropy\', metrics=[\'accuracy\'])\n\n# Train the model\nmodel.fit(X_train, y_train, epochs=100, batch_size=8, verbose=1, validation_split=0.1)\n\n# Evaluate the model\nloss, accuracy = model.evaluate(X_test, y_test, verbose=0)\nprint(f"Test Accuracy: {accuracy:.2f}")\n\n# Predict for a new game\nhome_ratings_30_days = RATINGS_30_DAY[HOME]\naway_ratings_30_d