# **NBA PREDICTION MODEL**


### **INPUT TEAMS**

In [17]:
HOME = "Portland"
AWAY = "Sacramento"

In [18]:
TEAM_TO_ABBR = {
    "Atlanta": "ATL",
    "Boston": "BOS",
    "Brooklyn": "BRK",
    "Charlotte": "CHO",
    "Chicago": "CHI",
    "Cleveland": "CLE",
    "Dallas": "DAL",
    "Denver": "DEN",
    "Detroit": "DET",
    "Golden State": "GSW",
    "Houston": "HOU",
    "Indiana": "IND",
    "LA Clippers": "LAC",
    "LA Lakers": "LAL",
    "Memphis": "MEM",
    "Miami": "MIA",
    "Milwaukee": "MIL",
    "Minnesota": "MIN",
    "New Orleans": "NOP",
    "New York": "NYK",
    "Oklahoma City": "OKC",
    "Orlando": "ORL",
    "Philadelphia": "PHI",
    "Phoenix": "PHO",
    "Portland": "POR",
    "Sacramento": "SAC",
    "San Antonio": "SAS",
    "Toronto": "TOR",
    "Utah": "UTA",
    "Washington": "WAS"
}

In [8]:
def valid_teams():
    if HOME not in TEAM_TO_ABBR or AWAY not in TEAM_TO_ABBR:
        raise Exception("Please input valid team cities")
    else:
        return True
    
valid_teams()

True

### **INSTALL**

In [22]:
!pip3 install pandas requests scikit-learn fake_useragent selenium webdriver-manager tensorflow



### **IMPORTS**

In [9]:
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np
from bs4 import BeautifulSoup, Comment
import traceback
from pathlib import Path
import random
from fake_useragent import UserAgent

### **BASKETBALL REFERENCE LINKS**


In [10]:
BOX_SCORES = "https://www.basketball-reference.com/boxscores/"
SCORES_BY_DATE = BOX_SCORES + "?month={}&day={}&year={}"
TEAMS = "https://www.basketball-reference.com/teams/"

### **SAVE DATA**

##### **GET DATES**

In [11]:
def get_last_7_days_from_date(date):
    date = datetime.strptime(date, '%Y-%m-%d')
    last_7_days = [(date - timedelta(days=i+1)).strftime('%Y-%m-%d') for i in range(7)]
    return last_7_days

def get_last_15_days_from_date(date):
    date = datetime.strptime(date, '%Y-%m-%d')
    last_15_days = [(date - timedelta(days=i+1)).strftime('%Y-%m-%d') for i in range(15)]
    return last_15_days

def get_last_30_days_from_date(date):
    date = datetime.strptime(date, '%Y-%m-%d')
    last_30_days = [(date - timedelta(days=i+1)).strftime('%Y-%m-%d') for i in range(30)]
    return last_30_days

def get_last_60_days():
    today = datetime.today()
    last_60_days = [(today - timedelta(days=i+1)).strftime('%Y-%m-%d') for i in range(60)]
    return last_60_days


##### **SCORES BY DATES**

In [None]:
def save_score(date):
    try:
        year, month, day = date.split("-")
        url = SCORES_BY_DATE.format(month, day, year)

        ua = UserAgent()
        headers = {'User-Agent': ua.random}
        data = requests.get(url, headers=headers)

        soup = BeautifulSoup(data.text, "html.parser")

        for ad in soup.find_all("link", href=lambda href: href and "pub.network" in href):
            ad.decompose()
        for ad_script in soup.find_all("script", src=lambda src: src and "pub.network" in src):
            ad_script.decompose()


        content = soup.prettify()
       
        with open("SCORES/{}-{}-{}.html".format(month, day, year), "w+") as f:
            f.write(content)
    
    except Exception as e:
        tb = traceback.extract_tb(e.__traceback__)
        line_number = tb[-1].lineno
        print(f"Exception occurred on line {line_number}: {e}")

def save_last_60_days_scores():
    dates = get_last_60_days()
    for date in dates:
        save_score(date)
        time.sleep(3.1)

save_last_60_days_scores()

##### **GAMES BY DATES**

In [104]:
def save_games(date):
    try:
        year, month, day = date.split("-")
        file = "SCORES/{}-{}-{}.html".format(month, day, year)
        
        with open(file) as f:   
            page = f.read()
    
        soup = BeautifulSoup(page, "html.parser")
        games = soup.find_all('div', class_="game_summary expanded nohover")
        
        for game in games:            
            home_team = game.find('table', class_="teams").find_all('tr')[1].find_all('td')[0].find('a').get_text().strip()
            away_team = game.find('table', class_="teams").find_all('tr')[0].find_all('td')[0].find('a').get_text().strip()
            
            game_url = game.find('td', class_="right gamelink").find('a')['href'][11:]
            
            url = BOX_SCORES + game_url
            
            data = requests.get(url)

            with open("GAMES/{}-{}-{}-{}-{}.html".format(month, day, year, home_team, away_team), "w+") as f:
                f.write(data.text)

            time.sleep(3.1)
                
    except Exception as e:
        tb = traceback.extract_tb(e.__traceback__)
        line_number = tb[-1].lineno
        print(f"Exception occurred on line {line_number}: {e}")

def save_last_60_days_games():
    dates = get_last_60_days()
    for date in dates:
        save_games(date)

save_last_60_days_games()

##### **CURRENT TEAM INFORMATION**

In [30]:
def save_team(team):
    url = TEAMS + TEAM_TO_ABBR[team] + "/2025.html"
    data = requests.get(url)

    with open("TEAMS/{}.html".format(TEAM_TO_ABBR[team]), "w+") as f:
        f.write(data.text)

for team in TEAM_TO_ABBR.keys():
    save_team(team)
    time.sleep(3.1)

### **MAPPING DATA FOR RANDOM FOREST**

In [12]:
# {
#   date: 
#   {
#       team: [team_stat, opp_stat],
#       team: [team_stat, opp_stat],
#       team: [team_stat, opp_stat],
#       team: [team_stat, opp_stat],
#   },
#   date: 
#   {
#       team: [team_stat, opp_stat],
#       team: [team_stat, opp_stat]
#   },
# }
#

def scrape_team_metrics_from_game(date, home, away, metric_name):
    year, month, day = date.split("-")
    file = "GAMES/{}-{}-{}-{}-{}.html".format(month, day, year, home, away)

    with open(file) as f:
        page = f.read()

    soup = BeautifulSoup(page, 'html.parser')
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))

    for comment in comments:
        comment_soup = BeautifulSoup(comment, 'html.parser')
        div_four_factors = comment_soup.find('div', id='div_four_factors')
        if div_four_factors:
            away_or = float(div_four_factors.find('tbody').find_all('tr')[0].find('td', attrs={'data-stat': metric_name}).get_text())
            home_or = float(div_four_factors.find('tbody').find_all('tr')[1].find('td', attrs={'data-stat': metric_name}).get_text())
            return [home_or, away_or]

def get_all_metrics(metric_name):
    folder_path = Path('GAMES')
    folder_list = list(folder_path.iterdir())
    folder_list.sort()
    folder_list = folder_list[1:]
    all_ratings = {}

    for file_path in folder_list:
        data = file_path.name.split("-")
        date = data[2] + "-" + data[0] + "-" + data[1]
        home = data[3]
        away = data[4][:len(data[4]) - 5]
        ratings = scrape_team_metrics_from_game(date, home, away, metric_name)
        
        if date not in all_ratings:
            all_ratings[date] = {}
            
        all_ratings[date][home] = ratings
        all_ratings[date][away] = [ratings[1], ratings[0]]

    return all_ratings

def get_metrics(dates, team, metric_dict):
    metrics = []
    for date in dates:
        try:
            metric = metric_dict[date][team]
        except:
            metric = None
        metrics.append(metric)
    return metrics

def get_avg_metrics(dates, team, metric_dict):
    team_metric = 0
    opp_metric = 0
    count = 0
    metrics = get_metrics(dates, team, metric_dict)
    
    for metric in metrics:
        if metric:
            team_metric += metric[0]
            opp_metric += metric[1]
            count += 1

    team_metric /= count
    opp_metric /= count
    return [team_metric, opp_metric]

##### **OFFENSIVE AND DEFENSIVE RATING**

In [None]:
RATINGS_DICT = get_all_metrics('off_rtg')

def get_avg_ratings(dates, team):
    return get_avg_metrics(dates, team, RATINGS_DICT)

In [None]:
FGP_DICT = get_all_metrics('efg_pct')

def get_avg_fgps(dates, team):
    return get_avg_metrics(dates, team, FGP_DICT)

In [None]:
TOP_DICT = get_all_metrics('tov_pct')

def get_avg_fgps(dates, team):
    return get_avg_metrics(dates, team, TOP_DICT)

##### **INJURY RATIO**

In [81]:
def scrape_team_roster(team):
    file = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(file) as f:
        page = f.read()
        
    soup = BeautifulSoup(page, "html.parser")

    roster = []
    roster_data = soup.find('div', id='div_roster').find_all('tr')
    for player_data in roster_data:
        player = player_data.find('td', attrs={'data-stat': 'player'})
        if player:
            roster.append(player.find_all('a')[0].get_text())
    
    return roster

def scrape_team_injuries(team):
    file = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])
    
    with open(file) as f:
        page = f.read()
    
    soup = BeautifulSoup(page, "html.parser")
   
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    
    for comment in comments:
        comment_soup = BeautifulSoup(comment, 'html.parser')  
        div_injuries = comment_soup.find('div', class_='table_container', id='div_injuries')
        if div_injuries:
            injuries = div_injuries.find('tbody').find_all('a')
            count = -1
            reserves = []
            for injury in injuries:
                count += 1
                if injury and count % 2 == 0:
                    reserves.append(injury.get_text())
            return reserves

def scrape_injuries_from_date(date, home, away):
    year, month, day = date.split("-")
    file = "GAMES/{}-{}-{}-{}-{}.html".format(month, day, year, home, away)

    with open(file) as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    inactives = soup.find('strong', text='Inactive:\xa0').find_parent('div').find_all('a')
    injuries = []
    for inactive in inactives:
        injuries.append(inactive.get_text())
    
    return injuries

In [82]:
def get_player_value(ppg, rpg, apg, spg, bpg):
    return ppg + (1.2 * rpg) + (1.5 * apg) + (2 * spg) + (2 * bpg)

def scrape_team_value(team):
    file = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])
    
    with open(file) as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    players = soup.find('table', id='per_game_stats').find('tbody').find_all('tr')

    total = 0

    for player in players:
        ppg = float(player.find('td', attrs={'data-stat': 'pts_per_g'}).get_text())
        rpg = float(player.find('td', attrs={'data-stat': 'trb_per_g'}).get_text())
        apg = float(player.find('td', attrs={'data-stat': 'ast_per_g'}).get_text())
        spg = float(player.find('td', attrs={'data-stat': 'stl_per_g'}).get_text())
        bpg = float(player.find('td', attrs={'data-stat': 'blk_per_g'}).get_text())
        total += get_player_value(ppg, rpg, apg, spg, bpg)
        
    return total

def scrape_player_value(team, player_name):
    file = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])
    
    with open(file) as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    players = soup.find('table', id='per_game_stats').find('tbody').find_all('tr')

    for player in players:
        name = player.find('td', attrs={'data-stat': 'name_display'}).find('a').get_text()
        if name == player_name: 
            ppg = float(player.find('td', attrs={'data-stat': 'pts_per_g'}).get_text())
            rpg = float(player.find('td', attrs={'data-stat': 'trb_per_g'}).get_text())
            apg = float(player.find('td', attrs={'data-stat': 'ast_per_g'}).get_text())
            spg = float(player.find('td', attrs={'data-stat': 'stl_per_g'}).get_text())
            bpg = float(player.find('td', attrs={'data-stat': 'blk_per_g'}).get_text())
            value =  get_player_value(ppg, rpg, apg, spg, bpg)
            return value

def get_injury_value(injuries, team):
    injury_value = 0
    total_value = scrape_team_value(team)
    if injuries:
        for injury in injuries:
            player_value = scrape_player_value(team, injury)
            if player_value:
                injury_value += player_value
    
    return (total_value - injury_value)

In [83]:
def scrape_team_advanced(team):
    file = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(file) as f:
        page = f.read()
    
    soup = BeautifulSoup(page, "html.parser")

    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    
    for comment in comments:
        comment_soup = BeautifulSoup(comment, 'html.parser')  
        advanced = comment_soup.find('table', id='advanced')
        if advanced:
            advanced = advanced.find('tbody').find_all('tr')
            total = 0
            for player in advanced:
                vorp = float(player.find('td', attrs={'data-stat': 'vorp'}).get_text())
                total += vorp

            return total
    
def scrape_player_advanced(team, player_name):
    file = "TEAMS/{}.html".format(TEAM_TO_ABBR[team])

    with open(file) as f:
        page = f.read()
    
    soup = BeautifulSoup(page, "html.parser")

    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    
    for comment in comments:
        comment_soup = BeautifulSoup(comment, 'html.parser')  
        advanced = comment_soup.find('table', id='advanced')
        if advanced:
            advanced = advanced.find('tbody').find_all('tr')
            for player in advanced:
                name = player.find('td', attrs={'data-stat': 'name_display'}).find('a').get_text()
                if name == player_name:
                    vorp = float(player.find('td', attrs={'data-stat': 'vorp'}).get_text())
                    return vorp

def get_injury_advanced(injuries, team):
    injury_advanced = 0
    total_advanced = scrape_team_advanced(team)
    if injuries:
        for injury in injuries:
            player_advanced = scrape_player_advanced(team, injury)
            if player_advanced:
                injury_advanced += player_advanced
    
    return (total_advanced - injury_advanced)

In [84]:
def scrape_home_win(date, home, away):
    year, month, day = date.split("-")
    
    file = "GAMES/{}-{}-{}-{}-{}.html".format(month, day, year, home, away)
    with open(file) as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    scores = soup.find('div', class_='scorebox').find_all('div', class_='scores')

    away_score = float(scores[0].find('div', class_='score').get_text())
    home_score = float(scores[1].find('div', class_='score').get_text())

    if home_score > away_score:
        return 1
    else:
        return 0

### **ORGANIZE DATA**

In [85]:
def get_training_data_from_date(date, home, away):
    ratings = scrape_team_metrics_from_game(date, home, away, "off_rtg")
    home_off_rtg = ratings[0]
    away_off_rtg = ratings[1]

    fgps = scrape_team_metrics_from_game(date, home, away, "efg_pct")
    home_fgp = fgps[0]
    away_fgp = fgps[1]

    tops = scrape_team_metrics_from_game(date, home, away, "tov_pct")
    home_top = tops[0]
    away_top = tops[1]

    injuries = scrape_injuries_from_date(date, home, away)

    home_injury_value = get_injury_value(injuries, home)
    away_injury_value = get_injury_value(injuries, away)

    home_injury_advanced = get_injury_advanced(injuries, home)
    away_injury_advanced = get_injury_advanced(injuries, away)

    home_win = scrape_home_win(date, home, away)

    return [
            home, away, 
            home_off_rtg, away_off_rtg, 
            home_fgp, away_fgp,
            home_top, away_top, 
            home_injury_value, away_injury_value,
            home_injury_advanced, away_injury_advanced,
            home_win
            ]

def get_all_training_data():
    try:
        folder_path = Path('GAMES')
        folder_list = list(folder_path.iterdir())
        folder_list.sort()
        game_data = []
        today = datetime.today().strftime('%Y-%m-%d')
        last_30_days = get_last_30_days_from_date(today)
        last_30_days = [datetime.strptime(date, '%Y-%m-%d').strftime('%m-%d-%Y') for date in last_30_days]
        for file_path in folder_list:
            if file_path.is_file() and file_path.name != ".DS_Store" and file_path.name[:10] not in last_30_days:
                data = file_path.name.split("-")
                date = data[2] + "-" + data[0] + "-" + data[1]
                home = data[3]
                away = data[4][:len(data[4]) - 5]
                game = get_training_data_from_date(date, home, away)
                print(game)
                game_data.append(game)
        return game_data
    
    except Exception as e:
        tb = traceback.extract_tb(e.__traceback__)
        line_number = tb[-1].lineno
        print(f"Exception occurred on line {line_number}: {e}")
        
columns = [
    "Home", "Away", 
    "Home Offensive Rating", "Away Offensive Rating", 
    "Home eFG%", "Away eFG%",
    "Home TOV%", "AWAY TOV%", 
    "Home Injury Value", "Away Injury Value",
    "Home Injury Advanced", "Away Injury Advanced",
    "Home Win"
]

df_train = pd.DataFrame(get_all_training_data(), columns=columns)

file_path = 'DATA/training_data.xlsx'
df_train.to_excel(file_path, index=False)

df_train = pd.get_dummies(df_train, columns=['Home', 'Away'])

df_train = df_train.astype(float)

  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')
  inactives = soup.find('strong', text='Inactive:\xa0').find_parent('div').find_all('a')
  comment_soup = BeautifulSoup(comment, 'html.parser')


['Boston', 'New York', 147.3, 121.6, 0.658, 0.622, 265.7899999999999, 254.59, 4.6, 3.3000000000000003, 1]
['LA Lakers', 'Minnesota', 114.7, 107.4, 0.468, 0.488, 245.86999999999998, 242.52999999999997, 1.9000000000000004, 2.1999999999999997, 1]
['Atlanta', 'Brooklyn', 115.6, 111.8, 0.544, 0.533, 310.10999999999996, 255.45, 1.0, 2.0999999999999996, 1]
['Detroit', 'Indiana', 110.5, 116.6, 0.483, 0.56, 265.88, 288.49, 1.5999999999999999, 1.1999999999999995, 0]
['Houston', 'Charlotte', 108.7, 113.9, 0.432, 0.535, 258.84000000000003, 281.52000000000004, 4.5, 1.6, 0]
['LA Clippers', 'Phoenix', 101.9, 104.6, 0.5, 0.583, 252.56000000000006, 272.48, 3.3, 1.7999999999999998, 0]
['Miami', 'Orlando', 100.9, 120.7, 0.457, 0.521, 287.71, 298.66, 1.7000000000000002, 3.3, 0]
['New Orleans', 'Chicago', 113.8, 102.7, 0.536, 0.547, 309.0799999999999, 279.36, -0.9999999999999999, 1.2000000000000002, 1]
['Philadelphia', 'Milwaukee', 109.9, 125.0, 0.459, 0.638, 266.8899999999999, 259.32, 0.30000000000000004,

In [19]:
def get_input_format(date, home, away, func):
    home_30_days_avgs = func(get_last_30_days_from_date(date), home)
    away_30_days_avgs = func(get_last_30_days_from_date(date), away)
    home_30_days = (home_30_days_avgs[0] + away_30_days_avgs[1]) / 2
    away_30_days = (away_30_days_avgs[0] + home_30_days_avgs[1]) / 2 

    home_15_days_avgs = func(get_last_15_days_from_date(date), home)
    away_15_days_avgs = func(get_last_15_days_from_date(date), away)
    home_15_days = (home_15_days_avgs[0] + away_15_days_avgs[1]) / 2
    away_15_days = (away_15_days_avgs[0] + home_15_days_avgs[1]) / 2

    home_7_days_avgs = func(get_last_7_days_from_date(date), home)
    away_7_days_avgs = func(get_last_7_days_from_date(date), away)
    home_7_days = (home_7_days_avgs[0] + away_7_days_avgs[1]) / 2
    away_7_days = (away_7_days_avgs[0] + home_7_days_avgs[1]) / 2

    home_ret = (home_30_days + home_15_days + home_7_days) / 3
    away_ret = (away_30_days + away_15_days + away_7_days) / 3

    return home_ret, away_ret

In [87]:
def get_test_data_from_date(date, home, away):
    home_30_days_ratings = get_avg_ratings(get_last_30_days_from_date(date), home)
    away_30_days_ratings = get_avg_ratings(get_last_30_days_from_date(date), away)
    home_off_rtg_30_days = (home_30_days_ratings[0] + away_30_days_ratings[1]) / 2
    away_off_rtg_30_days = (away_30_days_ratings[0] + home_30_days_ratings[1]) / 2 

    home_15_days_ratings = get_avg_ratings(get_last_15_days_from_date(date), home)
    away_15_days_ratings = get_avg_ratings(get_last_15_days_from_date(date), away)
    home_off_rtg_15_days = (home_15_days_ratings[0] + away_15_days_ratings[1]) / 2
    away_off_rtg_15_days = (away_15_days_ratings[0] + home_15_days_ratings[1]) / 2

    home_7_days_ratings = get_avg_ratings(get_last_7_days_from_date(date), home)
    away_7_days_ratings = get_avg_ratings(get_last_7_days_from_date(date), away)
    home_off_rtg_7_days = (home_7_days_ratings[0] + away_7_days_ratings[1]) / 2
    away_off_rtg_7_days = (away_7_days_ratings[0] + home_7_days_ratings[1]) / 2

    home_off_rtg = (home_off_rtg_30_days + home_off_rtg_15_days + home_off_rtg_7_days) / 3
    away_off_rtg = (away_off_rtg_30_days + away_off_rtg_15_days + away_off_rtg_7_days) / 3

    home_fgps_30_days = get_avg_fgps(get_last_30_days_from_date(date), home)
    away_fgps_30_days = get_avg_fgps(get_last_30_days_from_date(date), away)
    home_fgp_30_days = (home_fgps_30_days[0] + away_fgps_30_days[1]) / 2
    away_fgp_30_days = (away_fgps_30_days[0] + home_fgps_30_days[1]) / 2

    home_fgps_15_days = get_avg_fgps(get_last_15_days_from_date(date), home)
    away_fgps_15_days = get_avg_fgps(get_last_15_days_from_date(date), away)
    home_fgp_15_days = (home_fgps_15_days[0] + away_fgps_15_days[1]) / 2
    away_fgp_15_days = (away_fgps_15_days[0] + home_fgps_15_days[1]) / 2

    home_fgps_7_days = get_avg_fgps(get_last_7_days_from_date(date), home)
    away_fgps_7_days = get_avg_fgps(get_last_7_days_from_date(date), away)
    home_fgp_7_days = (home_fgps_7_days[0] + away_fgps_7_days[1]) / 2
    away_fgp_7_days = (away_fgps_7_days[0] + home_fgps_7_days[1]) / 2

    home_fgp = (home_fgp_30_days + home_fgp_15_days + home_fgp_7_days) / 3
    away_fgp = (away_fgp_30_days + away_fgp_15_days + away_fgp_7_days) / 3

    injuries = scrape_injuries_from_date(date, home, away)

    home_injury_value = get_injury_value(injuries, home)
    away_injury_value = get_injury_value(injuries, away)

    home_injury_advanced = get_injury_advanced(injuries, home)
    away_injury_advanced = get_injury_advanced(injuries, away)

    home_win = scrape_home_win(date, home, away)

    return [
            home, away, 
            home_off_rtg, away_off_rtg, 
            home_fgp, away_fgp,
            home_injury_value, away_injury_value,
            home_injury_advanced, away_injury_advanced,
            home_win
            ]

def get_all_test_data():
    try:
        folder_path = Path('GAMES')
        folder_list = list(folder_path.iterdir())
        folder_list.sort()
        game_data = []
        today = datetime.today().strftime('%Y-%m-%d')
        last_30_days = get_last_30_days_from_date(today)
        last_30_days = [datetime.strptime(date, '%Y-%m-%d').strftime('%m-%d-%Y') for date in last_30_days]
        for file_path in folder_list:
            if file_path.is_file() and file_path.name != ".DS_Store" and file_path.name[:10] in last_30_days:
                data = file_path.name.split("-")
                date = data[2] + "-" + data[0] + "-" + data[1]
                home = data[3]
                away = data[4][:len(data[4]) - 5]
                game = get_test_data_from_date(date, home, away)
                print(game)
                game_data.append(game)
        return game_data
    
    except Exception as e:
        tb = traceback.extract_tb(e.__traceback__)
        line_number = tb[-1].lineno
        print(f"Exception occurred on line {line_number}: {e}")
        
columns = [
    "Home", "Away", 
    "Home Offensive Rating", "Away Offensive Rating", 
    "Home eFG%", "Away eFG%",
    "Home TOV%", "Away TOV%",
    "Home Injury Value", "Away Injury Value",
    "Home Injury Advanced", "Away Injury Advanced",
    "Home Win"
]

df_test = pd.DataFrame(get_all_test_data(), columns=columns)

file_path = 'DATA/test_data.xlsx'
df_test.to_excel(file_path, index=False)

df_test = pd.get_dummies(df_test, columns=['Home', 'Away'])

df_test = df_test.astype(float)

  inactives = soup.find('strong', text='Inactive:\xa0').find_parent('div').find_all('a')
  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')


['Brooklyn', 'Denver', 109.81666666666666, 105.7333333333333, 0.5156666666666667, 0.49866666666666665, 229.26999999999998, 295.14, 1.8999999999999997, 2.8999999999999995, 0]
['Golden State', 'New Orleans', 114.56666666666668, 102.08333333333333, 0.5463333333333333, 0.47033333333333327, 230.34999999999997, 333.8999999999999, 2.5000000000000004, -0.7999999999999998, 1]
['Minnesota', 'Dallas', 112.36666666666667, 110.99166666666666, 0.53775, 0.49475, 242.52999999999997, 278.50999999999993, 2.1999999999999997, 3.9999999999999996, 0]
['Utah', 'Sacramento', 110.47499999999998, 119.375, 0.5209999999999999, 0.567, 260.89, 273.52000000000004, 0.5, 2.2, 0]
['Charlotte', 'Toronto', 115.64583333333333, 112.575, 0.5315833333333333, 0.5070833333333333, 259.15000000000003, 250.02999999999997, 1.0, 0.9999999999999999, 1]
['Chicago', 'Orlando', 108.51249999999999, 113.67500000000001, 0.531625, 0.5367500000000001, 266.04, 275.75, 1.1, 2.9, 1]
['Cleveland', 'LA Lakers', 118.69999999999999, 112.1499999999

### **RANDOM FOREST MODEL**

In [88]:

# Define features and target
features = [col for col in df_train.columns if col != 'Home Win' and col != 'Home' and col != 'Away']
target = 'Home Win'

X_train = df_train[features]
y_train = df_train[target]

X_test = df_test[features]
y_test = df_test[target]

# Initialize the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

today = "2024-11-28"

home_30_days_ratings = get_avg_ratings(get_last_30_days_from_date(today), HOME)
away_30_days_ratings = get_avg_ratings(get_last_30_days_from_date(today), AWAY)
home_off_rtg_30_days = (home_30_days_ratings[0] + away_30_days_ratings[1]) / 2
away_off_rtg_30_days = (away_30_days_ratings[0] + home_30_days_ratings[1]) / 2

home_15_days_ratings = get_avg_ratings(get_last_15_days_from_date(today), HOME)
away_15_days_ratings = get_avg_ratings(get_last_15_days_from_date(today), AWAY)
home_off_rtg_15_days = (home_15_days_ratings[0] + away_15_days_ratings[1]) / 2
away_off_rtg_15_days = (away_15_days_ratings[0] + home_15_days_ratings[1]) / 2

home_7_days_ratings = get_avg_ratings(get_last_7_days_from_date(today), HOME)
away_7_days_ratings = get_avg_ratings(get_last_7_days_from_date(today), AWAY)
home_off_rtg_7_days = (home_7_days_ratings[0] + away_7_days_ratings[1]) / 2
away_off_rtg_7_days = (away_7_days_ratings[0] + home_7_days_ratings[1]) / 2

home_off_rtg = (home_off_rtg_30_days + home_off_rtg_15_days + home_off_rtg_7_days) / 3
away_off_rtg = (away_off_rtg_30_days + away_off_rtg_15_days + away_off_rtg_7_days) / 3

home_fgps_30_days = get_avg_fgps(get_last_30_days_from_date(today), HOME)
away_fgps_30_days = get_avg_fgps(get_last_30_days_from_date(today), AWAY)
home_fgp_30_days = (home_fgps_30_days[0] + away_fgps_30_days[1]) / 2
away_fgp_30_days = (away_fgps_30_days[0] + home_fgps_30_days[1]) / 2

home_fgps_15_days = get_avg_fgps(get_last_15_days_from_date(today), HOME)
away_fgps_15_days = get_avg_fgps(get_last_15_days_from_date(today), AWAY)
home_fgp_15_days = (home_fgps_15_days[0] + away_fgps_15_days[1]) / 2
away_fgp_15_days = (away_fgps_15_days[0] + home_fgps_15_days[1]) / 2

home_fgps_7_days = get_avg_fgps(get_last_7_days_from_date(today), HOME)
away_fgps_7_days = get_avg_fgps(get_last_7_days_from_date(today), AWAY)
home_fgp_7_days = (home_fgps_7_days[0] + away_fgps_7_days[1]) / 2
away_fgp_7_days = (away_fgps_7_days[0] + home_fgps_7_days[1]) / 2

home_fgp = (home_fgp_30_days + home_fgp_15_days + home_fgp_7_days) / 3
away_fgp = (away_fgp_30_days + away_fgp_15_days + away_fgp_7_days) / 3

home_injuries = scrape_team_injuries(HOME)
away_injuries = scrape_team_injuries(AWAY)

home_injury_value = get_injury_value(home_injuries, HOME)
away_injury_value = get_injury_value(away_injuries, AWAY)

home_injury_advanced = get_injury_advanced(home_injuries, HOME)
away_injury_advanced = get_injury_advanced(away_injuries, AWAY)

input_data = { 
    "Home Offensive Rating": [home_off_rtg], 
    "Away Offensive Rating": [away_off_rtg], 
    "Home eFG%": [home_fgp], 
    "Away eFG%": [away_fgp], 
    "Home Injury Value": [home_injury_value], 
    "Away Injury Value": [away_injury_value],
    "Home Injury Advanced": [home_injury_advanced], 
    "Away Injury Advanced": [away_injury_advanced],
}

for col in X_train.columns:
    if col.startswith("Home_") or col.startswith("Away_"):
        input_data[col] = [0]

home = "Home_{}".format(HOME)
away = "Away_{}".format(AWAY)

input_data[home] = [1]
input_data[away] = [1]

# Convert new game data into a DataFrame
prediction_df = pd.DataFrame(input_data)

# Predict the outcome for the new game
prediction = rf_model.predict(prediction_df)

y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Model Accuracy:", accuracy)

# Output the prediction
if prediction[0] == 1:
    print(HOME + " will win")
else:
    print(AWAY + " will win")

  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')
  comment_soup = BeautifulSoup(comment, 'html.parser')


Model Accuracy: 0.6460176991150443
Sacramento will win


### **Neural Network**

In [29]:
'''
# Define features and target
features = [col for col in df.columns if col != 'Home Win']
target = 'Home Win'

X = df[features]
y = df[target]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # Input and first hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(1, activation='sigmoid')  # Output layer (binary classification)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=8, verbose=1, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.2f}")

# Predict for a new game
home_ratings_30_days = RATINGS_30_DAY[HOME]
away_ratings_30_days = RATINGS_30_DAY[AWAY]

home_ratings_15_days = RATINGS_15_DAY[HOME]
away_ratings_15_days = RATINGS_15_DAY[AWAY]

home_ratings_7_days = RATINGS_7_DAY[HOME]
away_ratings_7_days = RATINGS_7_DAY[AWAY]

home_injuries = scrape_team_injuries(HOME)
away_injuries = scrape_team_injuries(AWAY)

home_injury_value = get_injury_advanced(home_injuries, HOME)
away_injury_value = get_injury_advanced(away_injuries, AWAY)


input_data = { 
    "30 Day Home Net Rating": [home_ratings_30_days[0] - home_ratings_30_days[1]], 
    "30 Day Away Net Rating": [away_ratings_30_days[0] - away_ratings_30_days[1]], 
    "15 Day Home Net Rating": [home_ratings_15_days[0] - home_ratings_15_days[1]], 
    "15 Day Away Net Rating": [away_ratings_15_days[0] - away_ratings_15_days[1]],
    "7 Day Home Net Rating": [home_ratings_7_days[0] - home_ratings_7_days[1]], 
    "7 Day Away Net Rating": [away_ratings_7_days[0] - away_ratings_7_days[1]],
    "Home Injury Advanced": [home_injury_value], 
    "Away Injury Advanced": [away_injury_value],
}


for col in X.columns:
    if col.startswith("Home_") or col.startswith("Away_"):
        input_data[col] = [0]

home = "Home_{}".format(HOME)
away = "Away_{}".format(AWAY)

input_data[home] = [1]
input_data[away] = [1]

# Set the specific teams for this game

# Convert new game data into a DataFrame
new_game_df = pd.DataFrame(input_data)

# Reorder columns to match the training data
new_game_df = new_game_df[features]

# Predict the outcome for the new game
prediction = model.predict(new_game_df)
predicted_class = (prediction > 0.5).astype(int)

# Output the prediction
if predicted_class[0][0] == 1:
    print("The model predicts the home team {} will win.".format(HOME))
else:
    print("The model predicts the away team {} will win.".format(AWAY))
'''

'\n# Define features and target\nfeatures = [col for col in df.columns if col != \'Home Win\']\ntarget = \'Home Win\'\n\nX = df[features]\ny = df[target]\n\n# Split the data\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# Define the neural network model\nmodel = Sequential([\n    Dense(64, input_dim=X_train.shape[1], activation=\'relu\'),  # Input and first hidden layer\n    Dense(32, activation=\'relu\'),  # Second hidden layer\n    Dense(1, activation=\'sigmoid\')  # Output layer (binary classification)\n])\n\n# Compile the model\nmodel.compile(optimizer=\'adam\', loss=\'binary_crossentropy\', metrics=[\'accuracy\'])\n\n# Train the model\nmodel.fit(X_train, y_train, epochs=100, batch_size=8, verbose=1, validation_split=0.1)\n\n# Evaluate the model\nloss, accuracy = model.evaluate(X_test, y_test, verbose=0)\nprint(f"Test Accuracy: {accuracy:.2f}")\n\n# Predict for a new game\nhome_ratings_30_days = RATINGS_30_DAY[HOME]\naway_ratings_30_d