In [1]:
import os
from bs4 import BeautifulSoup
from datetime import datetime


In [2]:
# Directory layout of the saved HTML pages, relative to the working directory.
DATA_DIR = "DATA"
TEAMS_DIR = os.path.join(DATA_DIR, "TEAMS")
PLAYERS_DIR = os.path.join(DATA_DIR, "PLAYERS")

# Full paths of every ".html" file directly inside TEAMS_DIR.
# os.listdir() returns entries in arbitrary order, so the list is sorted to
# make the processing order identical from one run to the next.
teams_pages = sorted(
    os.path.join(TEAMS_DIR, file_name)
    for file_name in os.listdir(TEAMS_DIR)
    if file_name.endswith(".html")
)

In [4]:
def parse_html(page, encoding="utf-8"):
    """Read an HTML file from disk and parse it with BeautifulSoup.

    Args:
        page: Path of the HTML file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding
            (which is what ``encoding=None`` would select).

    Returns:
        The ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        OSError: If the file cannot be opened.
    """
    # errors="replace" keeps undecodable bytes from aborting the read; they
    # are substituted with the Unicode replacement character instead.
    with open(page, encoding=encoding, errors="replace") as f:
        html = f.read()

    return BeautifulSoup(html, features="html.parser")

In [23]:
def find_team(soup):
    """Return the text of the team-name header found in ``soup``.

    Looks up the first element matching ``div.TeamHeader_name__MmHlP`` and
    returns its text content. When no such element exists the string
    ``"Team not found"`` is returned instead.
    """
    header = soup.select_one("div.TeamHeader_name__MmHlP")

    if not header:
        return "Team not found"

    return header.get_text()


def find_region(soup):
    """Return the last whitespace-separated word of the team record header.

    The element is located with ``div.TeamHeader_record__wzofp``; its final
    word is presumably the conference/region name (verify against the page
    markup).

    Returns:
        The last word of the element's text, or ``"Region not found"`` when
        the element is missing or contains no text at all.
    """
    element = soup.select_one("div.TeamHeader_record__wzofp")

    # str.split() yields an empty list for empty or whitespace-only text;
    # indexing it with [-1] would raise IndexError, so treat that case like a
    # missing element.
    words = element.get_text().split() if element else []

    return words[-1] if words else "Region not found"


def find_players_info(soup):
    """Extract the cell texts of the first ``div.Crom_container__C45Ti`` table.

    Returns:
        A list with one entry per ``<tr>`` inside the container; each entry is
        the list of stripped ``<td>`` texts of that row (a row without any
        ``<td>`` cells produces an empty list). If the container is absent,
        the string ``"Table element not found"`` is returned instead.
    """
    container = soup.select_one("div.Crom_container__C45Ti")

    if not container:
        return "Table element not found"

    return [
        [cell.get_text(strip=True) for cell in tr.find_all('td')]
        for tr in container.find_all('tr')
    ]


def find_head_coach(soup):
    """Return the head coach's name from a parsed team page.

    The name is read from the second cell of the first body row of the table
    reached by the long CSS path below.

    Args:
        soup: Parsed page to search. The page passed in is the one inspected;
            nothing is re-read from disk.

    Returns:
        The stripped text of the matching cell, or ``"Coach not found"`` when
        the selector matches nothing.
    """
    # TODO: this absolute path mirrors the page layout and will break when the
    # markup changes; replace it with a shorter, more specific selector.
    # Raw strings are required: "\:" and "\/" are CSS escapes for the class
    # "sm:w-1/3" and are not valid Python escape sequences.
    selector = (
        r"#__next > div.Layout_base__6IeUC.Layout_justNav__2H4H0"
        r" > div.Layout_mainContent__jXliI > main"
        r" > div.MaxWidthContainer_mwc__ID5AG"
        r" > section.Block_block__62M07.nba-stats-content-block > div"
        r" > div.w-full.sm\:w-1\/3.px-0.mx-0.mt-5.mb-1 > div"
        r" > div.Crom_container__C45Ti.crom-container > table > tbody"
        r" > tr:nth-child(1) > td:nth-child(2)"
    )
    element = soup.select_one(selector)

    if element:
        return element.get_text(strip=True)

    return "Coach not found"


def create_or_update_team(soup):
    """Fetch the ``Team`` row for the page in ``soup``, creating it if needed.

    The team is looked up by name. The head coach and region scraped from the
    page are passed as ``defaults``, so they are only stored when a new row is
    created; an already existing team is returned as-is.

    Returns:
        The ``Team`` instance (existing or newly created).
    """
    name = find_team(soup)
    team_region = find_region(soup)
    coach = find_head_coach(soup)

    creation_values = {'head_coach': coach, 'region': team_region}

    # get_or_create returns (instance, created); only the instance is needed.
    return Team.objects.get_or_create(name=name, defaults=creation_values)[0]

def create_or_update_player(player_data, team):
    """Fetch the ``Player`` row for one roster line, creating it if needed.

    Args:
        player_data: Sequence of exactly ten strings. Only the 1st (name),
            3rd (position), 4th (height), 5th (weight) and 6th (birth date,
            formatted like ``"APR 15, 2000"``) items are used, e.g.
            ``['Miles Norris', '#0', 'F', '6-7', '220 lbs', 'APR 15, 2000',
            '23', 'R', 'California-Santa Barbara', 'Signed on 07/06/23']``.
        team: Team the player is attached to when a new row is created.

    Returns:
        The ``Player`` instance (existing or newly created). The player is
        looked up by lower-cased name; the other values are ``defaults`` and
        are therefore only stored on creation.

    Raises:
        ValueError: If ``player_data`` does not hold ten items or the birth
            date does not match ``"%b %d, %Y"``.
    """
    player_name, _, position, height, weight, birth_date_str, _, _, _, _ = player_data

    player_name = player_name.lower()

    # Parse the scraped string (the original code mistakenly read the not yet
    # assigned ``birth_date`` here, which raised UnboundLocalError).
    birth_date = datetime.strptime(birth_date_str, "%b %d, %Y").date()

    player, _created = Player.objects.get_or_create(
        name=player_name,
        defaults={
            'position': position,
            'height': height,
            'weight': weight,
            'birth_date': birth_date,
            'team': team,
        },
    )

    return player

def create_or_update_game(game_data, home_team, away_team):
    """Fetch the ``Game`` row for a fixture, creating it if needed.

    Args:
        game_data: Two-item sequence ``(date_str, location)``.
        home_team: Home team of the game.
        away_team: Away team of the game.

    Returns:
        The ``Game`` instance matching the parsed date and both teams. The
        location is only a ``default``: it is stored when the row is created
        and ignored when the game already exists.
    """
    date_str, location = game_data

    # Fields that identify a game; ``parse_date`` is expected to be provided
    # elsewhere in the project.
    lookup = {
        'date': parse_date(date_str),
        'home_team': home_team,
        'away_team': away_team,
    }

    game, _ = Game.objects.get_or_create(defaults={'location': location}, **lookup)
    return game

import csv
from django.core.management.base import BaseCommand
from nba_stats.models import GameStatistics, Player, Game, Team
from datetime import datetime

class Command(BaseCommand):
    """Management command that loads per-player game statistics from a CSV file.

    Every CSV row produces one ``GameStatistics`` record. The referenced
    player and game are fetched or created on the fly; both teams of the game
    must already exist, because they are looked up with ``Team.objects.get``.
    """

    help = 'Import statistics from a CSV file'

    # (GameStatistics field, CSV column, converter) for every statistic copied
    # from a row. Field names differ from the column names wherever the column
    # is not a valid Python identifier ("3P", "+/-", "TS%", ...).
    _STAT_COLUMNS = (
        ('MP', 'MP', float),
        ('FG', 'FG', int),
        ('FGA', 'FGA', int),
        ('ThreeP', '3P', int),
        ('ThreePA', '3PA', int),
        ('FT', 'FT', int),
        ('FTA', 'FTA', int),
        ('ORB', 'ORB', int),
        ('DRB', 'DRB', int),
        ('TRB', 'TRB', int),
        ('AST', 'AST', int),
        ('STL', 'STL', int),
        ('BLK', 'BLK', int),
        ('TOV', 'TOV', int),
        ('PF', 'PF', int),
        ('PTS', 'PTS', int),
        ('plus_minus', '+/-', int),
        ('TS_percentage', 'TS%', float),
        ('eFG_percentage', 'eFG%', float),
        ('ThreePAr', '3PAr', float),
        ('FTr', 'FTr', float),
        ('ORB_percentage', 'ORB%', float),
        ('DRB_percentage', 'DRB%', float),
        ('TRB_percentage', 'TRB%', float),
        ('AST_percentage', 'AST%', float),
        ('STL_percentage', 'STL%', float),
        ('BLK_percentage', 'BLK%', float),
        ('TOV_percentage', 'TOV%', float),
        ('USG_percentage', 'USG%', float),
        ('ORtg', 'ORtg', int),
        ('DRtg', 'DRtg', int),
        ('BPM', 'BPM', float),
    )

    def add_arguments(self, parser):
        """Register the single positional argument: the CSV file path."""
        parser.add_argument('csv_file', type=str, help='Path to the CSV file')

    def handle(self, *args, **kwargs):
        """Import every row of the CSV file given as ``csv_file``.

        Raises:
            KeyError: If an expected column is missing from a row.
            ValueError: If a value cannot be converted to ``int``/``float`` or
                the ``date`` column does not match ``%Y-%m-%d``.
        """
        csv_file_path = kwargs['csv_file']

        # newline='' lets the csv module do its own newline handling, which is
        # required for quoted fields containing line breaks to parse correctly.
        with open(csv_file_path, 'r', newline='') as file:
            reader = csv.DictReader(file)

            for row in reader:
                # Assumes the CSV columns match the names used below.
                player, _ = Player.objects.get_or_create(name=row['Name'])
                game, _ = Game.objects.get_or_create(
                    date=datetime.strptime(row['date'], "%Y-%m-%d"),
                    home_team=Team.objects.get(name=row['team']),
                    away_team=Team.objects.get(name=row['opp']),
                )

                stats = {
                    field: convert(row[column])
                    for field, column, convert in self._STAT_COLUMNS
                }

                GameStatistics.objects.create(game=game, player=player, **stats)

        self.stdout.write(self.style.SUCCESS('Successfully imported data from CSV.'))


In [22]:
# Import every saved team page: store the team, then each player of its roster.
# NOTE(review): the original loop iterated over `team_games`, which is not
# defined anywhere in this notebook; `teams_pages` (the list of team HTML
# files built in the configuration cell) is assumed to be the intended input.
for team_page in teams_pages:
    soup = parse_html(team_page)
    team = create_or_update_team(soup)

    players = find_players_info(soup)
    if isinstance(players, str):
        # find_players_info returns a message string when the roster table is
        # missing; iterating over it would yield single characters.
        continue

    for player in players:
        # Rows without <td> cells (e.g. the header row) come back empty.
        if len(player) > 0:
            create_or_update_player(player, team)
    

Quin Snyder


['Miles Norris',
 '#0',
 'F',
 '6-7',
 '220 lbs',
 'APR 15, 2000',
 '23',
 'R',
 'California-Santa Barbara',
 'Signed on 07/06/23']