In [1]:
!pip install nba_api
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.sql.functions import lit-+-+-
from pyspark.sql.functions import when

from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams

encoding = 'utf-8'


Collecting nba_api
  Downloading nba_api-1.4.1-py3-none-any.whl.metadata (5.6 kB)
Downloading nba_api-1.4.1-py3-none-any.whl (261 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.7/261.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.4.1


In [5]:
# Spark 세션 생성
spark = SparkSession.builder \
    .appName("NBA Championship Analysis") \
    .getOrCreate()

# NBA 팀 정보 가져오기
nba_teams = teams.get_teams()

# NBA 우승 팀 데이터 프레임 생성
champion_teams = spark.createDataFrame(nba_teams)

# 우승 팀 목록
champions = ["Los Angeles Lakers", "Toronto Raptors", "Golden State Warriors", "Cleveland Cavaliers",
             "San Antonio Spurs", "Miami Heat", "Dallas Mavericks", "Los Angeles Lakers",
             "Boston Celtics", "San Antonio Spurs", "Miami Heat"]

# 해당 우승 팀이 우승한 연도
champion_years = list(range(2010, 2021))

# 우승 팀과 우승 연도를 데이터 프레임에 추가
champion_teams = champion_teams.withColumn("Champion", champion_teams['full_name'].isin(champions).cast("int"))
champion_teams = champion_teams.withColumn("ChampionYear",lit(0))  # 우승 연도를 0으로 초기화
for year in champion_years:
    champion_teams = champion_teams.withColumn("ChampionYear",
                                               when(champion_teams['full_name'] == champions[champion_years.index(year)], year)
                                               .otherwise(champion_teams["ChampionYear"]))

# NBA API를 사용하여 각 시즌의 우승 팀 경기 결과 가져오기
for year in champion_years:
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=None, date_from_nullable=f'08/01/{year - 1}',
                                                   date_to_nullable=f'07/31/{year}')
    games = gamefinder.get_data_frames()[0]
    games = games.filter(games.MATCHUP.str.contains('@'))  # 우승 팀이 원정 경기를 한 경기만 필터링
    champion_teams = champion_teams.withColumn(f"GamesPlayed_{year}", games.count())

# 결과 출력
champion_teams.select("full_name", "ChampionYear", *[f"GamesPlayed_{year}" for year in champion_years]).show()

# Spark 세션 종료
spark.stop()


PySparkTypeError: [NOT_COLUMN] Argument `col` should be a Column, got Series.

In [6]:
for year in champion_years:
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=None, date_from_nullable=f'08/01/{year - 1}',
                                                   date_to_nullable=f'07/31/{year}')
    games = gamefinder.get_data_frames()[0]
    games = games.filter(games.MATCHUP.str.contains('@'))  # 우승 팀이 원정 경기를 한 경기만 필터링
    print(games)
    #champion_teams = champion_teams.withColumn(f"GamesPlayed_{year}", games.count())

Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...]

[4463 rows x 0 columns]
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...]

[4112 rows x 0 columns]
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15