## 라이브러리 설치

In [3]:
!pip install nba_api
from pyspark.sql import SparkSession, SQLContext, Row, SparkSession

encoding = 'utf-8'

Collecting nba_api
  Downloading nba_api-1.4.1-py3-none-any.whl.metadata (5.6 kB)
Downloading nba_api-1.4.1-py3-none-any.whl (261 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.7/261.7 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.4.1


## 선수 정보 확인

### 기본 정보 확인
- 르브론 제임스의 정보 확인

In [5]:
# `players` must be imported here: without it this cell raises NameError on a
# fresh kernel (Restart & Run All).
from nba_api.stats.static import players

# Search the static player list by full name and keep the first match.
p_profile = players.find_players_by_full_name("Lebron James")
print(p_profile[0])

# The numeric player id is reused by the endpoint queries in later cells.
p_id = p_profile[0]['id']

{'id': 2544, 'full_name': 'LeBron James', 'first_name': 'LeBron', 'last_name': 'James', 'is_active': True}


### 선수의 상세 정보 확인
- 위 코드에서 알게 된 선수 ID를 이용해서 상세 정보를 확인할 수 있다.

In [6]:
from nba_api.stats.endpoints import commonplayerinfo

# Query the CommonPlayerInfo endpoint for the player id resolved earlier.
player_info = commonplayerinfo.CommonPlayerInfo(player_id=p_id)

# The endpoint result is exposed as a list of DataFrames; only the first
# frame is printed here.
p_info = player_info.get_data_frames()
profile_frame = p_info[0]
print(profile_frame)

   PERSON_ID FIRST_NAME LAST_NAME DISPLAY_FIRST_LAST DISPLAY_LAST_COMMA_FIRST  \
0       2544     LeBron     James       LeBron James            James, LeBron   

  DISPLAY_FI_LAST   PLAYER_SLUG            BIRTHDATE  \
0        L. James  lebron-james  1984-12-30T00:00:00   

                         SCHOOL COUNTRY  ...    PLAYERCODE FROM_YEAR TO_YEAR  \
0  St. Vincent-St. Mary HS (OH)     USA  ...  lebron_james      2003    2023   

   DLEAGUE_FLAG NBA_FLAG GAMES_PLAYED_FLAG DRAFT_YEAR DRAFT_ROUND  \
0             N        Y                 Y       2003           1   

   DRAFT_NUMBER GREATEST_75_FLAG  
0             1                Y  

[1 rows x 33 columns]


- `get_data_frames()`는 선수의 정보를 list 형태로 반환한다. 리스트의 0번째 인덱스에는 이름, 키, 몸무게, 출신 학교 등 기본 프로필 정보가, 1번째 인덱스에는 경기 결과 통계 정보가, 2번째 인덱스에는 출전했던 시즌의 id가 들어 있다.

### 경기 기록 정보 확인

In [7]:
from nba_api.stats.endpoints import playercareerstats

# Request the career statistics for the same player id.
career = playercareerstats.PlayerCareerStats(player_id=p_id)

# As with the profile query, the result is a list of DataFrames; print the
# first one, which holds one row per season.
career_info = career.get_data_frames()
season_totals = career_info[0]
print(season_totals)

    PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0        2544   2003-04        00  1610612739               CLE        19.0   
1        2544   2004-05        00  1610612739               CLE        20.0   
2        2544   2005-06        00  1610612739               CLE        21.0   
3        2544   2006-07        00  1610612739               CLE        22.0   
4        2544   2007-08        00  1610612739               CLE        23.0   
5        2544   2008-09        00  1610612739               CLE        24.0   
6        2544   2009-10        00  1610612739               CLE        25.0   
7        2544   2010-11        00  1610612748               MIA        26.0   
8        2544   2011-12        00  1610612748               MIA        27.0   
9        2544   2012-13        00  1610612748               MIA        28.0   
10       2544   2013-14        00  1610612748               MIA        29.0   
11       2544   2014-15        00  1610612739       

In [9]:
from pyspark.sql import SparkSession

# Create (or reuse) a local Spark session for this notebook.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName("NBA_data")
    .getOrCreate()
)

# Build a Spark DataFrame from the first career-stats frame, then show its
# schema and leading rows.
sparkDF = spark.createDataFrame(career_info[0])
sparkDF.printSchema()
sparkDF.show()


root
 |-- PLAYER_ID: long (nullable = true)
 |-- SEASON_ID: string (nullable = true)
 |-- LEAGUE_ID: string (nullable = true)
 |-- TEAM_ID: long (nullable = true)
 |-- TEAM_ABBREVIATION: string (nullable = true)
 |-- PLAYER_AGE: double (nullable = true)
 |-- GP: long (nullable = true)
 |-- GS: long (nullable = true)
 |-- MIN: double (nullable = true)
 |-- FGM: long (nullable = true)
 |-- FGA: long (nullable = true)
 |-- FG_PCT: double (nullable = true)
 |-- FG3M: long (nullable = true)
 |-- FG3A: long (nullable = true)
 |-- FG3_PCT: double (nullable = true)
 |-- FTM: long (nullable = true)
 |-- FTA: long (nullable = true)
 |-- FT_PCT: double (nullable = true)
 |-- OREB: long (nullable = true)
 |-- DREB: long (nullable = true)
 |-- REB: long (nullable = true)
 |-- AST: long (nullable = true)
 |-- STL: long (nullable = true)
 |-- BLK: long (nullable = true)
 |-- TOV: long (nullable = true)
 |-- PF: long (nullable = true)
 |-- PTS: long (nullable = true)

+---------+---------+---------+--

In [10]:
# Print the Spark DataFrame schema (column names, types, nullability).
# NOTE(review): this repeats the printSchema() call made right after sparkDF was created.
sparkDF.printSchema()

root
 |-- PLAYER_ID: long (nullable = true)
 |-- SEASON_ID: string (nullable = true)
 |-- LEAGUE_ID: string (nullable = true)
 |-- TEAM_ID: long (nullable = true)
 |-- TEAM_ABBREVIATION: string (nullable = true)
 |-- PLAYER_AGE: double (nullable = true)
 |-- GP: long (nullable = true)
 |-- GS: long (nullable = true)
 |-- MIN: double (nullable = true)
 |-- FGM: long (nullable = true)
 |-- FGA: long (nullable = true)
 |-- FG_PCT: double (nullable = true)
 |-- FG3M: long (nullable = true)
 |-- FG3A: long (nullable = true)
 |-- FG3_PCT: double (nullable = true)
 |-- FTM: long (nullable = true)
 |-- FTA: long (nullable = true)
 |-- FT_PCT: double (nullable = true)
 |-- OREB: long (nullable = true)
 |-- DREB: long (nullable = true)
 |-- REB: long (nullable = true)
 |-- AST: long (nullable = true)
 |-- STL: long (nullable = true)
 |-- BLK: long (nullable = true)
 |-- TOV: long (nullable = true)
 |-- PF: long (nullable = true)
 |-- PTS: long (nullable = true)



In [11]:
# Project only the MIN column and display the first rows.
sparkDF.select("MIN").show()

+------+
|   MIN|
+------+
|3120.0|
|3388.0|
|3361.0|
|3190.0|
|3027.0|
|3054.0|
|2966.0|
|3063.0|
|2326.0|
|2877.0|
|2902.0|
|2493.0|
|2709.0|
|2795.0|
|3026.0|
|1937.0|
|2316.0|
|1504.0|
|2084.0|
|1954.0|
+------+
only showing top 20 rows



In [12]:
from nba_api.live.nba.endpoints import scoreboard

# Today's scoreboard from the live-data endpoint.
games = scoreboard.ScoreBoard()

# Keep both representations in named variables: a bare expression in the
# middle of a cell is evaluated and then silently discarded.
scoreboard_json = games.get_json()   # json
scoreboard_dict = games.get_dict()   # dictionary

# Last expression of the cell, so the dictionary is what gets displayed.
scoreboard_dict

{'meta': {'version': 1,
  'request': 'https://nba-prod-us-east-1-mediaops-stats.s3.amazonaws.com/NBA/liveData/scoreboard/todaysScoreboard_00.json',
  'time': '2024-03-16 03:06:17.617',
  'code': 200},
 'scoreboard': {'gameDate': '2024-03-15',
  'leagueId': '00',
  'leagueName': 'National Basketball Association',
  'games': [{'gameId': '0022300961',
    'gameCode': '20240315/PHXCHA',
    'gameStatus': 3,
    'gameStatusText': 'Final',
    'period': 4,
    'gameClock': '',
    'gameTimeUTC': '2024-03-15T23:00:00Z',
    'gameEt': '2024-03-15T19:00:00Z',
    'regulationPeriods': 4,
    'ifNecessary': False,
    'seriesGameNumber': '',
    'gameLabel': '',
    'gameSubLabel': '',
    'seriesText': '',
    'seriesConference': '',
    'poRoundDesc': '',
    'gameSubtype': '',
    'homeTeam': {'teamId': 1610612766,
     'teamName': 'Hornets',
     'teamCity': 'Charlotte',
     'teamTricode': 'CHA',
     'wins': 17,
     'losses': 50,
     'score': 96,
     'seed': None,
     'inBonus': None,
 