## nba_apiからデータの取得

In [None]:
# %pip install nba_api

In [2]:
from nba_api.stats.endpoints import leaguedashplayerstats
import pandas as pd
import time

def fetch_season_stats(season):
    """Download regular-season, per-game player stats for one season.

    ``season`` is the year in which the season starts (``1980`` means the
    "1980-81" season).  The first data frame of the ``LeagueDashPlayerStats``
    response is returned with an extra ``SEASON`` column holding that label.
    If anything in the request fails, the error is printed and an empty
    ``DataFrame`` is returned instead.
    """
    # Label format is "1980-81": start year, dash, last two digits of the next year.
    next_year_suffix = str(season + 1)[-2:]
    season_str = f"{season}-{next_year_suffix}"
    print(f"Fetching season: {season_str}")

    try:
        endpoint = leaguedashplayerstats.LeagueDashPlayerStats(
            season=season_str,
            season_type_all_star='Regular Season',
            per_mode_detailed='PerGame',
        )
        frame = endpoint.get_data_frames()[0]
        frame['SEASON'] = season_str
    except Exception as e:
        print(f"Error in {season_str}: {e}")
        return pd.DataFrame()
    return frame

# Fetch every season from 1980-81 through 2023-24, one API request per season.
season_frames = []
for start_year in range(1980, 2024):
    df = fetch_season_stats(start_year)
    # fetch_season_stats returns an empty frame when a request fails; leave
    # those out so they do not take part in the concatenation.
    if not df.empty:
        season_frames.append(df)
    time.sleep(1)  # pause between requests as a guard against API rate limiting

# Concatenate once at the end: growing a DataFrame with pd.concat inside the
# loop re-copies every previously fetched row on each iteration.
all_data = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()

# Keep only the columns used downstream.
df_use = all_data[['SEASON', 'PLAYER_NAME', 'PLAYER_ID', 'TEAM_ABBREVIATION', 'GP', 'MIN']].copy()

Fetching season: 1980-81
Fetching season: 1981-82
Fetching season: 1982-83
Fetching season: 1983-84
Fetching season: 1984-85
Fetching season: 1985-86
Fetching season: 1986-87
Fetching season: 1987-88
Fetching season: 1988-89
Fetching season: 1989-90
Fetching season: 1990-91
Fetching season: 1991-92
Fetching season: 1992-93
Fetching season: 1993-94
Fetching season: 1994-95
Fetching season: 1995-96
Fetching season: 1996-97




Fetching season: 1997-98
Fetching season: 1998-99
Fetching season: 1999-00
Fetching season: 2000-01
Fetching season: 2001-02
Fetching season: 2002-03
Fetching season: 2003-04
Fetching season: 2004-05
Fetching season: 2005-06
Fetching season: 2006-07
Fetching season: 2007-08
Fetching season: 2008-09
Fetching season: 2009-10
Fetching season: 2010-11
Fetching season: 2011-12
Fetching season: 2012-13
Fetching season: 2013-14
Fetching season: 2014-15
Fetching season: 2015-16
Fetching season: 2016-17
Fetching season: 2017-18
Fetching season: 2018-19
Fetching season: 2019-20
Fetching season: 2020-21
Fetching season: 2021-22
Fetching season: 2022-23
Fetching season: 2023-24


In [None]:
# %pip install tqdm

Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.




In [3]:
# test
from nba_api.stats.endpoints import commonplayerinfo
import pandas as pd
import time
import json
import os
from tqdm import tqdm

# Trial run: take the first 50 distinct PLAYER_ID values found in df_use.
test_ids = df_use['PLAYER_ID'].unique()[:50]

# Path of the on-disk cache file.
CACHE_PATH = "player_info_cache.json"

# Start with an empty cache unless a previously saved one exists on disk.
if not os.path.exists(CACHE_PATH):
    player_info_dict = {}
else:
    with open(CACHE_PATH, "r") as cache_file:
        player_info_dict = json.load(cache_file)

# Player-info lookup helper.
def get_player_height_and_position(player_id):
    """Look up one player's height and position via ``CommonPlayerInfo``.

    Returns a ``(height, position)`` tuple taken from the first row of the
    ``HEIGHT`` and ``POSITION`` columns of the first data frame in the
    response, or ``(None, None)`` when the request or the parsing fails.
    """
    try:
        info = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()[0]
        height = info['HEIGHT'].values[0]  # e.g. "6-5" (feet-inches), per the original author's note
        position = info['POSITION'].values[0]
    except Exception:
        # Best-effort lookup: any ordinary failure maps to (None, None).
        # ``except Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still propagate and a long-running
        # fetch loop can be interrupted.
        return None, None
    return height, position

# Player-info fetch loop, backed by the on-disk cache.
for pid in tqdm(test_ids):
    pid_str = str(pid)  # JSON object keys are strings, so the cache is keyed by str
    cached = player_info_dict.get(pid_str)
    # Only a successful lookup counts as cached.  A (None, None) entry is what
    # get_player_height_and_position returns on failure, so such entries are
    # retried instead of freezing a past error into the cache forever.
    if cached is not None and (cached.get('HEIGHT') is not None or cached.get('POSITION') is not None):
        continue
    height, pos = get_player_height_and_position(pid)
    player_info_dict[pid_str] = {'HEIGHT': height, 'POSITION': pos}
    time.sleep(0.5)
    # Persist after every player, writing to a temporary file and swapping it
    # in so an interrupted run cannot leave a truncated cache file behind.
    tmp_path = CACHE_PATH + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(player_info_dict, f)
    os.replace(tmp_path, CACHE_PATH)

# Turn the cache into a DataFrame and attach it to the stats rows.
info_df = pd.DataFrame.from_dict(player_info_dict, orient='index').reset_index().rename(columns={'index': 'PLAYER_ID'})
info_df['PLAYER_ID'] = info_df['PLAYER_ID'].astype(int)  # cache keys are strings; cast so the merge keys match
df_test = pd.merge(df_use, info_df, on='PLAYER_ID', how='left')


100%|██████████| 50/50 [00:37<00:00,  1.33it/s]


In [None]:
# # フル
# from nba_api.stats.endpoints import commonplayerinfo

# def get_player_height_and_position(player_id):
#     try:
#         info = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()[0]
#         height = info['HEIGHT'].values[0]  # 例: "6-5"
#         position = info['POSITION'].values[0]  # 例: "G-F"
#         return height, position
#     except:
#         return None, None

# # 一意のPLAYER_IDでループ
# from tqdm import tqdm

# player_info_dict = {}
# for pid in tqdm(df_use['PLAYER_ID'].unique()):
#     height, pos = get_player_height_and_position(pid)
#     player_info_dict[pid] = {'HEIGHT': height, 'POSITION': pos}
#     time.sleep(0.5)  # API制限回避のためのウェイト

# # DataFrame化してマージ
# info_df = pd.DataFrame.from_dict(player_info_dict, orient='index').reset_index().rename(columns={'index': 'PLAYER_ID'})
# df_final = pd.merge(df_use, info_df, on='PLAYER_ID', how='left')

 24%|██▍       | 649/2652 [48:40<15:04:17, 27.09s/it]

In [6]:
# フル（キャッシュ付き）

from nba_api.stats.endpoints import commonplayerinfo
import time
import json
import os
from tqdm import tqdm
import pandas as pd

# Where the cache file is stored.
CACHE_PATH = "player_info_cache.json"

# Input data (precondition: df_use holds the player rows for the whole period).
player_ids = df_use['PLAYER_ID'].unique()

# Reuse the saved cache when the file is present; otherwise begin empty.
if not os.path.exists(CACHE_PATH):
    player_info_dict = {}
else:
    with open(CACHE_PATH, "r") as cache_file:
        player_info_dict = json.load(cache_file)

# Player-info lookup function.
def get_player_height_and_position(player_id):
    """Fetch a single player's height and position from ``CommonPlayerInfo``.

    The values come from the first row of the ``HEIGHT`` and ``POSITION``
    columns of the first data frame in the response.  Returns
    ``(height, position)``, or ``(None, None)`` if the request or the
    extraction fails.
    """
    try:
        info = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()[0]
        height = info['HEIGHT'].values[0]
        position = info['POSITION'].values[0]
    except Exception:
        # Deliberately best-effort, but limited to ``Exception``: a bare
        # ``except:`` would also catch KeyboardInterrupt / SystemExit and
        # make the surrounding fetch loop impossible to interrupt.
        return None, None
    return height, position

# Production loop: fetch only the players that are not yet cached.
for pid in tqdm(player_ids):
    pid_str = str(pid)  # JSON object keys are strings, so the cache is keyed by str
    cached = player_info_dict.get(pid_str)
    # Skip players that already have a successful result.  Entries where both
    # fields are None were produced by a failed lookup, so they are retried
    # rather than being treated as a permanent answer.
    if cached is not None and (cached.get('HEIGHT') is not None or cached.get('POSITION') is not None):
        continue
    height, pos = get_player_height_and_position(pid)
    player_info_dict[pid_str] = {'HEIGHT': height, 'POSITION': pos}
    time.sleep(0.6)  # go slowly as a guard against API rate limiting
    # Save progress after every player.  Write to a temporary file first and
    # then swap it in, so interrupting the run cannot leave a truncated cache.
    tmp_path = CACHE_PATH + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(player_info_dict, f)
    os.replace(tmp_path, CACHE_PATH)


 22%|██▏       | 587/2652 [20:56<15:00:01, 26.15s/it]

In [None]:
# Build a DataFrame from the cache dict (one row per player) and left-join it onto df_use.
info_df = pd.DataFrame.from_dict(player_info_dict, orient='index')
info_df = info_df.reset_index().rename(columns={'index': 'PLAYER_ID'})
# The dict keys are strings; cast to int so the join key matches df_use.
info_df = info_df.astype({'PLAYER_ID': int})
df_merged = df_use.merge(info_df, on='PLAYER_ID', how='left')

In [None]:
# feet-inches string -> centimetres
def height_to_cm(height_str):
    """Convert a ``"feet-inches"`` string such as ``"6-5"`` to centimetres.

    Returns the height rounded to one decimal place, or ``None`` when the
    value is missing or is not two integers separated by a single ``-``.
    """
    try:
        feet, inches = map(int, height_str.split('-'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .split (None, NaN);
        # TypeError: a non-str object whose split rejects the separator;
        # ValueError: wrong number of parts or a non-integer part (e.g. "").
        return None
    # 12 inches per foot, 2.54 cm per inch.
    return round((feet * 12 + inches) * 2.54, 1)

def simplify_position(pos_str):
    """Collapse a position description into a short code.

    The checks run in a fixed order and the first match wins:
    text containing "Guard" gives "PG" if it also contains "Point", else "G";
    otherwise text containing "Forward" gives "F";
    otherwise text containing "Center" gives "C".
    Returns ``None`` when nothing matches or the value does not support
    ``in`` (e.g. ``None`` or NaN).
    """
    try:
        if 'Guard' in pos_str:
            return 'PG' if 'Point' in pos_str else 'G'
        if 'Forward' in pos_str:
            return 'F'
        if 'Center' in pos_str:
            return 'C'
    except TypeError:
        # ``in`` on None / float NaN raises TypeError: treat as unknown.
        return None
    # NOTE(review): for combined values the check order decides, not the
    # listing order (e.g. "Center-Forward" -> "F") — confirm this is intended.
    return None

# Derive the numeric height and the simplified position code for every row.
height_cm = df_merged['HEIGHT'].apply(height_to_cm)
position_simple = df_merged['POSITION'].apply(simplify_position)
df_merged['HEIGHT_CM'] = height_cm
df_merged['POSITION_SIMPLE'] = position_simple

In [None]:
# Add boolean flag columns used for the Lv.1 / Lv.2 / Lv.1&2 filters.
played_enough_games = df_merged['GP'].ge(41)      # Lv1: GP of at least 41
played_enough_minutes = df_merged['MIN'].ge(20)   # Lv2: MIN of at least 20
df_merged['Lv1'] = played_enough_games
df_merged['Lv2'] = played_enough_minutes
df_merged['Lv1_and_Lv2'] = played_enough_games & played_enough_minutes

In [None]:
# Sanity check: show the first 10 rows of the raw and derived columns side by side.
preview_columns = [
    'SEASON', 'PLAYER_NAME',
    'HEIGHT', 'HEIGHT_CM',
    'POSITION', 'POSITION_SIMPLE',
    'Lv1', 'Lv2', 'Lv1_and_Lv2',
]
df_merged[preview_columns].head(10)