In [11]:
import pandas as pd
import numpy as np
# Load the raw valuation table; the first CSV column is used as the index.
valuation_csv_path = "./data/nba_valuation.csv"
df = pd.read_csv(valuation_csv_path, index_col=0)

In [12]:
# Long format: reshape the wide table into one row per (Year, Team) pair.
# The index is moved back into a regular column and the "Team" column is
# renamed to "Year" (presumably the CSV's first column holds years under a
# "Team" header - confirm against the file). Every remaining column is then
# melted into Team / Valuation pairs.
df_reset = df.reset_index().rename(columns={"Team": "Year"})
df_long = df_reset.melt(
    id_vars="Year",
    var_name="Team",
    value_name="Valuation",
)
df_long.to_csv("./data/nba_valuation_long.csv", index=False)

In [14]:
# Add the league-average valuation.
# The league mean is computed per Year and attached to every row of that year.
#
# Strip "$" and "," from Valuation before casting to float. The pattern is a
# raw string: in a normal string literal "\$" is an invalid escape sequence,
# which triggers a DeprecationWarning/SyntaxWarning on current Python versions.
# The regex itself is unchanged.
df_long['Valuation'] = df_long['Valuation'].replace(r'[\$,]', '', regex=True).astype(float)

# transform('mean') returns a Series aligned with df_long, so each row receives
# the mean Valuation of its own Year (missing values are skipped by the mean).
df_long['LeagueAverage'] = df_long.groupby('Year')['Valuation'].transform('mean')
df_long.to_csv("./data/nba_valuation_with_league_average.csv", index=False)

In [15]:
# Inflation adjustment using CPI.
# Hard-coded CPI index value per year (the source of these figures is not
# recorded in this notebook - TODO: document provenance).
cpi_data = {
    2011: 224.9, 2012: 229.6, 2013: 233.0, 2014: 236.7, 2015: 237.0,
    2016: 240.0, 2017: 245.1, 2018: 251.1, 2019: 255.7, 2020: 258.8,
    2021: 271.0, 2022: 292.7, 2023: 304.7, 2024: 313.7, 2025: 322.3
}

# Years that are not keys of cpi_data map to NaN, which propagates into
# RealValuation and LogValuation for those rows.
df_long['CPI'] = df_long['Year'].map(cpi_data)
cpi_2025 = cpi_data[2025]

# Rescale each valuation to the 2025 price level: value * (CPI_2025 / CPI_year).
df_long['RealValuation'] = df_long['Valuation'] * (cpi_2025 / df_long['CPI'])

# Natural log of the real valuation. Non-positive values have no logarithm, so
# they are masked to NaN first. Doing this with where() + np.log keeps the
# column float64; the previous per-row lambda returned pd.NA, which turned the
# whole column into object dtype whenever a non-positive value was present.
df_long['LogValuation'] = np.log(
    df_long['RealValuation'].where(df_long['RealValuation'] > 0)
)

df_long.to_csv("./data/nba_valuation_long_inflated.csv", index=False)

In [16]:
# Merge star-transfer data into the valuation data.

# Flags: every row of the transfer file marks a star arrival at ToTeam in Year.
df_transfer = pd.read_csv("./data/star_transfers.csv")
df_transfer["StarArrival"] = 1

# Keep one flag row per (Team, Year). Without de-duplication, a team receiving
# two or more stars in the same year would match several rows in the left merge
# below and silently duplicate that team-year's valuation row.
df_arrivals = (
    df_transfer[["ToTeam", "Year", "StarArrival"]]
    .rename(columns={"ToTeam": "Team"})
    .drop_duplicates(subset=["Team", "Year"])
)

# Left merge keeps every valuation row; team-years with no arrival get NaN,
# which is turned into 0 so StarArrival is a clean 0/1 integer flag.
df_merged = pd.merge(df_long, df_arrivals, how="left", on=["Team", "Year"])
assert len(df_merged) == len(df_long), "merge must not add or drop valuation rows"
df_merged["StarArrival"] = df_merged["StarArrival"].fillna(0).astype(int)
df_merged.to_csv("./data/nba_valuation_with_star_flag.csv", index=False)

In [17]:
# Save the merged frame as data.csv for the analysis step.
# NOTE(review): the original comment said this saves
# nba_valuation_with_league_average.csv, but the frame written here is
# df_merged - confirm which dataset the analysis is meant to read.
analysis_csv_path = "./data/data.csv"
df_merged.to_csv(analysis_csv_path, index=False)