In [1]:
import pandas as pd
import scipy.stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Load the raw inputs (paths are relative to the notebook's working directory):
#   teams - team master table; later cells read its Season, League and TeamId columns
#   score - per-team game summary rows; presumably the 2020-21 season, judging
#           by the file name (TODO confirm)
teams = pd.read_csv("data/teams.csv")
score = pd.read_csv("data/games_summary_202021.csv")

In [3]:
# Keep only the rows for the 2020-21 season in the B1 league.
is_target_season = teams["Season"] == "2020-21"
is_b1_league = teams["League"] == "B1"
teams = teams[is_target_season & is_b1_league]

In [4]:
# Attach team attributes to every game-summary row (inner join on TeamId,
# so rows whose team is not in the filtered `teams` table are dropped).
score = pd.merge(teams, score, on="TeamId")

In [5]:
# Order rows by ScheduleKey so that rows sharing a key become adjacent,
# then renumber the rows 0..n-1. Later cells rely on this fresh positional
# index both for even/odd slicing and for index-aligned column assignment.
# drop=True discards the old (pre-sort) index instead of leaking it into
# the frame as a meaningless "index" column.
score = score.sort_values("ScheduleKey").reset_index(drop=True)

In [6]:
# Split the sorted rows by position: score_g holds the even positions
# (0, 2, 4, ...) and score_k the odd positions (1, 3, 5, ...). The following
# cells treat row i of score_g and row i of score_k as the two sides of one
# game.
score_g = score.iloc[::2]
score_k = score.iloc[1::2]

# The pairing is purely positional, so verify it: every ScheduleKey must
# contribute exactly two adjacent rows. Without this check a missing or extra
# row would silently shift every later pair and corrupt win/ORBP.
assert len(score_g) == len(score_k), "odd number of rows: some game has no opponent row"
assert (score_g["ScheduleKey"].to_numpy() == score_k["ScheduleKey"].to_numpy()).all(), \
    "even/odd rows do not share a ScheduleKey: game pairing is misaligned"

In [7]:
# Build the win flag (1 = won, 0 = lost) for every row of `score`.
# score_g starts at row 0, so within each pair the score_g row comes first:
# the first flag of each pair belongs to score_g, the second to score_k.
# A non-strict result (pts_k >= pts_g) is credited to the score_k row.
win = []
for pts_g, pts_k in zip(score_g["PTS"], score_k["PTS"]):
    win.extend((1, 0) if pts_k < pts_g else (0, 1))

# Index-aligned assignment: relies on `score` having a 0..n-1 index.
score["win"] = pd.DataFrame(win)

In [8]:
# Compute eFG: (F2GM + 1.5 * F3GM) / (F2GA + F3GA).
# Presumably F2GM/F3GM are made and F2GA/F3GA attempted 2- and 3-point field
# goals (TODO confirm against the data dictionary).
# Vectorized column arithmetic keeps each value on its own row regardless of
# the frame's index, instead of round-tripping through a Python list and an
# index-aligned throwaway DataFrame. A row with zero attempts yields inf/NaN
# rather than raising ZeroDivisionError.
eFG = (score["F2GM"] + 1.5 * score["F3GM"]) / (score["F2GA"] + score["F3GA"])
score["eFG"] = eFG

In [9]:
# Compute the free-throw rate: FTA / (F2GA + F3GA).
# Vectorized column arithmetic keeps each value on its own row regardless of
# the frame's index, instead of round-tripping through a Python list and an
# index-aligned throwaway DataFrame. A row with zero attempts yields inf/NaN
# rather than raising ZeroDivisionError.
FTR = score["FTA"] / (score["F2GA"] + score["F3GA"])
score["FTR"] = FTR

In [10]:
# Compute the offensive-rebound rate for both sides of every game:
# own OR / (own OR + opponent's DR). Values are appended in row order
# (score_g row first, then its paired score_k row) so the list lines up
# with the rows of `score`.
ORBP = []
paired_rebounds = zip(score_g["OR"], score_g["DR"], score_k["OR"], score_k["DR"])
for or_g, dr_g, or_k, dr_k in paired_rebounds:
    ORBP.extend((or_g / (or_g + dr_k), or_k / (or_k + dr_g)))

# Index-aligned assignment: relies on `score` having a 0..n-1 index.
score["ORBP"] = pd.DataFrame(ORBP)

In [11]:
# Compute the turnover rate: TO / (F2GA + F3GA + 0.44 * FTA + TO).
# The denominator terms are summed in the same order as before so the
# floating-point results are unchanged.
# Vectorized column arithmetic keeps each value on its own row regardless of
# the frame's index, instead of round-tripping through a Python list and an
# index-aligned throwaway DataFrame. A zero denominator yields inf/NaN rather
# than raising ZeroDivisionError.
TOP = score["TO"] / (score["F2GA"] + score["F3GA"] + score["FTA"] * 0.44 + score["TO"])
score["TOP"] = TOP

In [12]:
# Extract the rows for Chiba, Utsunomiya, Kawasaki and Ryukyu.
# isin() replaces the chain of ==/| comparisons; it selects the same rows.
best4 = score[score["NameShort"].isin(["千葉", "宇都宮", "川崎", "琉球"])]
# Renumber rows 0..n-1 (later cells assign columns by index alignment).
# reset_index also returns a new frame, so adding columns to best4 never
# touches `score`. drop=True avoids storing the old row labels as an extra
# junk column.
best4 = best4.reset_index(drop=True)

In [31]:
# Check the pairwise correlation coefficients between the win flag and the
# Four Factors.
four_factor_columns = ["win", "eFG", "FTR", "ORBP", "TOP"]
best4[four_factor_columns].corr()

Unnamed: 0,win,eFG,FTR,ORBP,TOP
win,1.0,0.401992,0.074832,0.241427,-0.118817
eFG,0.401992,1.0,-0.00917,0.042499,0.086154
FTR,0.074832,-0.00917,1.0,0.034259,0.262626
ORBP,0.241427,0.042499,0.034259,1.0,7.9e-05
TOP,-0.118817,0.086154,0.262626,7.9e-05,1.0


In [58]:
# Standardize each of the Four Factors into a "<name>_z" column
# (z-score using the sample standard deviation, ddof=1).
# The result is assigned positionally, which matches the 0..n-1 index that
# best4 received from reset_index.
for factor in ("eFG", "FTR", "ORBP", "TOP"):
    best4[factor + "_z"] = scipy.stats.zscore(best4[factor], ddof=1)

In [48]:
# Prepare a dummy variable: 1 for Utsunomiya rows, 0 for every other team.
# A vectorized comparison replaces the Python loop and the index-aligned
# throwaway DataFrame, so each flag stays on its own row whatever the index.
utsunomiya = (best4["NameShort"] == "宇都宮").astype("int64")
best4["utsunomiya"] = utsunomiya

In [49]:
# Prepare a dummy variable: 1 for Kawasaki rows, 0 for every other team.
# A vectorized comparison replaces the Python loop and the index-aligned
# throwaway DataFrame, so each flag stays on its own row whatever the index.
kawasaki = (best4["NameShort"] == "川崎").astype("int64")
best4["kawasaki"] = kawasaki

In [50]:
# Prepare a dummy variable: 1 for Ryukyu rows, 0 for every other team.
# A vectorized comparison replaces the Python loop and the index-aligned
# throwaway DataFrame, so each flag stays on its own row whatever the index.
ryukyu = (best4["NameShort"] == "琉球").astype("int64")
best4["ryukyu"] = ryukyu

In [59]:
# Fit a logistic regression (binomial GLM) of the win flag on the
# standardized Four Factors, plus factor-by-team interaction terms.
# Chiba has no dummy variable, so the plain factor coefficients describe
# Chiba and each interaction is another team's difference from Chiba.
# NOTE(review): the team dummies enter only through interactions; there are
# no team main effects, so all four teams share one intercept. Confirm this
# is intended.
win_formula = (
    "win~eFG_z+TOP_z+FTR_z+ORBP_z"
    "+eFG_z:utsunomiya+TOP_z:utsunomiya+FTR_z:utsunomiya+ORBP_z:utsunomiya"
    "+eFG_z:kawasaki+TOP_z:kawasaki+FTR_z:kawasaki+ORBP_z:kawasaki"
    "+eFG_z:ryukyu+TOP_z:ryukyu+FTR_z:ryukyu+ORBP_z:ryukyu"
)
binomial_glm = smf.glm(formula=win_formula, data=best4, family=sm.families.Binomial())
model = binomial_glm.fit()
model.summary()

0,1,2,3
Dep. Variable:,win,No. Observations:,232.0
Model:,GLM,Df Residuals:,215.0
Model Family:,Binomial,Df Model:,16.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-86.04
Date:,"Sun, 22 Aug 2021",Deviance:,172.08
Time:,09:33:01,Pearson chi2:,210.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.9933,0.291,6.846,0.000,1.423,2.564
eFG_z,2.1067,0.534,3.943,0.000,1.060,3.154
TOP_z,-0.6561,0.418,-1.571,0.116,-1.475,0.163
FTR_z,0.2348,0.411,0.572,0.568,-0.571,1.040
ORBP_z,0.0619,0.398,0.155,0.876,-0.718,0.842
eFG_z:utsunomiya,-0.5925,0.617,-0.961,0.337,-1.801,0.616
TOP_z:utsunomiya,-0.2759,0.651,-0.424,0.672,-1.552,1.000
FTR_z:utsunomiya,0.2304,0.656,0.351,0.725,-1.056,1.516
ORBP_z:utsunomiya,0.2919,0.645,0.453,0.651,-0.972,1.556
