# ベイジアンネットワーク

## 流れ
1. データ取得
2. 要約統計量の算出
3. 犠打数,得点数,出塁率,打率,長打率の相関係数を算出
4. 出塁率を固定した(統制した)ときの、犠打数と得点数の偏相関係数を算出
5. ベイジアンネットワークを作成

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import japanize_matplotlib
import statsmodels as sm

# ベイジアンネットワーク用のライブラリは後で追加する

## データ取得

In [4]:
# Load the team batting table from the CSV in the working directory.
# `df` is reused by the later cells, so the name is kept as-is.
csv_path = 'team_batting_14_23.csv'
df = pd.read_csv(csv_path)

# Preview the first ten rows as a quick sanity check of the columns.
df.head(n=10)

Unnamed: 0,チーム,打 率,試 合,打 席,打 数,得 点,安 打,二 塁 打,三 塁 打,本 塁 打,...,犠 打,犠 飛,四 球,故 意 四,死 球,三 振,併 殺 打,長 打 率,出 塁 率,年度
0,オリックス,0.25,143,5324,4782,508,1194,211,17,109,...,83,25,371,8,63,986,106,0.369,0.311,2023
1,ソフトバンク,0.248,143,5454,4786,536,1185,195,15,104,...,107,38,470,14,53,1053,88,0.36,0.319,2023
2,楽　天,0.244,143,5369,4667,513,1140,168,25,104,...,125,33,490,9,52,937,108,0.358,0.321,2023
3,ロッテ,0.239,143,5414,4744,505,1135,220,12,100,...,116,39,453,20,62,1011,79,0.354,0.311,2023
4,西　武,0.233,143,5222,4672,435,1088,188,21,90,...,90,28,387,9,45,1045,97,0.34,0.296,2023
5,日本ハム,0.231,143,5248,4688,464,1082,195,18,100,...,84,25,397,10,54,1111,78,0.344,0.297,2023
6,巨　人,0.252,143,5352,4826,523,1218,204,14,164,...,93,31,365,38,37,1111,94,0.402,0.308,2023
7,DeNA,0.247,143,5331,4783,520,1182,230,18,105,...,106,31,355,22,56,846,103,0.369,0.305,2023
8,阪　神,0.247,143,5479,4775,555,1180,179,34,84,...,106,47,494,35,57,1173,92,0.352,0.322,2023
9,広　島,0.246,143,5241,4728,493,1165,197,18,96,...,96,20,349,25,48,1032,115,0.357,0.304,2023


## 要約統計量算出

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the batting table.
summary_stats = df.describe()
summary_stats

Unnamed: 0,打 率,試 合,打 席,打 数,得 点,安 打,二 塁 打,三 塁 打,本 塁 打,塁 打,...,犠 打,犠 飛,四 球,故 意 四,死 球,三 振,併 殺 打,長 打 率,出 塁 率,年度
count,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,...,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0
mean,0.251017,140.8,5320.9,4697.683333,554.05,1179.85,203.258333,21.3,117.333333,1777.708333,...,102.633333,30.233333,440.933333,16.766667,49.25,1020.041667,95.966667,0.378275,0.319875,2018.5
std,0.010391,6.968838,290.064768,256.780912,72.770602,90.213253,21.671922,6.512561,30.172427,163.582652,...,24.661723,6.810439,62.001997,8.685672,11.766103,90.002651,12.618469,0.025603,0.013187,2.884324
min,0.229,120.0,4437.0,3840.0,390.0,902.0,152.0,7.0,62.0,1365.0,...,48.0,15.0,306.0,2.0,22.0,811.0,50.0,0.326,0.285,2014.0
25%,0.24475,143.0,5305.5,4704.75,505.75,1145.25,188.75,17.0,94.75,1676.75,...,85.75,26.0,399.75,10.0,42.75,956.0,86.75,0.3595,0.31075,2016.0
50%,0.2505,143.0,5386.0,4775.0,550.0,1190.0,205.0,20.5,111.5,1773.0,...,102.5,30.0,442.0,15.5,48.0,1022.0,95.0,0.3775,0.32,2018.5
75%,0.257,143.0,5469.0,4826.5,594.5,1232.25,217.0,26.0,135.25,1873.75,...,115.0,34.0,484.0,22.25,56.25,1084.5,105.0,0.39625,0.329,2021.0
max,0.28,144.0,5663.0,5023.0,792.0,1401.0,260.0,39.0,202.0,2241.0,...,178.0,47.0,599.0,40.0,87.0,1234.0,129.0,0.454,0.352,2023.0


## 興味のある変数を取り出して相関係数を算出

In [7]:
# Columns of interest: batting average, runs scored, sacrifice bunts,
# on-base percentage and slugging percentage.
# NOTE: the spacing inside each label is significant -- it must match the
# CSV header exactly, so do not normalise it.
columns_of_interest = ['打  率', '得  点', '犠  打', '出 塁 率', '長 打 率']
selected = df[columns_of_interest]

# Pairwise correlation matrix (pandas defaults to Pearson).
selected.corr()

Unnamed: 0,打 率,得 点,犠 打,出 塁 率,長 打 率
打 率,1.0,0.733783,0.178846,0.775797,0.704033
得 点,0.733783,1.0,0.054809,0.785257,0.826041
犠 打,0.178846,0.054809,1.0,0.122307,-0.178494
出 塁 率,0.775797,0.785257,0.122307,1.0,0.655911
長 打 率,0.704033,0.826041,-0.178494,0.655911,1.0
