In [9]:
import pandas as pd
import pickle
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

import xgboost as xgb

## 4/7 경기 예측

In [76]:
# Read the input table for the 4/7 games; the CSV is cp949-encoded.
filename = 'C:/workspace/p-tag/baseball_2022_for_20230407_vol2.csv'
data = pd.read_csv(filepath_or_buffer=filename, encoding='cp949')

In [83]:
# Work on an independent (deep) copy so the frame read from CSV stays untouched,
# then display the copy.
baseball_data = data.copy(deep=True)
baseball_data

Unnamed: 0,T_ID,VS_T_ID,PA,AB,RUN,OBP,OOO,Unnamed: 7
0,KT,LT,37.75,32.8125,4.125,0.331,0.25,
1,LT,KT,37.125,33.8125,3.9375,0.32,0.275,
2,OB,KIA,39.125,34.0625,4.5,0.345,0.266,
3,KIA,OB,39.5,34.9375,5.0,0.351,0.281,
4,SSG,HH,39.375,33.0625,6.0,0.369,0.274,
5,HH,SSG,37.6875,33.75,4.25,0.336,0.269,
6,SS,LG,38.1875,34.4375,3.9375,0.327,0.272,
7,LG,SS,39.25,35.1875,6.25,0.356,0.291,
8,WO,NC,41.4375,34.9375,5.375,0.371,0.268,
9,NC,WO,39.1875,34.5,3.8125,0.322,0.25,


In [84]:
# List the column labels of the working frame.
baseball_data.columns

Index(['T_ID', 'VS_T_ID', 'PA', 'AB', 'RUN', 'OBP', 'OOO', 'Unnamed: 7'], dtype='object')

In [85]:
# Look at the first T_ID value (team IDs are still raw strings at this point).
baseball_data['T_ID'].iloc[0]

'KT'

In [86]:
# Number of rows in the frame, measured on the T_ID column.
len(baseball_data['T_ID'])

10

In [87]:
# Each team's integer code is its position in this list.
# NOTE(review): the order presumably has to match the encoding used when the
# pickled models were trained — confirm before reordering.
team_list = ['HH', 'KIA', 'KT', 'LG', 'LT', 'NC', 'OB', 'SSG', 'SS', 'WO']

# Size the encoding by the number of teams, not by the number of rows in the
# frame. Sizing by rows only worked because this file happens to have 10 rows:
# fewer rows would leave the later teams unencoded, and more rows would index
# past the end of team_list.
number = len(team_list)
team_codes = {team: code for code, team in enumerate(team_list)}

# Replace team abbreviations with their integer codes in both ID columns.
# Values that are not in team_list are left unchanged by replace().
for column in ('T_ID', 'VS_T_ID'):
    baseball_data[column] = baseball_data[column].replace(team_codes)

In [88]:
# Display the frame after T_ID / VS_T_ID were replaced with integer codes.
baseball_data

Unnamed: 0,T_ID,VS_T_ID,PA,AB,RUN,OBP,OOO,Unnamed: 7
0,2,4,37.75,32.8125,4.125,0.331,0.25,
1,4,2,37.125,33.8125,3.9375,0.32,0.275,
2,6,1,39.125,34.0625,4.5,0.345,0.266,
3,1,6,39.5,34.9375,5.0,0.351,0.281,
4,7,0,39.375,33.0625,6.0,0.369,0.274,
5,0,7,37.6875,33.75,4.25,0.336,0.269,
6,8,3,38.1875,34.4375,3.9375,0.327,0.272,
7,3,8,39.25,35.1875,6.25,0.356,0.291,
8,9,5,41.4375,34.9375,5.375,0.371,0.268,
9,5,9,39.1875,34.5,3.8125,0.322,0.25,


In [89]:
# Columns passed to every model, in this exact order.
# NOTE(review): the order presumably mirrors the training feature order — confirm.
feature_columns = ['RUN', 'AB', 'PA', 'OOO', 'T_ID', 'VS_T_ID', 'OBP']
test_x = baseball_data[feature_columns]

In [90]:
# Load each pickled model and collect its predicted probabilities for test_x.
# Files are opened with `with` so every handle is closed right after loading.
# SECURITY: pickle.load can execute arbitrary code during deserialization;
# only load model files that come from a trusted location.

# RandomForest predicted probabilities
with open('C:/workspace/p-tag/model/random_forest.pkl', 'rb') as model_file:
    pickled_RandomForest_model = pickle.load(model_file)
pred_probs_RandomForest = pickled_RandomForest_model.predict_proba(test_x)

# XGBoost predicted probabilities
with open('C:/workspace/p-tag/model/xgboost.pkl', 'rb') as model_file:
    pickled_XGBoost_model = pickle.load(model_file)
dtest = xgb.DMatrix(data=test_x)
percent = pickled_XGBoost_model.predict(dtest)
# Build two columns [1 - p, p] so the result can be added to the predict_proba
# outputs of the other models.
# NOTE(review): assumes predict() returns a 1-D array with one class-1
# probability per row (binary objective) — confirm.
pred_probs_xgboost = np.column_stack((1 - percent, percent))

# GBM predicted probabilities
with open('C:/workspace/p-tag/model/GBM.pkl', 'rb') as model_file:
    pickled_GBM_model = pickle.load(model_file)
pred_probs_GBM = pickled_GBM_model.predict_proba(test_x)

# LightGBM predicted probabilities
with open('C:/workspace/p-tag/model/LightGBM.pkl', 'rb') as model_file:
    pickled_LightGBM_model = pickle.load(model_file)
pred_probs_LightGBM = pickled_LightGBM_model.predict_proba(test_x)

# DeepLearning predicted probabilities
# NOTE(review): predict() output is summed with the two-column arrays above, so
# it presumably already has one column per class — confirm.
with open('C:/workspace/p-tag/model/DeepLearning.pkl', 'rb') as model_file:
    pickled_DeepLearning_model = pickle.load(model_file)
pred_probs_DeepLearning = pickled_DeepLearning_model.predict(test_x)



In [91]:
# Average the five models' predicted probabilities and derive a 0/1 prediction
# per row: 1 when the second column is strictly larger than the first, else 0.
total_pred_probs = (
    pred_probs_RandomForest
    + pred_probs_xgboost
    + pred_probs_GBM
    + pred_probs_LightGBM
    + pred_probs_DeepLearning
) / 5
preds = [int(row[0] < row[1]) for row in total_pred_probs]
preds

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]

In [92]:
# Display the averaged per-row probabilities behind the predictions.
total_pred_probs

array([[0.79001257, 0.20998742],
       [0.75785758, 0.24214243],
       [0.509688  , 0.490312  ],
       [0.70583774, 0.29416226],
       [0.16851304, 0.83148695],
       [0.79396656, 0.20603344],
       [0.74650047, 0.25349954],
       [0.50005818, 0.49994182],
       [0.6468026 , 0.3531974 ],
       [0.81166141, 0.18833859]])

- KT vs LT : LT 승
- OB vs KIA : OB 승
- SSG vs HH : SSG 승
- SS vs LG : LG 승
- WO vs NC : WO 승

## 4/6 경기 예측

In [66]:
# Read the input table for the 4/6 games (cp949-encoded CSV) and display it.
filename = 'C:/workspace/p-tag/baseball_2022_for_20230406_rev.csv'
data = pd.read_csv(filepath_or_buffer=filename, encoding='cp949')
data

Unnamed: 0,T_ID,VS_T_ID,PA,AB,RUN,OBP,OOO,Unnamed: 7
0,OB,NC,37.944444,33.722222,6.583333,0.321,0.249,
1,NC,OB,39.388889,34.277778,4.944444,0.356,0.272,
2,SS,HH,38.941176,33.588235,5.529412,0.335,0.254,
3,HH,SS,37.882353,34.058824,3.470588,0.299,0.238,
4,WO,LG,37.944444,34.055556,3.277778,0.29,0.22,
5,LG,WO,38.888889,34.555556,4.777778,0.33,0.264,


In [67]:
# Work on an independent (deep) copy so the frame read from CSV stays untouched.
baseball_data = data.copy(deep=True)

In [68]:
# Each team's integer code is its position in this list.
team_list = ['HH', 'KIA', 'KT', 'LG', 'LT', 'NC', 'OB', 'SSG', 'SS', 'WO']
number = len(team_list)

# Swap every team abbreviation for its list position in both ID columns.
for index, team in enumerate(team_list):
    for column in ('T_ID', 'VS_T_ID'):
        baseball_data[column] = baseball_data[column].replace(team, index)

In [69]:
# Display the frame after T_ID / VS_T_ID were replaced with integer codes.
baseball_data

Unnamed: 0,T_ID,VS_T_ID,PA,AB,RUN,OBP,OOO,Unnamed: 7
0,6,5,37.944444,33.722222,6.583333,0.321,0.249,
1,5,6,39.388889,34.277778,4.944444,0.356,0.272,
2,8,0,38.941176,33.588235,5.529412,0.335,0.254,
3,0,8,37.882353,34.058824,3.470588,0.299,0.238,
4,9,3,37.944444,34.055556,3.277778,0.29,0.22,
5,3,9,38.888889,34.555556,4.777778,0.33,0.264,


In [70]:
# Columns passed to every model, in this exact order.
# NOTE(review): the order presumably mirrors the training feature order — confirm.
feature_columns = ['RUN', 'AB', 'PA', 'OOO', 'T_ID', 'VS_T_ID', 'OBP']
test_x = baseball_data[feature_columns]

In [71]:
# Load each pickled model and collect its predicted probabilities for test_x.
# Files are opened with `with` so every handle is closed right after loading.
# SECURITY: pickle.load can execute arbitrary code during deserialization;
# only load model files that come from a trusted location.

# RandomForest predicted probabilities
with open('C:/workspace/p-tag/model/random_forest.pkl', 'rb') as model_file:
    pickled_RandomForest_model = pickle.load(model_file)
pred_probs_RandomForest = pickled_RandomForest_model.predict_proba(test_x)

# XGBoost predicted probabilities
with open('C:/workspace/p-tag/model/xgboost.pkl', 'rb') as model_file:
    pickled_XGBoost_model = pickle.load(model_file)
dtest = xgb.DMatrix(data=test_x)
percent = pickled_XGBoost_model.predict(dtest)
# Build two columns [1 - p, p] so the result can be added to the predict_proba
# outputs of the other models.
# NOTE(review): assumes predict() returns a 1-D array with one class-1
# probability per row (binary objective) — confirm.
pred_probs_xgboost = np.column_stack((1 - percent, percent))

# GBM predicted probabilities
with open('C:/workspace/p-tag/model/GBM.pkl', 'rb') as model_file:
    pickled_GBM_model = pickle.load(model_file)
pred_probs_GBM = pickled_GBM_model.predict_proba(test_x)

# LightGBM predicted probabilities
with open('C:/workspace/p-tag/model/LightGBM.pkl', 'rb') as model_file:
    pickled_LightGBM_model = pickle.load(model_file)
pred_probs_LightGBM = pickled_LightGBM_model.predict_proba(test_x)

# DeepLearning predicted probabilities
# NOTE(review): predict() output is summed with the two-column arrays above, so
# it presumably already has one column per class — confirm.
with open('C:/workspace/p-tag/model/DeepLearning.pkl', 'rb') as model_file:
    pickled_DeepLearning_model = pickle.load(model_file)
pred_probs_DeepLearning = pickled_DeepLearning_model.predict(test_x)



In [72]:
# Average the five models' predicted probabilities and derive a 0/1 prediction
# per row: 1 when the second column is strictly larger than the first, else 0.
total_pred_probs = (
    pred_probs_RandomForest
    + pred_probs_xgboost
    + pred_probs_GBM
    + pred_probs_LightGBM
    + pred_probs_DeepLearning
) / 5
preds = [int(row[0] < row[1]) for row in total_pred_probs]
preds

[0, 0, 1, 0, 0, 0]

In [73]:
# Display the averaged per-row probabilities behind the predictions.
total_pred_probs

array([[0.51225582, 0.48774418],
       [0.65866678, 0.34133322],
       [0.39885111, 0.6011489 ],
       [0.87020795, 0.12979204],
       [0.80865611, 0.1913439 ],
       [0.71545524, 0.28454476]])

=> OB vs NC : NC 승         
=> SS vs HH : HH 승          
=> WO vs LG : WO 승             