In [148]:
import pymysql
import pandas as pd
from datetime import datetime
from sqlalchemy import create_engine
import yaml

In [149]:
# Read the MariaDB connection settings from the shared YAML config file.
# NOTE(review): yaml.FullLoader can build more than plain data types; if this
# file could ever come from an untrusted source, prefer yaml.safe_load.
with open('../yamls/sql_info.yaml') as config_file:
    info = yaml.load(config_file, Loader=yaml.FullLoader)

# Every connection parameter lives under the MARIADB section.
mariadb = info['MARIADB']
host = mariadb['IP']
user = mariadb['USER']
passwd = mariadb['PASSWD']
db = mariadb['DB']
port = mariadb['PORT']

# 선수 테이블 생성하기 

In [170]:
# Read the full batting_info and pitching_info tables into DataFrames.
batting_sql = '''select * from batting_info
'''
pitching_sql = '''select * from pitching_info
'''

# `password` / `database` are the current keyword names; `passwd` / `db` are
# deprecated aliases in PyMySQL.
conn = pymysql.connect(host=host, user=user, password=passwd, database=db, charset='utf8', port=port, cursorclass=pymysql.cursors.DictCursor)
try:
    # The cursor context manager closes the cursor; the finally clause closes
    # the connection even when one of the queries raises.
    with conn.cursor() as cur:
        cur.execute(batting_sql)
        batting = pd.DataFrame(cur.fetchall())

        cur.execute(pitching_sql)
        pitching = pd.DataFrame(cur.fetchall())
finally:
    conn.close()

In [171]:
# Keep only the identifying columns, one row per player, from each table.
player_keys = ['player_name', 'player_birth']
batting = batting[player_keys].drop_duplicates(subset=player_keys)
pitching = pitching[player_keys].drop_duplicates(subset=player_keys)

# Stack batters and pitchers, then de-duplicate once more so a player who
# appears in both tables is loaded only once. The de-duplicated frame must be
# the one that gets written; it must not be replaced by a plain concat.
player = pd.concat([batting, pitching]).drop_duplicates(subset=player_keys)

# Append the finished player list to the player_info table.
# NOTE(review): if_exists='append' adds these rows again on every run of this
# cell, and the credentials are interpolated into the URL unescaped (a password
# containing '@' or '/' would break it).
engine = create_engine(f"mysql+pymysql://{user}:{passwd}@{host}:{port}/{db}?charset=utf8")
try:
    # to_sql checks out and returns its own connection from the engine, so no
    # separate engine.connect() is needed.
    player.to_sql(name='player_info', con=engine, if_exists='append', index=False)
finally:
    # Release pooled connections even when the load fails.
    engine.dispose()

In [163]:
# Reduce each frame to one row per (player name, birth date, team).
# NOTE(review): needs frames that still carry a `team` column; the player-table
# cell narrows batting/pitching to two columns, so reload the tables first.
roster_keys = ['player_name', 'player_birth', 'team']
batting = batting.loc[:, roster_keys].drop_duplicates(subset=roster_keys)
pitching = pitching.loc[:, roster_keys].drop_duplicates(subset=roster_keys)

In [167]:
# Reduce each frame to one row per (player name, birth date).
identity_keys = ['player_name', 'player_birth']
batting = batting.loc[:, identity_keys].drop_duplicates(subset=identity_keys)
pitching = pitching.loc[:, identity_keys].drop_duplicates(subset=identity_keys)

In [156]:
# Scratch arithmetic check (presumably batter row count + pitcher row count — TODO confirm).
173 + 194 

367

In [169]:
# Display the current pitching frame for a visual check.
pitching

Unnamed: 0,player_name,player_birth
0,강재민,1997-04-03
1,고봉재,1993-05-14
2,고효준,1983-02-08
3,구승민,1990-06-12
4,김광현,1988-07-22
...,...,...
711,원태인,2000-04-06
730,최이준,1999-04-10
768,이승호,1999-02-08
775,장현식,1995-02-24


## week 컬럼생성

In [21]:

# Reload the full batting_info and pitching_info tables so the weekly section
# starts from the raw rows (all columns, no de-duplication).
batting_sql = '''select * from batting_info
'''
pitching_sql = '''select * from pitching_info
'''

# `password` / `database` are the current keyword names; `passwd` / `db` are
# deprecated aliases in PyMySQL.
conn = pymysql.connect(host=host, user=user, password=passwd, database=db, charset='utf8', port=port, cursorclass=pymysql.cursors.DictCursor)
try:
    # The cursor context manager closes the cursor; the finally clause closes
    # the connection even when one of the queries raises.
    with conn.cursor() as cur:
        cur.execute(batting_sql)
        batting = pd.DataFrame(cur.fetchall())

        cur.execute(pitching_sql)
        pitching = pd.DataFrame(cur.fetchall())
finally:
    conn.close()




In [136]:
def week_key(yyyymmdd):
    """Build the week key used by the weekly tables from yyyymmdd values.

    Args:
        yyyymmdd: Series of dates written as yyyymmdd (ints or strings).

    Returns:
        Series of strings made of the 4-digit calendar year followed by the
        un-padded ISO week number, e.g. 20230401 -> '202313'.
    """
    as_text = yyyymmdd.astype(str)
    iso_week = pd.to_datetime(as_text.str[:8], format='%Y%m%d').dt.isocalendar().week
    # NOTE(review): the key format (calendar year + un-padded ISO week) is kept
    # unchanged so new rows match keys produced earlier. It does not sort
    # lexically, and dates near New Year can pair a calendar year with the
    # neighbouring ISO year's week number.
    return as_text.str[:4] + iso_week.astype(str)


# Derive `week` first: every aggregation below groups on it, so it has to
# exist before team_games is built (required for a clean Restart & Run All).
batting['week'] = week_key(batting['yyyymmdd'])
pitching['week'] = week_key(pitching['yyyymmdd'])

# Games per team per week, used for the regulation plate-appearance / innings
# thresholds. Counted as the number of distinct game dates in the batting rows.
# NOTE(review): two games on the same date would count once — confirm whether
# doubleheaders need separate handling.
team_games = (
    batting.groupby(['week', 'team'])['yyyymmdd']
    .nunique()
    .rename('game_count')
    .reset_index()
)

# Weekly totals per player. Columns are selected before summing so unrelated
# columns (dates, labels) are never aggregated.
group_keys = ['week', 'player_name', 'player_birth', 'team']
batting_stats = ['TPA', 'AB', 'R', 'H', 'HR', 'RBI', 'BB', 'HBP', 'SO', 'GO', 'FO', 'PIT', 'GDP', 'LOB']
# NOTE(review): today_type holds text result labels in the displayed pitching
# table; summing it will not produce counts — confirm the intended aggregation.
pitching_stats = ['today_type', 'IP', 'TBF', 'H', 'R', 'ER', 'BB', 'HBP', 'K', 'HR']

batting_week = batting.groupby(group_keys)[batting_stats].sum().reset_index()
# reset_index() keeps week/player/team as ordinary columns, matching
# batting_week; merging on index levels would otherwise drop the player levels.
pitching_week = pitching.groupby(group_keys)[pitching_stats].sum().reset_index()

In [138]:
# Attach each team's weekly game count to the weekly player totals.
# Any game_count left over from a previous run is dropped first, so re-running
# this cell does not produce game_count_x / game_count_y columns.
# validate='many_to_one' raises if team_games ever holds duplicate (week, team)
# keys, which would silently multiply player rows.
batting_week = batting_week.drop(columns=['game_count'], errors='ignore').merge(
    team_games, on=['week', 'team'], how='left', validate='many_to_one'
)
pitching_week = pitching_week.drop(columns=['game_count'], errors='ignore').merge(
    team_games, on=['week', 'team'], how='left', validate='many_to_one'
)

In [144]:
# Final column order for the weekly batting table.
batting_columns = [
    'week', 'player_name', 'player_birth', 'team', 'TPA', 'RTPA', 'AB', 'R', 'H',
    'HR', 'RBI', 'BB', 'HBP', 'SO', 'GO', 'FO', 'PIT', 'GDP', 'LOB',
    'AVG', 'game_count',
]

# RTPA: weekly threshold computed as 3.1 per team game.
# AVG: hits per at-bat; a missing ratio (e.g. 0 / 0) is reported as 0.0.
batting_week = batting_week.assign(
    RTPA=batting_week['game_count'] * 3.1,
    AVG=batting_week['H'].div(batting_week['AB']).fillna(0.0),
)[batting_columns]

# RIP: weekly threshold computed as 1.0 per team game.
pitching_week = pitching_week.assign(RIP=pitching_week['game_count'] * 1.0)

[1,
 1,
 0,
 1,
 1,
 5,
 0,
 1,
 0,
 0,
 1,
 5,
 1,
 0,
 0,
 2,
 1,
 2,
 1,
 1,
 2,
 6,
 1,
 3,
 0,
 2,
 0,
 0,
 0,
 2,
 3,
 2,
 2,
 6,
 1,
 2,
 1,
 1,
 2,
 5,
 1,
 6,
 4,
 6,
 5,
 1,
 1,
 1,
 1,
 1,
 0,
 5,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 2,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 7,
 1,
 1,
 5,
 1,
 2,
 0,
 1,
 5,
 2,
 7,
 1,
 12,
 1,
 4,
 3,
 2,
 1,
 0,
 2,
 6,
 4,
 3,
 5,
 1,
 1,
 2,
 0,
 2,
 1,
 5,
 1,
 1,
 2,
 0,
 1,
 1,
 2,
 7,
 1,
 1,
 2,
 6,
 6,
 2,
 1,
 4,
 0,
 0,
 2,
 6,
 0,
 6,
 1,
 1,
 6,
 1,
 8,
 3,
 2,
 11,
 3,
 7,
 6,
 6,
 1,
 0,
 0,
 7,
 6,
 7,
 5,
 0,
 0,
 7,
 5,
 1,
 3,
 1,
 0,
 5,
 2,
 0,
 0,
 3,
 3,
 3,
 3,
 0,
 3,
 1,
 0,
 0,
 3,
 2,
 3,
 1,
 5,
 0,
 4,
 1,
 1,
 3,
 4,
 1,
 1,
 3,
 1,
 3,
 5,
 6,
 1,
 1,
 7,
 5,
 8,
 11,
 2,
 1,
 2,
 1,
 5,
 2,
 2,
 4,
 1,
 11,
 3,
 4,
 0,
 5,
 2,
 1,
 7,
 3,
 8,
 2,
 2,
 1,
 1,
 4,
 2,
 2,
 2,
 5,
 4,
 2,
 2,
 2,
 3,
 0,
 2,
 1,
 5,
 1,
 3,
 3,
 0,
 2,
 2,
 2,
 5,
 2,
 1,
 7,
 5,
 6,
 1,
 5,
 0,
 5,
 1,
 2,
 3,
 1,
 4,
 5,
 2,


In [126]:
# Append the weekly batting table to weekly_batting_info.
# NOTE(review): if_exists='append' adds these rows again on every run of this
# cell, and the credentials are interpolated into the URL unescaped (a password
# containing '@' or '/' would break it).
engine = create_engine(f"mysql+pymysql://{user}:{passwd}@{host}:{port}/{db}?charset=utf8")
try:
    # to_sql checks out and returns its own connection from the engine, so no
    # separate engine.connect() is needed.
    batting_week.to_sql(name='weekly_batting_info', con=engine, if_exists='append', index=False)
finally:
    # Release pooled connections even when the load fails.
    engine.dispose()

In [130]:
# Transposed view: each column of the pitching table is shown as a row.
pitching.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,871,872,873,874,875,876,877,878,879,880
yyyymmdd,20230401,20230401,20230401,20230401,20230401,20230401,20230401,20230401,20230401,20230401,...,20230427,20230427,20230427,20230427,20230427,20230427,20230427,20230427,20230427,20230427
player_name,강재민,고봉재,고효준,구승민,김광현,김기훈,김도규,김명신,김민,김범수,...,임창민,장현식,전상현,정해영,조이현,최민준,플럿코,하영민,함덕주,후라도
player_birth,1997-04-03,1993-05-14,1983-02-08,1990-06-12,1988-07-22,2000-01-03,1998-07-11,1993-11-29,1999-04-14,1995-10-03,...,1985-08-25,1995-02-24,1996-04-18,2001-08-23,1995-06-27,1999-06-11,1991-10-03,1995-05-07,1995-01-13,1996-01-30
team,한화,두산,SSG,롯데,SSG,KIA,롯데,두산,KT,한화,...,키움,KIA,KIA,KIA,KT,SSG,LG,키움,LG,키움
today_type,,,홀,블론,승,,,,,,...,홀,,승,,패,패,승,홀,홀,승
IP,1,1,1,0,5,0,0,0,0,1,...,1,1,1,1,2,1,6,1,1,5
TBF,3,3,3,6,22,4,3,5,2,5,...,5,6,4,3,12,6,27,5,3,25
H,0,0,1,2,4,0,2,2,0,3,...,2,2,1,0,5,3,6,1,0,4
R,0,0,0,2,1,0,2,3,1,0,...,0,0,0,0,3,1,3,0,0,1
ER,0,0,0,1,1,0,2,3,1,0,...,0,0,0,0,2,1,3,0,0,0
