In [1]:
import pandas as pd
import numpy as np

import gc
from tqdm import tqdm
import time
import os
import warnings
from datetime import datetime

from chicken_dinner.pubgapi import PUBG
from chicken_dinner.constants import map_dimensions

import pymysql
from sqlalchemy import create_engine

# Display settings: show every column and at most 200 rows of a DataFrame.
pd.options.display.max_columns = None
pd.options.display.max_rows = 200

# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Current working directory and the directory holding the input data files.
path = os.getcwd()
data_path = os.path.join('/Users/thkim/Documents/pubg', 'data')

## Functions for scraping damage objects from match telemetry

In [15]:
def change_date_format(timestamp):
    """Convert a timestamp-like value into a timezone-naive ``datetime``.

    The value is parsed with ``pd.Timestamp``. Any timezone information is
    dropped without shifting the clock time.
    """
    parsed = pd.Timestamp(timestamp)
    as_datetime = parsed.to_pydatetime()
    return as_datetime.replace(tzinfo=None)

In [8]:
def get_telemetry(match_id, map_name):
    """Fetch a match's telemetry together with the y-dimension of its map.

    Uses the notebook-level ``pubg`` client and the ``map_dimensions`` lookup.

    Returns:
        A ``(telemetry, mapy)`` tuple, where ``mapy`` is the second element of
        the ``map_dimensions`` entry for the map.
    """
    telemetry = pubg.match(match_id).get_telemetry()

    # 'Tiger_Main' is looked up under the 'Desert_Main' entry of map_dimensions.
    dimension_key = 'Desert_Main' if map_name == 'Tiger_Main' else map_name
    _, mapy = map_dimensions[dimension_key]

    return telemetry, mapy

In [20]:
def _first_zone(zones):
    """Return the first entry of a zone list, or NaN when it is empty or missing."""
    if isinstance(zones, (list, tuple)) and len(zones) > 0:
        return zones[0]
    return np.nan


def get_damage_df(telemetry, mapy, match_id = None):
    """Build the damage table for a single match from its telemetry.

    Args:
        telemetry: object whose ``filter_by('log_player_take_damage')`` yields
            events exposing ``to_dict()``.
        mapy: y-dimension of the map, used to flip the y coordinates.
        match_id: value written to the ``match_id`` column. When omitted, the
            notebook-level ``match_id`` variable is used so that existing
            two-argument calls keep working.

    Returns:
        One row per damage event with flattened, underscore-separated column
        names, or an empty DataFrame when the match has no damage events.

    Raises:
        ValueError: if ``match_id`` is neither passed nor defined at notebook level.
    """
    # Normalize every event in one pass; growing a DataFrame with concat
    # inside a loop copies all previous rows on each iteration.
    records = [damage.to_dict() for damage in telemetry.filter_by('log_player_take_damage')]
    if not records:
        return pd.DataFrame()

    if match_id is None:
        # Two-argument call sites rely on a notebook-level `match_id` variable.
        match_id = globals().get('match_id')
        if match_id is None:
            raise ValueError('match_id was not passed and no notebook-level `match_id` is defined')

    damage_sample_df = pd.json_normalize(records)

    # Foreign key column referencing the match table
    damage_sample_df['match_id'] = match_id

    # rename columns
    damage_sample_df.columns = [column.replace('.', '_') for column in damage_sample_df.columns]
    damage_sample_df = damage_sample_df.rename(columns = {'_D': 'log_created_time'
                                                         ,'common_is_game': 'is_game'})

    # Zone values are lists holding a single string or None; collapse them to a
    # scalar so they can be inserted into the DB. Missing zones become NaN.
    for zone_column in ('attacker_zone', 'victim_zone'):
        if zone_column in damage_sample_df.columns:
            damage_sample_df[zone_column] = damage_sample_df[zone_column].map(_first_zone)

    # is_game values such as 0.1 arrive as 0.1000000001, so round them
    damage_sample_df['is_game'] = damage_sample_df['is_game'].round(1)

    # Flip the y axis for map visualization
    for y_column in ('attacker_location_y', 'victim_location_y'):
        if y_column in damage_sample_df.columns:
            damage_sample_df[y_column] = mapy - damage_sample_df[y_column]

    # Convert the log timestamps to timezone-naive datetimes
    damage_sample_df['log_created_time'] = damage_sample_df['log_created_time'].apply(change_date_format)

    # Null values stored as the string 'None' become np.nan
    damage_sample_df.loc[damage_sample_df['damage_reason'] == 'None', 'damage_reason'] = np.nan

    # Drop unneeded columns; 'attacker' only exists when some event has a null
    # attacker, so missing columns are ignored.
    damage_sample_df = damage_sample_df.drop(columns = ['_T', 'attacker', 'attack_id'], errors = 'ignore')

    return damage_sample_df

## Get the damage table for the match IDs in match_data

In [21]:
# PUBG api authorize
# NOTE(review): `api_key` is not defined in any visible cell; it must already
# exist in the kernel before this cell runs.
pubg = PUBG(api_key, shard = 'kakao')

match_json = pd.read_json(os.path.join(data_path, 'match_data_27.json'))

# Collect one frame per match and concatenate once at the end; concatenating
# inside the loop re-copies every previously collected row on each iteration.
damage_frames = []
match_rows = zip(match_json['data.id'], match_json['data.attributes.mapName'])

# The loop variable must stay named `match_id`: get_damage_df is called with
# two arguments and picks the match id up from the notebook scope.
for match_id, map_name in tqdm(match_rows, total = len(match_json)):

    telemetry, mapy = get_telemetry(match_id, map_name)

    damage_frames.append(get_damage_df(telemetry, mapy))

# pd.concat raises on an empty list, so fall back to an empty frame.
damage_df = pd.concat(damage_frames, axis = 0, ignore_index = True) if damage_frames else pd.DataFrame()

print(f'shape of Damage df: {damage_df.shape}')

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [17:23<00:00, 22.20s/it]


## Insert into database

In [None]:
# # local
# user = 'root'
# password = 'mysql'
# host = 'localhost'
# port = 3306
# database = 'pubg'

# gcp
# Connection settings come from environment variables so that no secret is
# stored in the notebook. Unset string settings fall back to an empty string.
user = os.environ.get('PUBG_DB_USER', '')
password = os.environ.get('PUBG_DB_PASSWORD', '')
host = os.environ.get('PUBG_DB_HOST', '')
# 3306 is the MySQL default port, matching the local profile above.
port = int(os.environ.get('PUBG_DB_PORT', '3306'))
database = os.environ.get('PUBG_DB_NAME', '')

In [None]:
def insert_data_to_db(data, table_name, connection, if_exist = 'append'):
    """Write ``data`` to the SQL table ``table_name`` through ``connection``.

    The index is not written, rows are sent with ``method='multi'``, and
    ``if_exist`` is forwarded as ``to_sql``'s ``if_exists`` argument.
    """
    to_sql_options = {
        'name': table_name,
        'con': connection,
        'if_exists': if_exist,
        'index': False,
        'method': 'multi',
    }
    data.to_sql(**to_sql_options)

In [None]:
# The connection character set is requested in the URL. create_engine's
# `encoding` argument was removed in SQLAlchemy 2.0 and is not needed on Python 3.
engine = create_engine(f'mysql+pymysql://{user}:{password}@{host}:{port}/{database}?charset=utf8mb4')

# The context manager closes the connection even when the insert raises.
with engine.connect() as engine_conn:
    insert_data_to_db(damage_df, 'damage', engine_conn)