In [1]:
import pandas as pd
import numpy as np

import gc
from tqdm import tqdm
import time
import os
import warnings
from datetime import datetime

from chicken_dinner.pubgapi import PUBG
from chicken_dinner.constants import map_dimensions

import pymysql
from sqlalchemy import create_engine

# Show every column, and up to 200 rows, whenever a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

# Silence every warning for the rest of the session.
warnings.filterwarnings(action = 'ignore')

# Directory the kernel was started from.
path = os.getcwd()

# Directory holding the match JSON files. The default is the original
# hard-coded location; set the PUBG_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
data_path = os.environ.get('PUBG_DATA_DIR', os.path.join('/Users/thkim/Documents/pubg', 'data'))

## Functions for scraping vehicle objects from the PUBG API

In [6]:
def change_date_format(timestamp):
    """Convert a timestamp into a timezone-naive ``datetime.datetime``.

    The value is parsed with ``pd.Timestamp`` and any timezone information is
    discarded without shifting the wall-clock time.
    """
    parsed = pd.Timestamp(timestamp).to_pydatetime()
    naive = parsed.replace(tzinfo=None)
    return naive

In [7]:
def get_telemetry(match_id, map_name):
    """Fetch a match's telemetry and the y dimension of its map.

    The match is requested through the module-level ``pubg`` client. The map
    size comes from ``map_dimensions``; ``'Tiger_Main'`` is looked up under the
    ``'Desert_Main'`` key, every other map name is used unchanged.

    Returns a ``(telemetry, mapy)`` tuple.
    """
    match = pubg.match(match_id)
    match_telemetry = match.get_telemetry()

    # 'Tiger_Main' borrows the 'Desert_Main' entry of map_dimensions.
    dimension_key = 'Desert_Main' if map_name == 'Tiger_Main' else map_name
    _, mapy = map_dimensions[dimension_key]

    return match_telemetry, mapy

In [8]:
def rename_column_names(column_names):
    """Shorten one dotted column name.

    ``column_names`` is a single string. With three or more dot-separated
    parts, the last two are joined with an underscore (``'a.b.c' -> 'b_c'``);
    otherwise only the last part is returned (``'a.b' -> 'b'``).
    """
    parts = column_names.split('.')
    if len(parts) <= 2:
        return parts[-1]
    return '_'.join(parts[-2:])

In [13]:
def get_first_vehicle_df(telemetry, mapy, match_id = None):
    """Build a DataFrame with each character's first wheeled-vehicle ride of a match.

    Parameters
    ----------
    telemetry : object
        Telemetry of one match. Only ``filter_by('log_vehicle_ride')`` is
        called; every returned event must support ``event[key]`` lookups and
        ``to_dict()``.
    mapy : numeric
        Value the y coordinate is flipped against (``mapy - location_y``).
    match_id : optional
        Value written to the ``match_id`` column. When omitted, the
        module-level variable ``match_id`` is used, which is how callers that
        pass only two arguments have always supplied it.

    Returns
    -------
    pandas.DataFrame
        One row per character name: the first ``log_vehicle_ride`` event whose
        vehicle type is ``'WheeledVehicle'`` and whose ``common.is_game`` is
        greater than 0. An empty DataFrame is returned when no event qualifies.

    Raises
    ------
    NameError
        If ``match_id`` is not passed and no module-level ``match_id`` exists.
    """
    if match_id is None:
        # Backward compatibility: two-argument callers set a module-level
        # `match_id` before calling this function.
        try:
            match_id = globals()['match_id']
        except KeyError:
            raise NameError("name 'match_id' is not defined") from None

    seen_names = set()
    records = []

    # Keep only the first qualifying ride event of every character.
    for vehicle in telemetry.filter_by('log_vehicle_ride'):
        character_name = vehicle['character']['name']
        if (vehicle['vehicle']['vehicle_type'] == 'WheeledVehicle'
                and vehicle['common']['is_game'] > 0
                and character_name not in seen_names):
            records.append(vehicle.to_dict())
            seen_names.add(character_name)

    # Without any qualifying event there are no columns to post-process.
    if not records:
        return pd.DataFrame()

    # Normalise all records at once instead of concatenating row by row.
    first_vehicle_sample_df = pd.json_normalize(records)

    # Foreign-key column referencing the match table.
    first_vehicle_sample_df['match_id'] = match_id

    # Rename columns.
    first_vehicle_sample_df.columns = [rename_column_names(name) for name in first_vehicle_sample_df.columns.tolist()]
    first_vehicle_sample_df = first_vehicle_sample_df.rename(columns = {'_D': 'log_created_time'})

    # `zone` holds a list; keep its first element (NaN for an empty list) so
    # the value can be inserted into the database.
    first_vehicle_sample_df.loc[:, 'zone'] = first_vehicle_sample_df.loc[:, 'zone'].apply(lambda x: x[0] if len(x) > 0 else np.nan)

    # is_game values such as 0.1 arrive as 0.1000000001, so round to one decimal.
    first_vehicle_sample_df.loc[:, 'is_game'] = first_vehicle_sample_df.loc[:, 'is_game'].round(1)

    # Flip the y axis for map visualisation.
    first_vehicle_sample_df.loc[:, 'location_y'] = mapy - first_vehicle_sample_df.loc[:, 'location_y']

    # Convert the log timestamp to a timezone-naive datetime.
    first_vehicle_sample_df.loc[:, 'log_created_time'] = first_vehicle_sample_df.loc[:, 'log_created_time'].apply(change_date_format)

    first_vehicle_sample_df['num_fellow_passengers'] = first_vehicle_sample_df.loc[:, 'fellow_passengers'].apply(len)

    # Drop the seat_index column duplicated in the JSON (seat_index and
    # vehicle.seat_index carry the same value) plus the columns no longer needed.
    # NOTE(review): if both seat_index columns share the label after renaming,
    # drop() removes every column with that label - confirm that none should be kept.
    first_vehicle_sample_df = first_vehicle_sample_df.drop(['seat_index', '_T', 'fellow_passengers'], axis = 1)

    return first_vehicle_sample_df

## Build the first-vehicle table from the match IDs in match_data

In [14]:
%%time

# PUBG api authorize
pubg = PUBG(api_key, shard = 'kakao')

first_vehicle_df = pd.DataFrame()
match_json = pd.read_json(os.path.join(data_path, 'match_data_27.json'))

for idx in tqdm(range(len(match_json))):

    match_id = match_json['data.id'][idx]
    map_name = match_json['data.attributes.mapName'][idx]
    
    telemetry, mapy = get_telemetry(match_id, map_name)
    
    first_vehicle_df = pd.concat([first_vehicle_df, get_first_vehicle_df(telemetry, mapy)], axis = 0, ignore_index = True)
    
print(f'shape of Vehicle df: {first_vehicle_df.shape}')

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [04:48<00:00,  6.15s/it]


## Insert into database

In [None]:
# Database connection settings are read from environment variables so that
# credentials are never stored in the notebook. Export them before starting
# the kernel, for either the local or the GCP database.
#
# Values for a local MySQL instance:
#   PUBG_DB_USER=root
#   PUBG_DB_PASSWORD=mysql
#   PUBG_DB_HOST=localhost
#   PUBG_DB_PORT=3306
#   PUBG_DB_NAME=pubg
user = os.environ.get('PUBG_DB_USER', '')
password = os.environ.get('PUBG_DB_PASSWORD', '')
host = os.environ.get('PUBG_DB_HOST', '')
# 3306 is the port used by the local configuration above.
port = int(os.environ.get('PUBG_DB_PORT', 3306))
database = os.environ.get('PUBG_DB_NAME', '')

In [None]:
def insert_data_to_db(data, table_name, connection, if_exist = 'append'):
    """Write ``data`` to the SQL table ``table_name`` through ``connection``.

    The DataFrame index is not written, rows are sent with multi-row INSERT
    statements (``method='multi'``), and ``if_exist`` is passed to
    ``DataFrame.to_sql`` as ``if_exists`` (default ``'append'``).
    Nothing is returned.
    """
    to_sql_options = {
        'name': table_name,
        'con': connection,
        'if_exists': if_exist,
        'index': False,
        'method': 'multi',
    }
    data.to_sql(**to_sql_options)

In [None]:
%%time

engine = create_engine(f'mysql+pymysql://{user}:{password}@{host}:{port}/{database}', encoding = 'utf-8')
engine_conn = engine_connect()

insert_data_to_db(damage_df, 'damage', engine_conn)
engine_conn.close()