In [1]:
import gc
import os
import time
import warnings
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import requests
from chicken_dinner.pubgapi import PUBG
from pandas import json_normalize
from tqdm import tqdm

# Silence every warning for the rest of the session (this applies to all libraries, pandas included).
warnings.filterwarnings('ignore')
# Do not truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

## Request Status Check

In [2]:
# The API key is a secret, so it is read from the environment instead of being
# written into the notebook. Export PUBG_API_KEY before starting the kernel;
# a missing variable fails here with a KeyError rather than later with a NameError.
api_key = os.environ["PUBG_API_KEY"]
# Media type sent in the Accept header of every request.
accept = 'application/vnd.api+json'

In [3]:
# Smoke test: request the sample-match listing once and show the response object
# (its repr includes the HTTP status code).
url = 'https://api.pubg.com/shards/kakao/samples?filter[createdAt-start]=2022-09-27T00%3A00%3A00Z'
# `header` is a notebook-level global: get_match_id and the download loops reuse it.
header = {
    'Authorization' : api_key,
    'Accept' : accept
}

request = requests.get(url, headers = header)
print(request)

<Response [200]>


## Match Id

In [4]:
# maximum is 2 weeks (13 days)

def get_match_id(start_date, end_date, year_month = "2022-09"):
    """Download the sample-match listing for every day in a range.

    One request is sent per day to the kakao-shard ``samples`` endpoint, using
    the notebook-level ``header`` dict for authentication.

    Parameters
    ----------
    start_date, end_date : int
        First and last day of the month to query (both inclusive).
    year_month : str, default "2022-09"
        Year and month in ``YYYY-MM`` form that the days belong to.

    Returns
    -------
    list
        The decoded JSON payload of every successful (HTTP 200) request, in
        day order. A 400 response ends the download early and returns what
        has been collected so far.
    """
    json_lst = []
    for request_count, day in enumerate(tqdm(range(start_date, end_date + 1)), start = 1):
        # {day:02d} zero-pads single-digit days, so no separate branch is needed.
        url = ('https://api.pubg.com/shards/kakao/samples'
               f'?filter[createdAt-start]={year_month}-{day:02d}T00%3A00%3A00Z')
        request = requests.get(url, headers = header)

        if request.status_code == 400:
            print(f"day {day} is not updated or outdated")
            return json_lst

        if request.status_code == 200:
            json_lst.append(request.json())
            print(f"day {day} done")
        else:
            # Error payloads carry no match list; storing them would only break
            # the parsing step later on.
            print(f"day {day} skipped")
        print("status code: ", request.status_code)

        # Pause after every 10th request, but not after the final one.
        if request_count % 10 == 0 and day < end_date:
            print("waiting to refresh rate limit")
            time.sleep(60)
    return json_lst
    

In [5]:
# Download the sample listing for a single day (start and end are both day 27).
json_lst = get_match_id(27, 27)

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.22it/s]

day 27 done
status code:  200





### get match_id

In [6]:
def get_match_id_df(match_json_lst):
    """Collect the match references of several sample payloads into one table.

    Parameters
    ----------
    match_json_lst : list of dict
        Decoded sample payloads; each must provide the list found at
        ``["data"]["relationships"]["matches"]["data"]``.

    Returns
    -------
    pandas.DataFrame
        One row per match reference, in payload order, with a fresh 0..n-1
        index. An empty input list yields an empty DataFrame.
    """
    frames = [
        pd.DataFrame(payload["data"]["relationships"]["matches"]["data"])
        for payload in match_json_lst
    ]
    # pd.concat rejects an empty list, so the no-payload case is handled explicitly.
    if not frames:
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated rows on every iteration.
    return pd.concat(frames, axis = 0).reset_index(drop = True)


In [7]:
# Flatten the downloaded payloads into one table; its "id" column drives the match download.
match_id_df = get_match_id_df(json_lst)

In [8]:
# Probe the /matches endpoint with one hard-coded match id.
# The response is not inspected here, and `request` is reassigned by the download loop that follows.
url = "https://api.pubg.com/shards/kakao/matches/23c15aa7-d87d-494d-b926-9796940baed9"
header = {
    'Authorization' : api_key,
    'Accept' : accept
}
request = requests.get(url, headers = header)

In [9]:
# Download every sampled match; each successful response becomes one normalized row.
match_frames = []
for m_id in tqdm((match_id_df["id"])):
    url = f"https://api.pubg.com/shards/kakao/matches/{m_id}"
    request = requests.get(url, headers = header)
    if request.status_code == 200:
        match_frames.append(json_normalize(request.json()))
    elif request.status_code == 404:
        print("The specified resource was not found")
    else:
        # Report other failures (rate limit, auth, ...) instead of dropping them silently.
        print(f"match {m_id} skipped, status code: {request.status_code}")

# Concatenate once at the end: growing a DataFrame inside the loop copies all
# previously collected rows on every iteration.
match_data = pd.concat(match_frames, axis = 0) if match_frames else pd.DataFrame()
# Keep only the matches whose matchType is "competitive".
match_json = match_data.loc[match_data["data.attributes.matchType"] == "competitive",:].reset_index(drop = True)
match_json[:2]

100%|█████████████████████████████████████████| 218/218 [00:10<00:00, 20.54it/s]


Unnamed: 0,included,data.type,data.id,data.attributes.createdAt,data.attributes.shardId,data.attributes.tags,data.attributes.mapName,data.attributes.isCustomMatch,data.attributes.duration,data.attributes.stats,data.attributes.gameMode,data.attributes.titleId,data.attributes.matchType,data.attributes.seasonState,data.relationships.assets.data,data.relationships.rosters.data,data.links.schema,data.links.self,links.self
0,"[{'type': 'participant', 'id': 'b0d85fbe-f44f-...",match,079b9626-0e73-489b-9001-91044247dd08,2022-09-27T03:06:50Z,kakao,,Desert_Main,False,1798,,squad,bluehole-pubg,competitive,progress,"[{'type': 'asset', 'id': 'e1d4eb7b-3e15-11ed-9...","[{'type': 'roster', 'id': '2ce608cc-83a1-4327-...",,https://api.pubg.com/shards/kakao/matches/079b...,https://api-origin.playbattlegrounds.com/shard...
1,"[{'type': 'participant', 'id': '477262d3-1126-...",match,33eceeba-4a2e-403b-83e0-1223080b6775,2022-09-27T03:14:38Z,kakao,,Tiger_Main,False,1780,,squad,bluehole-pubg,competitive,progress,"[{'type': 'asset', 'id': 'edcafa93-3e16-11ed-a...","[{'type': 'roster', 'id': '29c0fc29-0b94-4dfa-...",,https://api.pubg.com/shards/kakao/matches/33ec...,https://api-origin.playbattlegrounds.com/shard...


### Split to Roster_df & Participant_df

In [12]:
# pip install chicken_dinner

In [10]:
%%time

pubg = PUBG(api_key, shard = "kakao")

participant_dict = {}
roster_dict = {}
participant_df = pd.DataFrame()

for match in tqdm(range(len(match_json))):
    
    # get roster_id
    match_id = match_json.iloc[match, ]['data.id']
    # match_id = '4a9e8350-c38b-4552-ba24-ef3311912fdc'
    current_match = pubg.match(match_id)
    roster_dict.update(current_match.participant_to_roster)
    
    # get participant_id
    for included in match_json.iloc[match, ]['included']:
        if included['type'] == 'participant':

            participants = pd.json_normalize(included['attributes']['stats'])
            participants['participant_id'] = included['id']
            participant_df = pd.concat([participant_df, participants], axis = 0, ignore_index = True)

participant_df = participant_df[['participant_id', 'DBNOs', 'assists', 'boosts', 'damageDealt', 'deathType',
                                 'headshotKills', 'heals', 'killPlace', 'killStreaks', 'kills',
                                 'longestKill', 'name', 'playerId', 'revives', 'rideDistance',
                                 'roadKills', 'swimDistance', 'teamKills', 'timeSurvived',
                                 'vehicleDestroys', 'walkDistance', 'weaponsAcquired', 'winPlace']]

# roster_df = pd.DataFrame({'participant_id' : roster_dict.keys(), 'roster_id' : roster_dict.values()})
# participant_df = pd.merge(participant_df, roster_df)
participant_df

100%|███████████████████████████████████████████| 47/47 [00:02<00:00, 18.59it/s]

CPU times: user 2.08 s, sys: 32.7 ms, total: 2.12 s
Wall time: 2.53 s





Unnamed: 0,participant_id,DBNOs,assists,boosts,damageDealt,deathType,headshotKills,heals,killPlace,killStreaks,kills,longestKill,name,playerId,revives,rideDistance,roadKills,swimDistance,teamKills,timeSurvived,vehicleDestroys,walkDistance,weaponsAcquired,winPlace
0,b0d85fbe-f44f-45cf-a297-00e24ed21c1e,2,2,3,287.676730,byplayer,0,1,20,1,1,41.63401,14K-Jalhae,account.279199103dc547f89683a59632b47110,0,6559.7600,0,0.0,0,1109,0,1658.52940,5,10
1,18022219-5e38-4fbd-a7e2-4dcb6b01549a,0,0,0,53.060320,byplayer,0,0,57,0,0,0.00000,sexy-kdh,account.35a0b81dcc444561ab65c310898f3dc7,0,0.0000,0,0.0,0,213,0,240.23311,4,15
2,506631a3-71ef-4450-af80-d2a2969fb149,0,0,1,18.449999,byplayer,0,0,30,0,0,0.00000,AXIS_MOON2,account.f5e2d7016ec842a7821a90bc03708ec1,0,1938.5726,0,0.0,0,507,0,702.10490,4,3
3,5952413c-9548-45da-845b-4c8864f23216,0,0,5,425.887900,byplayer,0,6,33,0,0,0.00000,14K-fightingsoo,account.14e3c139bc6b479abb331a89e9909980,0,4144.7400,0,0.0,0,1531,0,1395.89940,5,5
4,ca7946cd-16a0-49f4-9131-078d3de57ef0,0,0,5,60.277714,byplayer,0,3,49,0,0,0.00000,S0-H0T,account.abfc382406a044199ff3b06d92dfc224,0,3269.2507,0,0.0,0,744,0,828.21234,5,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2876,2fa1349d-5f10-4dab-8268-f96dfae5040b,1,1,10,449.390230,alive,0,6,9,1,2,83.17705,blackdragon12345,account.b8c84afae55f4251a81d0bc60f7c67f7,0,6331.7305,0,0.0,0,1810,0,3325.77540,4,1
2877,8076f279-81ec-4794-b8fa-5ea793a00651,1,0,5,411.608520,byplayer,0,2,44,0,0,0.00000,This_Is_KOR,account.f045cdfe4ce14119a7a1d72af6edf3d3,0,0.0000,0,0.0,0,1119,0,2030.63500,7,12
2878,44d3b106-45ea-49de-8bcc-ee77279f9da7,0,1,7,18.810000,byplayer,0,1,35,0,0,0.00000,DDOHAo_ov,account.02e98b4eb4674aaea3c62536cd607084,0,6141.2007,0,0.0,0,1480,0,2468.64650,5,4
2879,d021d03b-f8d8-4f6a-a55a-31ce09018ca5,2,0,1,136.643020,byplayer,1,0,13,1,2,23.25779,18K-TheHaGi,account.207725b74cec4d77919295d2cb05453f,0,0.0000,0,0.0,0,268,0,277.62695,3,16


### Number of unique players

In [11]:
# participant_df holds one row per participant entry of every match, so the row
# count includes repeat appearances; playerId collapses them to distinct accounts.
print("Number of players in total: ", participant_df.shape[0])
print("Number of unique players: ", participant_df["playerId"].nunique())

Number of players in total:  2881
Number of unique players:  2594


## SeasonId

### Current Season of Pubg(Kakao)

In [12]:
url = "https://api.pubg.com/shards/kakao/seasons"
request = requests.get(url, headers = header)
# Stop here on an HTTP error instead of failing later on a missing "data" key.
request.raise_for_status()
seasons = request.json()["data"]

# Ids of the seasons that are flagged as current and whose id contains "pc".
current_pc_seasons = [
    season["id"]
    for season in seasons
    if "pc" in season["id"] and season["attributes"]["isCurrentSeason"]
]
if not current_pc_seasons:
    raise ValueError("no current pc season found in the seasons response")
season_id = current_pc_seasons[0]

# Distinct account ids, in order of their last appearance in participant_df.
account_id_lst = participant_df["playerId"].drop_duplicates(keep = "last").reset_index(drop = True).to_list()

print("number of accounts (unique players) : ",len(account_id_lst))
print("current season: ", season_id)

number of accounts (unique players) :  2594
current season:  division.bro.official.pc-2018-19


## Rankedplayer Table

In [21]:
%%time
ranked_df = pd.DataFrame()
for account_id in tqdm(account_id_lst[:1297]):
    
    url = f"https://api.pubg.com/shards/kakao/players/{account_id}/seasons/{season_id}/ranked"

    try:        
        request = requests.get(url, headers = header)
    except JSONDecodeError:
        print("JSONDecodeError occured")
        time.sleep(60)
        request = requests.get(url, headers = header)
        
    if request.status_code == 429:
        time.sleep(60)
        request = requests.get(url, headers = header)
    ranked_df = pd.concat([ranked_df, json_normalize(request.json()["data"])], axis = 0, ignore_index = True)
    


100%|█████████████████████████████████████| 1297/1297 [2:21:10<00:00,  6.53s/it]

CPU times: user 43.7 s, sys: 5.13 s, total: 48.8 s
Wall time: 2h 21min 10s





In [27]:
print(ranked_df.shape)
# Keep the last row recorded for each player id, then renumber the rows from 0.
ranked_df = (
    ranked_df
    .drop_duplicates(subset = "relationships.player.data.id", keep = "last")
    .reset_index(drop = True)
)
print(ranked_df.shape)

(2594, 36)
(2594, 36)


### reset column names

In [30]:
def get_column_name(df):
    """Build short column names from the dotted names of ``df``.

    Rules, applied to each column name split on ".":
    - first part "attributes": the last part is used; when that last part is
      "tier" or "subTier" the last two parts are joined with "_".
    - first part "relationships": the 2nd and 4th parts are joined with "_".
    - anything else is returned unchanged.

    Returns the new names as a list, in column order.
    """
    def _short_name(col):
        parts = col.split(".")
        head, leaf = parts[0], parts[-1]
        if head == "attributes":
            if leaf in ("tier", "subTier"):
                return "_".join(parts[-2:])
            return leaf
        if head == "relationships":
            return "_".join([parts[1], parts[3]])
        return col

    return [_short_name(col) for col in df.columns]

In [31]:
# Replace the dotted JSON paths with the short names built by get_column_name.
ranked_df.columns = get_column_name(ranked_df)
ranked_df.head()

Unnamed: 0,type,currentTier_tier,currentTier_subTier,currentRankPoint,bestTier_tier,bestTier_subTier,bestRankPoint,roundsPlayed,avgRank,avgSurvivalTime,top10Ratio,winRatio,assists,wins,kda,kdr,kills,deaths,roundMostKills,longestKill,headshotKills,headshotKillRatio,damageDealt,dBNOs,reviveRatio,revives,heals,boosts,weaponsAcquired,teamKills,playTime,killStreak,player_type,player_id,season_type,season_id
0,rankedplayerstats,Diamond,4,3182.0,Diamond,3,3244.0,342.0,8.488304,0.0,0.631579,0.078947,190.0,27.0,1.996904,0.0,455.0,323.0,0.0,0.0,0.0,0.0,85861.48,464.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,player,account.279199103dc547f89683a59632b47110,season,division.bro.official.pc-2018-19
1,rankedplayerstats,Silver,2,1890.0,Platinum,4,2607.0,138.0,10.304348,0.0,0.463768,0.07971,112.0,11.0,1.837209,0.0,125.0,129.0,0.0,0.0,0.0,0.0,28125.125,130.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,player,account.f5e2d7016ec842a7821a90bc03708ec1,season,division.bro.official.pc-2018-19
2,rankedplayerstats,Diamond,5,3077.0,Diamond,3,3276.0,380.0,7.442105,0.0,0.702632,0.060526,154.0,23.0,1.049724,0.0,226.0,362.0,0.0,0.0,0.0,0.0,62115.945,268.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,player,account.14e3c139bc6b479abb331a89e9909980,season,division.bro.official.pc-2018-19
3,rankedplayerstats,Platinum,2,2878.0,Platinum,1,2937.0,114.0,7.175438,0.0,0.736842,0.070175,49.0,8.0,1.216981,0.0,80.0,106.0,0.0,0.0,0.0,0.0,19972.184,93.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,player,account.f0e302f6c5bf462e8a0e79eb42465ec7,season,division.bro.official.pc-2018-19
4,rankedplayerstats,Platinum,4,2664.0,Platinum,3,2734.0,254.0,9.240157,0.0,0.582677,0.059055,116.0,15.0,1.560166,0.0,260.0,241.0,0.0,0.0,0.0,0.0,48243.703,267.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,player,account.986028213eb94250b0783b0289c45344,season,division.bro.official.pc-2018-19


### Drop columns

In [40]:
def drop_columns(df):
    """Return ``df`` without the columns that hold a single distinct value.

    Distinct values are counted with ``value_counts``, so missing values are
    not counted. The last column is left out of the scan and is therefore
    always kept. The input frame is not modified.
    """
    single_valued = [
        name
        for name in df.columns[:-1]
        if len(df[name].value_counts()) == 1
    ]
    return df.drop(columns = single_valued)



def change_type(df):
    """Drop incomplete rows and cast the count-like numeric columns to int.

    Every non-object column is cast to ``int`` except the ratio/average
    columns ("avgRank", "top10Ratio", "winRatio", "kda", "damageDealt"),
    which keep their dtype.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is left untouched.
    """
    float_columns = {"avgRank", "top10Ratio", "winRatio", "kda", "damageDealt"}
    # Rows with any missing value are removed first so the int cast cannot hit NaN.
    complete_rows = df.dropna(axis = 0)
    numeric_columns = complete_rows.select_dtypes(exclude = "object").columns
    int_dtypes = {col: int for col in numeric_columns if col not in float_columns}
    # astype with a mapping returns a new frame, so nothing is assigned into the
    # frame produced by dropna (the pattern behind SettingWithCopyWarning).
    return complete_rows.astype(int_dtypes)


In [41]:
# Remove the single-valued columns, then drop incomplete rows and cast the
# count-like columns to int. Note that ranked_df is overwritten by the first step.
ranked_df = drop_columns(ranked_df)
rank_stats_df = change_type(ranked_df)
rank_stats_df

Unnamed: 0,currentTier_tier,currentTier_subTier,currentRankPoint,bestTier_tier,bestTier_subTier,bestRankPoint,roundsPlayed,avgRank,top10Ratio,winRatio,assists,wins,kda,kills,deaths,damageDealt,dBNOs,player_id,season_id
0,Silver,4,1687,Silver,1,1921,65,10.892307,0.461538,0.061538,18,4,1.083333,47,60,8589.566,43,account.6394bd8602da4beaaba53455e0d7cb48,division.bro.official.pc-2018-19
1,Silver,1,1981,Gold,4,2136,142,10.091549,0.478873,0.028169,52,4,1.130435,104,138,24746.360,134,account.79dbea3461384834bdf3f95c67bf78c1,division.bro.official.pc-2018-19
2,Diamond,4,3183,Diamond,3,3221,87,6.080460,0.781609,0.195402,90,17,3.608108,177,74,40527.180,168,account.70ba22c3c1cc4d6481f7b4db7cd77bda,division.bro.official.pc-2018-19
3,Platinum,1,2912,Diamond,5,3080,224,8.415178,0.620536,0.098214,151,22,2.354369,334,206,77854.730,370,account.35a4eb4b796d463390cdba1ae37e89b5,division.bro.official.pc-2018-19
4,Diamond,4,3128,Diamond,2,3318,200,8.240000,0.620000,0.105000,209,21,3.502732,432,183,94867.360,440,account.acc6129f46a449698d755b300ff7cafa,division.bro.official.pc-2018-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38733,Diamond,1,3457,Diamond,1,3458,122,7.122951,0.713115,0.188525,125,23,3.990196,282,102,66273.460,292,account.b8c84afae55f4251a81d0bc60f7c67f7,division.bro.official.pc-2018-19
38734,Gold,4,2170,Platinum,5,2584,538,9.842008,0.522305,0.027881,178,15,1.264258,487,526,96695.660,540,account.f045cdfe4ce14119a7a1d72af6edf3d3,division.bro.official.pc-2018-19
38735,Gold,3,2279,Platinum,3,2771,281,8.370107,0.619217,0.081851,104,23,0.727273,88,264,23116.568,89,account.02e98b4eb4674aaea3c62536cd607084,division.bro.official.pc-2018-19
38736,Gold,1,2430,Platinum,2,2821,605,8.687603,0.621488,0.044628,254,27,1.080480,377,584,90484.780,454,account.207725b74cec4d77919295d2cb05453f,division.bro.official.pc-2018-19


---

# Insert into database

In [None]:
# !pip install pymysql
# !pip install sqlalchemy

In [57]:
import pandas as pd
import pymysql

from sqlalchemy import create_engine

### local
# host = "localhost"
# user = "root"
# password = "mysql"
# db = "pubg"
# charset = "utf8"

### gcp
# The connection settings are read from the environment so that no credentials
# are stored in the notebook. A missing variable raises a KeyError right here.

# host is the public ip address for the db in gcp
host = os.environ["PUBG_DB_HOST"]
# user for the db
user = os.environ["PUBG_DB_USER"]
# db password
password = os.environ["PUBG_DB_PASSWORD"]
# db or schema name
db = os.environ["PUBG_DB_NAME"]
charset = "utf8"


In [58]:
# Build the connection URL:
# - user and password are URL-encoded so characters such as "@" or "/" cannot break URL parsing;
# - the charset setting is passed to the driver through the query string;
# - create_engine no longer accepts `encoding` (removed in SQLAlchemy 2.0; "utf-8" was its default).
engine = create_engine(
    f"mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}:3306/{db}?charset={charset}"
)

# The context manager closes the connection even when one of the uploads fails.
with engine.connect() as engine_conn:
    # rank_stats_df: replaces the "rank_stats" table
    rank_stats_df.to_sql(name = "rank_stats", con = engine_conn, if_exists = "replace", index = False)

    # match_id_df: replaces the "match_id" table
    match_id_df.to_sql(name = "match_id", con = engine_conn, if_exists = "replace", index = False)
