# 준비

In [0]:
import sys
IN_COLAB = 'google.colab' in sys.modules

## 패키지 설치

In [2]:
!pip install pandas numpy tqdm==4.43.0 bayesian-optimization lightgbm



## 라이브러리 임포트

In [0]:
import pandas as pd                         # 데이터 분석 라이브러리
import numpy as np                          # 계산 라이브러리
from tqdm.auto import tqdm                  # 진행바
from sklearn.metrics import roc_auc_score   # AUC 스코어 계산
from sklearn.model_selection import KFold   # K-fold CV    
from bayes_opt import BayesianOptimization  # 베이지안 최적화 라이브러리  
from functools import partial               # 함수 변수 고정
import lightgbm as lgb                      # LightGBM 라이브러리
import gc
import warnings                             
warnings.filterwarnings("ignore")           # 경고 문구 미표시
tqdm.pandas()

## 상수 정의

In [0]:
DATA_DIR = 'data'

## Colab 설정

In [5]:
if IN_COLAB:
    DRIVE_DIR = '/content/drive'
    from google.colab import drive
    drive.mount(DRIVE_DIR)

    import os
    os.symlink(f'{DRIVE_DIR}/My Drive/data', DATA_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


FileExistsError: ignored

# 데이터 샘플링

In [0]:
train.to_feather('data/train.feather')

In [0]:
train = pd.read_csv('data/train.csv')

In [0]:
train = pd.read_feather('data/train.feather')

In [7]:
train.head()

Unnamed: 0,game_id,winner,time,player,species,event,event_contents
0,0,1,0.0,0,T,Camera,"at (145.25, 21.5078125)"
1,0,1,0.0,1,T,Camera,"at (22.75, 147.0078125)"
2,0,1,0.02,0,T,Selection,['OrbitalCommand [3080001]']
3,0,1,0.02,0,T,Ability,(1360) - TrainSCV
4,0,1,0.14,0,T,Camera,"at (142.99609375, 24.50390625)"


In [0]:
train.shape

(67091776, 7)

In [0]:
len(train['game_id'].unique())

38872

In [0]:
def sample_data(df, n_games, seed=None):
    if seed is not None:
        np.random.seed(seed)

    game_ids = df['game_id'].unique()
    sampled_game_ids = np.random.choice(game_ids, size=n_games, replace=False)

    return df[df['game_id'].isin(sampled_game_ids)]

In [0]:
sampled_train = sample_data(train, n_games=300, seed=0)

In [0]:
sampled_train.to_csv('data/sampled_train.csv', index=False)

# 특징 추출

In [0]:
train = pd.read_csv('data/sampled_train.csv')

## 승리자 추출

In [0]:
def extract_winner(df):
    return df.groupby(['game_id'])['winner'].first()

## 종족 추출

In [0]:
def species_converter(string):
    if string == 'T':
        return 0
    elif string == 'P':
        return 1
    elif string == 'Z':
        return 2
    else:
        raise ValueError

def extract_species(df):
    species = df.groupby(['game_id', 'player'])['species'].first()

    species_df = species.unstack(level=-1)
    species_df.columns = species_df.columns.map(lambda x: f'p{x}_species')
    species_df.columns.name = None

    species_df = species_df.applymap(species_converter)

    return species_df

## 플레이어별 이벤트 횟수

In [0]:
def extract_event_counts(df):
    events = ['Ability', 'AddToControlGroup', 'Camera', 'ControlGroup', 'GetControlGroup', 'Right Click', 'Selection', 'SetControlGroup']

    event_counts = df.groupby(['game_id', 'player'])['event'].value_counts()
    event_counts = event_counts.unstack(level=-1).unstack(level=-1)
    event_counts.columns = event_counts.columns.map(lambda x: f'p{x[1]}_event_{x[0]}')
    event_counts = event_counts.fillna(0)

    result = pd.DataFrame(index=event_counts.index)

    for player in ['p0', 'p1']:
        for event in events:
            result[f'{player}_event_{event}'] = event_counts.get(f'{player}_event_{event}', 0.0)

    for event in events:
        result[f'delta_event_{event}'] = result[f'p0_event_{event}'] - result[f'p1_event_{event}']

    return result

## 게임 시간 추출

In [0]:
def extract_playtime(df):
    def min_to_sec(t):
        m = int(t)
        s = (t - m) * 100
        return (m * 60) + s

    return df.groupby(['game_id'])['time'].max().apply(min_to_sec)

## 시뮬레이션 후 특징 추출

In [0]:
class GameState:
    def init(self):
        pass

    def update(self, game_id, time, player, species, event, event_contents):
        pass

    def to_dict(self):
        return {}


class GameStateManager:
    def __init__(self):
        self._states = []

    def add(self, game_state):
        self._states.append(game_state)

    def init(self):
        for state in self._states:
            state.init()

    def update(self, game_id, time, player, species, event, event_contents):
        for state in self._states:
            state.update(game_id, time, player, species, event, event_contents)

    def to_dict(self):
        ret = {}
        for state in self._states:
            ret.update(state.to_dict())
        return ret


def extract_game_states(df, game_states):
    mat = df.to_numpy()

    data = {}

    cur_game_id = -1

    game_state = GameStateManager()
    for state in game_states:
        game_state.add(state)

    for row in tqdm(mat):
        game_id, time, player, species, event, event_contents = row

        if game_id != cur_game_id:
            if cur_game_id != -1:
                data[cur_game_id] = game_state.to_dict()

            cur_game_id = game_id
            game_state.init()

        game_state.update(game_id, time, player, species, event, event_contents)

    if cur_game_id != -1:
        data[cur_game_id] = game_state.to_dict()

    del mat
    gc.collect()

    return pd.DataFrame.from_dict(data, orient='index')

### 카메라 분산

In [0]:
class CameraState(GameState):
    def init(self):
        self.p0_camera_x = []
        self.p0_camera_y = []
        self.p1_camera_x = []
        self.p1_camera_y = []

    def update(self, game_id, time, player, species, event, event_contents):
        if event == 'Camera':
            camera_x, camera_y = CameraState.parse_at(event_contents)

            if player == 0:
                self.p0_camera_x.append(camera_x)
                self.p0_camera_y.append(camera_y)
            else:
                self.p1_camera_x.append(camera_x)
                self.p1_camera_y.append(camera_y)

    def to_dict(self):
        return {
            'p0_camera_x_var': np.var(self.p0_camera_x),
            'p0_camera_y_var': np.var(self.p0_camera_y),
            'p1_camera_x_var': np.var(self.p1_camera_x),
            'p1_camera_y_var': np.var(self.p1_camera_y),
        }

    @staticmethod
    def parse_at(event_contents):
        mid = event_contents.find(',', 4)
        x = float(event_contents[4:mid])
        y = float(event_contents[mid+2:-1])
        return x, y

### Ability 개수

In [0]:
class AbilityCounts(GameState):
    abilities = ['(1360) - TrainSCV', '(1021) - BuildSupplyDepot', '(480) - Stop', '(1022) - BuildRefinery', '(1023) - BuildBarracks', '(1020) - BuildCommandCenter', '(13E0) - TrainMarine', '(1C60) - UpgradeToOrbitalCommand', '(102A) - BuildFactory', '(5A0) - Attack', '(1261) - BuildBarracksReactor', '(E20) - ExtraSupplies', '(12A0) - BuildFactoryTechLab', '(4E40) - KD8Charge', '(1000) - SCVRepair', '(1401) - BuildSiegeTank', '(102B) - BuildStarport', '(1024) - BuildEngineeringBay', '(1418) - BuildWidowMine', '(10E0) - SiegeMode', '(1420) - TrainMedivac', '(1446) - UpgradeTerranInfantryArmor1', '(F61) - CancelSlot', '(B40) - CalldownMULE', '(1280) - LiftBarracks', '(1260) - BuildBarracksTechLab', '(1300) - LiftStarport', '(1380) - LandBarracks', '(15E0) - TrainProbe', '(1541) - BuildPylon', '(1543) - BuildGateway', '(1542) - BuildAssimilator', '(22A0) - CancelLast', '(13E1) - TrainReaper', '(154E) - BuildCyberneticsCore', '(1586) - TrainAdept', '(2720) - TrainMothershipCore', '(1D86) - ResearchWarpGate', '(154D) - BuildRoboticsFacility', '(14A0) - ResearchStimpack', '(13A0) - LowerSupplyDepot', '(4C1) - Patrol', '(1160) - ScannerSweep', '(15C0) - TrainWarpPrism', '(154C) - BuildRoboticsBay', '(4B80) - AdeptPhaseShift', '(E80) - ChronoBoost', '(12E0) - BuildStarportTechLab', '(13C0) - RaiseSupplyDepot', '(16A2) - ResearchGraviticDrive', '(F60) - CancelLast', '(3040) - BurrowWidowMine', '(1C80) - TransformToWarpGate', '(1544) - BuildForge', '(3480) - MothershipCorePurifyNexus', '(15D2) - TrainDisruptor', '(1AC6) - TrainAdept', '(1540) - BuildNexus', '(1546) - BuildTwilightCouncil', '(1680) - UpgradeGroundWeapons1', '(1563) - UnloadUnitWarpPrism', '(4120) - PurificationNovaTargeted', '(1025) - BuildMissileTurret', '(1AC1) - WarpInStalker', '(1442) - UpgradeTerranInfantryWeapons1', '(1562) - UnloadTargetWarpPrism', '(15C2) - TrainColossus', '(1DA1) - ResearchBlink', '(1220) - LiftCommandCenter', '(1240) - LandCommandCenter', '(15C1) - TrainObserver', '(1820) - MorphDrone', '(16E3) - BuildSpawningPool', '(1822) - MorphOverlord', '(1E60) - TrainQueen', '(1821) - MorphZergling', '(1581) - TrainStalker', '(17C1) - EvolveMetabolicBoost', '(16E0) - BuildHatchery', '(DE0) - SpawnLarva', '(4C2) - HoldPosition', '(16ED) - BuildRoachWarren', '(15A8) - TrainOracle', '(16E4) - BuildEvolutionChamber', '(16EF) - BuildSporeCrawler', '(2080) - CreepTumor', '(2120) - BuildCreepTumor', '(1740) - UpgradeToLair', '(1D80) - UpgradeAirWeapons1', '(1726) - ResearchZergMissileWeaponsLevel1', '(16E2) - BuildExtractor', '(4AE0) - BuildOracleStasisTrap', '(43E0) - PrismaticAlignment', '(15A4) - TrainVoidRay', '(16E5) - BuildHydraliskDen', '(1545) - BuildFleetBeacon', '(1723) - ResearchZergGroundArmorsLevel1', '(17E1) - ResearchEvolveMuscularAugments', '(1823) - MorphHydralisk', '(3380) - Revelation', '(1547) - BuildPhotonCannon', '(F00) - Gather', '(31A0) - MedivacSpeedBoost', '(1143) - UnloadUnitMedivac', '(1426) - TrainLiberator', '(13E3) - TrainMarauder', '(1142) - UnloadTargetMedivac', '(1B40) - Blink', '(154A) - BuildTemplarArchive', '(1683) - UpgradeGroundArmor1', '(15C3) - TrainImmortal', '(12C0) - LiftFactory', '(1320) - LandFactory', '(1405) - BuildHellion', '(1340) - LandStarport', '(1421) - TrainBanshee', '(14E0) - ResearchCloakingField', '(FE0) - CancelBuilding', '(1580) - TrainZealot', '(1549) - BuildStargate', '(1D81) - UpgradeAirWeapons2', '(F80) - CancelLast', '(1026) - BuildBunker', '(1DA0) - ResearchCharge', '(12A1) - BuildFactoryReactor', '(15A0) - TrainPhoenix', '(17A1) - EvolvePneumatizedCarapace', '(F81) - CancelSlot', '(1829) - MorphRoach', '(16EE) - BuildSpineCrawler', '(D61) - EvolveGlialReconstitution', '(102D) - BuildArmory', '(1720) - ResearchZergMeleeWeaponsLevel1', '(2060) - GenerateCreep', '(3160) - UpgradeToLurkerDenMP', '(1BA0) - MorphToOverseer', '(1525) - UpgradeVehicleWeapons1', '(1701) - ReturnCargo', '(F01) - ReturnCargo', '(EE1) - ReturnCargo', '(102F) - BuildFusionCore', '(154B) - BuildDarkShrine', '(12BE)', '(1AC4) - WarpInDarkTemplar', '(4C60) - LiberatorAGTarget', '(1422) - TrainRaven', '(1CC0) - LiftOrbitalCommand', '(11E1) - UnloadAllBunker', '(1424) - TrainViking', '(1440) - ResearchHiSecAutoTracking', '(16E8) - BuildInfestationPit', '(1204) - LoadAllCommandCenter', '(1201) - UnloadAllCommandCenter', '(3060) - UnburrowWidowMine', '(42C0) - OracleWeapon', '(16EA) - BuildBanelingNest', '(920) - TrainBaneling', '(16E6) - BuildSpire', '(4EC0) - AdeptShadePhaseShiftCancel', '(103E) - CancelTerranBuilding', '(1DA2) - ResearchAdeptPiercingAttack', '(1D40) - TransportMode', '(40A0) - RavagerCorrosiveBile', '(4020) - MorphToRavager', '(4C80) - LiberatorAATarget', '(12E1) - BuildStarportReactor', '(1C00) - EvolveCentrifugalHooks', '(14A1) - ResearchCombatShield', '(2C80) - BattleMode', '(22A1) - CancelSlot', '(4C3) - ScanMove', '(7E0) - SalvageShared', '(1100) - TankMode', '(42C1) - OracleWeaponOff', '(D41) - SetWorkerRally', '(D40) - SetUnitRally', '(CC0) - SetRallyPoint', '(14A2) - ResearchConcussiveShells', '(1060) - UseStimpack', '(1407) - TrainCyclone', '(1AC0) - WarpInZealot', '(1D20) - PhasingMode', '(1AC3) - WarpInHighTemplar', '(1CE0) - LandOrbitalCommand', '(1824) - MorphMutalisk', '(20A0) - UprootSpineCrawler', '(20E0) - RootSpineCrawler', '(152E) - ResearchTerranVehicleAndShipArmorsLevel1', '(D20) - SetWorkerRally', '(F40) - CancelLast', '(1EA0) - QueenTransfusion', '(2160) - ArchonWarpSelection', '(1AC5) - WarpInSentry', '(1585) - TrainSentry', '(1721) - ResearchZergMeleeWeaponsLevel2', '(BE0) - SpawnChangeling', '(4160) - LockOn', '(15A2) - TrainCarrier', '(1BC0) - UpgradeToPlanetaryFortress', '(5640) - MorphToTransportOverlord', '(1A62) - UnloadTargetOverlord', '(127E)', '(1120) - CloakBanshee', '(14E9) - ResearchBansheeSpeed', '(1121) - DecloakBanshee', '(2140) - BuildAutoTurret', '(14ED) - ResearchRavenRecalibratedExplosives', '(4EA0) - AdeptPhaseShiftCancel', '(12FE)', '(14C1) - ResearchInfernalPreIgniter', '(A40) - HallucinatePhoenix', '(1681) - UpgradeGroundWeapons2', '(16A5) - ResearchExtendedThermalLance', '(17A3) - EvolveBurrow', '(1D00) - ForceField', '(39A0) - TemporalField', '(2100) - RootSporeCrawler', '(1443) - UpgradeTerranInfantryWeapons2', '(1686) - UpgradeShields1', '(1404) - BuildThor', '(213E)', '(900) - MULERepair', '(14E3) - ResearchCorvidReactor', '(2B00) - MassRecallMothershipCore', '(1500) - ResearchPersonalCloaking', '(13E2) - TrainGhost', '(20E1) - CancelRootSpineCrawler', '(1C61) - CancelUpgradeToOrbitalCommand', '(1724) - ResearchZergGroundArmorsLevel2', '(1FC0) - Attack', '(1583) - TrainHighTemplar', '(5700) - ResearchDarkTemplarBlinkUpgrade', '(FE1) - HaltBuilding', '(FC0) - CancelLast', '(11E3) - UnloadUnitBunker', '(182B) - MorphCorruptor', '(16E9) - BuildNydusNetwork', '(1960) - BurrowZergling', '(1028) - BuildSensorTower', '(B60) - GravitonBeam', '(1029) - BuildGhostAcademy', '(D62) - EvolveTunnelingClaws', '(20C0) - UprootSporeCrawler', '(14C4) - ResearchDrillingClaws', '(1741) - CancelUpgradeToLair', '(11A0) - AssaultMode', '(11C0) - FighterMode', '(D00) - SetWorkerRally', '(1727) - ResearchZergMissileWeaponsLevel2', '(16C4) - ResearchPsiStormTech', '(880) - Explode', '(1687) - UpgradeShields2', '(8E0) - GuardianShield', '(1526) - UpgradeVehicleWeapons2', '(780) - SprayTerran', '(1A63) - UnloadUnitOverlord', '(182E) - MorphSwarmHost', '(1BE2) - EvolvePathogenGlands', '(1441) - UpgradeStructureArmor', '(1760) - UpgradeToHive', '(8A0) - ResearchFluxVanes', '(4C0) - Move', '(2DE0) - Attack', '(1700) - Gather', '(1800) - EvolveFlyerAttacks1', '(1447) - UpgradeTerranInfantryArmor2', '(1684) - UpgradeGroundArmor2', '(2180) - BuildNydusCanal', '(42A1) - UnloadAll', '(3180) - HallucinateOracle', '(55E0) - SpawnLocustsTargeted', '(2DA0) - UnburrowSwarmHost', '(FC1) - CancelSlot', '(1406) - BuildBattleHellion', '(EE0) - Gather', '(2B40) - StrikeMode', '(F41) - CancelSlot', '(1203) - UnloadUnitCommandCenter', '(1D83) - UpgradeAirArmor1', '(7C0) - SprayProtoss', '(1423) - TrainBattlecruiser', '(41E0) - Hyperjump', '(1180) - YamatoGun', '(15A9) - TrainTempest', '(1D60) - ResearchWeaponRefit', '(2F60) - DisableVolatileBurst', '(1780) - MorphToGreaterSpire', '(1840) - MorphToBroodLord', '(4220) - ThorAPMode', '(152B) - UpgradeShipWeapons1', '(14EE) - ResearchMedivacIncreaseSpeedBoost', '(1445) - ResearchNeosteelFrame', '(26C0) - UpgradeToMothership', '(18E0) - BurrowHydralisk', '(1AE0) - BurrowQueen', '(1B00) - UnburrowQueen', '(1900) - UnburrowHydralisk', '(E00) - UseStimpack', '(14EF) - ResearchLiberatorAGRangeUpgrade', '(1BA1) - CancelMorphToOverseer', '(1B21) - UnloadAllNydus', '(1803) - EvolveFlyerCarapace1', '(1920) - BurrowRoach', '(1080) - CloakGhost', '(1081) - DecloakGhost', '(2040) - StopRedirect', '(1480) - TrainNuke', '(182A) - MorphInfestor', '(22C1) - CancelSlot', '(16A1) - ResearchGraviticBoosters', '(5641) - Cancel', '(B20) - SeekerMissile', '(1B23) - UnloadUnitNydus', '(1140) - LoadTargetMedivac', '(4021) - Cancel', '(1CA0) - TransformToGateway', '(5600) - VoidRaySwarmDamageBoostCancel', '(B61) - CancelGravitonBeam', '(1584) - TrainDarkTemplar', '(1980) - UnburrowZergling', '(1860) - BurrowBaneling', '(7A0) - SprayZerg', '(3120) - BurrowLurker', '(8A2) - ResearchAnionPulseCrystals', '(4040) - MorphToLurker', '(16E7) - BuildUltraliskCavern', '(EA2) - EvolveChitinousPlating', '(1826) - MorphUltralisk', '(3140) - UnburrowLurker', '(56A0) - ChannelSnipe', '(B01) - ReturnCargo', '(1BC1) - CancelUpgradeToPlanetaryFortress', '(483) - Dance', '(18C0) - UnburrowDrone', '(4380) - LocustMPFlyingSwoop', '(3161) - Cancel', '(41A0) - LockOnCancel', '(9A0) - BuildPointDefenseDrone', '(A80) - HallucinateStalker', '(2D80) - BurrowSwarmHost', '(18A0) - BurrowDrone', '(960) - Feedback', '(4240) - ThorNormalMode', '(3FC0) - CausticSpray', '(26C1) - CancelUpgradeToMothership', '(1E20) - EMPRound', '(1761) - CancelUpgradeToHive', '(1940) - UnburrowRoach', '(2101) - Cancel', '(1B60) - BurrowInfestor', '(1DC0) - TacticalNukeStrike', '(10C0) - HealMedivac', '(1880) - UnburrowBaneling', '(9E0) - HallucinateColossus', '(42A3)', '(1660) - TrainInterceptor', '(820) - HoldFireGhost', '(840) - GWeaponsFreeGhost', '(1FE0) - StimpackRedirect', '(2061) - StopGenerateCreep', '(AC0) - HallucinateWarpPrism', '(152C) - UpgradeShipWeapons2', '(1560) - LoadTarget', '(1801) - EvolveFlyerAttacks2', '(17C0) - EvolveAdrenalGlands', '(AA0) - HallucinateVoidRay', '(4AFE)', '(11E0) - LoadTargetBunker', '(2240) - Contaminate', '(1682) - UpgradeGroundWeapons3', '(2F61) - EnableVolatileBurst', '(40C0) - BurrowRavagerDown', '(40E0) - BurrowRavagerUp', '(1A60) - LoadTargetOverlord', '(B00) - Gather', '(152F) - ResearchTerranVehicleAndShipArmorsLevel2', '(22C0) - CancelLast', '(5560) - DarkTemplarBlink', '(1600) - PsionicStorm', '(AE0) - HallucinateZealot', '(1BE3) - EvolveNeuralParasite', '(4041) - Cancel', '(9C0) - HallucinateArchon', '(DA0) - SpawnInfestedTerran']
    ability_set = set(abilities)

    def init(self):
        self.counts = {
            0: {ability: 0 for ability in AbilityCounts.abilities},
            1: {ability: 0 for ability in AbilityCounts.abilities},
        }

    def update(self, game_id, time, player, species, event, event_contents):
        if event == 'Ability':
            contents = AbilityCounts.parse_contents(event_contents)
            ability = contents[0]
            if ability in AbilityCounts.ability_set:
                self.counts[player][ability] += 1

    def to_dict(self):
        p0_counts = {f'p0_ability_{ability}': self.counts[0][ability] for ability in AbilityCounts.abilities}
        p1_counts = {f'p1_ability_{ability}': self.counts[1][ability] for ability in AbilityCounts.abilities}
        return {**p0_counts, **p1_counts}

    @staticmethod
    def parse_contents(event_contents):
        contents = event_contents.split(';')
        return contents

384

In [0]:
df = train.drop(columns=['winner'])

In [16]:
extract_game_states(df, [
    CameraState(),
])

HBox(children=(FloatProgress(value=0.0, max=67091776.0), HTML(value='')))




Unnamed: 0,p0_camera_x,p0_camera_y,p1_camera_x,p1_camera_y
0,763.434578,1029.515414,24.314657,78.746863
1,671.434895,837.446806,1412.369713,2348.367561
2,1493.570080,628.467848,1435.172816,550.632165
3,1327.289239,1272.379892,1433.182454,1184.508068
4,1457.014632,1279.786625,650.662409,754.768815
...,...,...,...,...
38867,1246.874820,1214.428014,1914.465605,1433.958132
38868,1014.613696,2252.216101,596.899852,1344.258487
38869,1713.881894,721.842525,2348.298356,1414.454222
38870,6.834018,137.199129,668.822659,668.614059


## 실험

In [0]:
df = train

## Feature Engineering

In [0]:
def prepare_x_data(df):
    if 'winner' in df.columns:
        df = df.drop(columns=['winner'])

    features = []
    features.append(extract_playtime(df))
    features.append(extract_species(df))
    features.append(extract_event_counts(df))
    features.append(extract_game_states(df, [
        CameraState(),
        AbilityCounts(),
    ]))

    return pd.concat(features, axis=1)

In [0]:
def prepare_y_data(df):
    winners = extract_winner(df)
    return np.array(winners)

In [44]:
x_train = prepare_x_data(train)
y_train = prepare_y_data(train)
x_train.head()

HBox(children=(FloatProgress(value=0.0, max=67091776.0), HTML(value='')))




Unnamed: 0,time,p0_species,p1_species,p0_event_Ability,p0_event_AddToControlGroup,p0_event_Camera,p0_event_ControlGroup,p0_event_GetControlGroup,p0_event_Right Click,p0_event_Selection,p0_event_SetControlGroup,p1_event_Ability,p1_event_AddToControlGroup,p1_event_Camera,p1_event_ControlGroup,p1_event_GetControlGroup,p1_event_Right Click,p1_event_Selection,p1_event_SetControlGroup,delta_event_Ability,delta_event_AddToControlGroup,delta_event_Camera,delta_event_ControlGroup,delta_event_GetControlGroup,delta_event_Right Click,delta_event_Selection,delta_event_SetControlGroup,p0_camera_x_var,p0_camera_y_var,p1_camera_x_var,p1_camera_y_var,p0_ability_(1360) - TrainSCV,p0_ability_(1021) - BuildSupplyDepot,p0_ability_(480) - Stop,p0_ability_(1022) - BuildRefinery,p0_ability_(1023) - BuildBarracks,p0_ability_(1020) - BuildCommandCenter,p0_ability_(13E0) - TrainMarine,p0_ability_(1C60) - UpgradeToOrbitalCommand,p0_ability_(102A) - BuildFactory,...,p1_ability_(26C1) - CancelUpgradeToMothership,p1_ability_(1E20) - EMPRound,p1_ability_(1761) - CancelUpgradeToHive,p1_ability_(1940) - UnburrowRoach,p1_ability_(2101) - Cancel,p1_ability_(1B60) - BurrowInfestor,p1_ability_(1DC0) - TacticalNukeStrike,p1_ability_(10C0) - HealMedivac,p1_ability_(1880) - UnburrowBaneling,p1_ability_(9E0) - HallucinateColossus,p1_ability_(42A3),p1_ability_(1660) - TrainInterceptor,p1_ability_(820) - HoldFireGhost,p1_ability_(840) - GWeaponsFreeGhost,p1_ability_(1FE0) - StimpackRedirect,p1_ability_(2061) - StopGenerateCreep,p1_ability_(AC0) - HallucinateWarpPrism,p1_ability_(152C) - UpgradeShipWeapons2,p1_ability_(1560) - LoadTarget,p1_ability_(1801) - EvolveFlyerAttacks2,p1_ability_(17C0) - EvolveAdrenalGlands,p1_ability_(AA0) - HallucinateVoidRay,p1_ability_(4AFE),p1_ability_(11E0) - LoadTargetBunker,p1_ability_(2240) - Contaminate,p1_ability_(1682) - UpgradeGroundWeapons3,p1_ability_(2F61) - EnableVolatileBurst,p1_ability_(40C0) - BurrowRavagerDown,p1_ability_(40E0) - BurrowRavagerUp,p1_ability_(1A60) - LoadTargetOverlord,p1_ability_(B00) - Gather,p1_ability_(152F) - ResearchTerranVehicleAndShipArmorsLevel2,p1_ability_(22C0) - CancelLast,p1_ability_(5560) - DarkTemplarBlink,p1_ability_(1600) - PsionicStorm,p1_ability_(AE0) - HallucinateZealot,p1_ability_(1BE3) - EvolveNeuralParasite,p1_ability_(4041) - Cancel,p1_ability_(9C0) - HallucinateArchon,p1_ability_(DA0) - SpawnInfestedTerran
0,444.0,0,0,34.0,2.0,444.0,0.0,24.0,35.0,50.0,3.0,34.0,0.0,425.0,0.0,3.0,28.0,57.0,1.0,0.0,2.0,19.0,0.0,21.0,7.0,-7.0,2.0,763.434578,1029.515414,24.314657,78.746863,9,4,1,2,2,1,3,1,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,599.0,1,0,77.0,1.0,627.0,0.0,162.0,160.0,186.0,10.0,67.0,0.0,858.0,0.0,31.0,131.0,116.0,8.0,10.0,1.0,-231.0,0.0,131.0,29.0,70.0,2.0,671.434895,837.446806,1412.369713,2348.367561,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,599.0,1,2,69.0,6.0,413.0,0.0,99.0,160.0,90.0,14.0,85.0,5.0,725.0,2.0,109.0,204.0,232.0,9.0,-16.0,1.0,-312.0,-2.0,-10.0,-44.0,-142.0,5.0,1493.57008,628.467848,1435.172816,550.632165,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,599.0,0,1,82.0,0.0,713.0,0.0,132.0,276.0,180.0,6.0,89.0,0.0,388.0,0.0,710.0,268.0,148.0,19.0,-7.0,0.0,325.0,0.0,-578.0,8.0,32.0,-13.0,1327.289239,1272.379892,1433.182454,1184.508068,7,6,8,1,3,2,9,2,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,343.0,0,2,57.0,1.0,430.0,0.0,224.0,177.0,67.0,10.0,36.0,4.0,272.0,0.0,99.0,106.0,126.0,8.0,21.0,-3.0,158.0,0.0,125.0,71.0,-59.0,2.0,1457.014632,1279.786625,650.662409,754.768815,14,2,2,2,1,1,3,1,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
x_train['p0_ability_(1360) - TrainSCV']

0    9
1    0
2    0
3    7
4    9
Name: p0_ability_(1360) - TrainSCV, dtype: int64

In [43]:
x_train['p1_ability_(1360) - TrainSCV']

0     6
1    17
2     0
3     0
4     0
Name: p1_ability_(1360) - TrainSCV, dtype: int64

# 변수 선택 및 모델 구축

In [47]:
# train = pd.read_csv('data/train.csv')
train = pd.read_csv('data/sampled_train.csv')
x_train = prepare_x_data(train)
y_train = prepare_y_data(train)
x_train.head()

HBox(children=(FloatProgress(value=0.0, max=520835.0), HTML(value='')))




Unnamed: 0,time,p0_species,p1_species,p0_event_Ability,p0_event_AddToControlGroup,p0_event_Camera,p0_event_ControlGroup,p0_event_GetControlGroup,p0_event_Right Click,p0_event_Selection,p0_event_SetControlGroup,p1_event_Ability,p1_event_AddToControlGroup,p1_event_Camera,p1_event_ControlGroup,p1_event_GetControlGroup,p1_event_Right Click,p1_event_Selection,p1_event_SetControlGroup,delta_event_Ability,delta_event_AddToControlGroup,delta_event_Camera,delta_event_ControlGroup,delta_event_GetControlGroup,delta_event_Right Click,delta_event_Selection,delta_event_SetControlGroup,p0_camera_x_var,p0_camera_y_var,p1_camera_x_var,p1_camera_y_var,p0_ability_(1360) - TrainSCV,p0_ability_(1021) - BuildSupplyDepot,p0_ability_(480) - Stop,p0_ability_(1022) - BuildRefinery,p0_ability_(1023) - BuildBarracks,p0_ability_(1020) - BuildCommandCenter,p0_ability_(13E0) - TrainMarine,p0_ability_(1C60) - UpgradeToOrbitalCommand,p0_ability_(102A) - BuildFactory,...,p1_ability_(26C1) - CancelUpgradeToMothership,p1_ability_(1E20) - EMPRound,p1_ability_(1761) - CancelUpgradeToHive,p1_ability_(1940) - UnburrowRoach,p1_ability_(2101) - Cancel,p1_ability_(1B60) - BurrowInfestor,p1_ability_(1DC0) - TacticalNukeStrike,p1_ability_(10C0) - HealMedivac,p1_ability_(1880) - UnburrowBaneling,p1_ability_(9E0) - HallucinateColossus,p1_ability_(42A3),p1_ability_(1660) - TrainInterceptor,p1_ability_(820) - HoldFireGhost,p1_ability_(840) - GWeaponsFreeGhost,p1_ability_(1FE0) - StimpackRedirect,p1_ability_(2061) - StopGenerateCreep,p1_ability_(AC0) - HallucinateWarpPrism,p1_ability_(152C) - UpgradeShipWeapons2,p1_ability_(1560) - LoadTarget,p1_ability_(1801) - EvolveFlyerAttacks2,p1_ability_(17C0) - EvolveAdrenalGlands,p1_ability_(AA0) - HallucinateVoidRay,p1_ability_(4AFE),p1_ability_(11E0) - LoadTargetBunker,p1_ability_(2240) - Contaminate,p1_ability_(1682) - UpgradeGroundWeapons3,p1_ability_(2F61) - EnableVolatileBurst,p1_ability_(40C0) - BurrowRavagerDown,p1_ability_(40E0) - BurrowRavagerUp,p1_ability_(1A60) - LoadTargetOverlord,p1_ability_(B00) - Gather,p1_ability_(152F) - ResearchTerranVehicleAndShipArmorsLevel2,p1_ability_(22C0) - CancelLast,p1_ability_(5560) - DarkTemplarBlink,p1_ability_(1600) - PsionicStorm,p1_ability_(AE0) - HallucinateZealot,p1_ability_(1BE3) - EvolveNeuralParasite,p1_ability_(4041) - Cancel,p1_ability_(9C0) - HallucinateArchon,p1_ability_(DA0) - SpawnInfestedTerran
18,295.0,2,2,26.0,0.0,228.0,0.0,47.0,68.0,103.0,5.0,42.0,0.0,82.0,0.0,41.0,126.0,95.0,7.0,-16.0,0.0,146.0,0.0,6.0,-58.0,8.0,-2.0,768.272004,921.704322,1785.817478,1072.86221,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
759,332.0,1,2,35.0,0.0,375.0,0.0,191.0,118.0,95.0,7.0,25.0,0.0,311.0,0.0,167.0,71.0,281.0,1.0,10.0,0.0,64.0,0.0,24.0,47.0,-186.0,6.0,1971.391818,1514.360742,1937.170697,1055.699525,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
937,463.0,2,1,52.0,0.0,434.0,0.0,49.0,209.0,184.0,1.0,56.0,0.0,205.0,0.0,174.0,290.0,78.0,4.0,-4.0,0.0,229.0,0.0,-125.0,-81.0,106.0,-3.0,1590.372118,1108.987084,3023.422341,1418.252719,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
954,573.0,0,1,26.0,0.0,265.0,0.0,14.0,156.0,69.0,0.0,57.0,0.0,483.0,0.0,0.0,157.0,59.0,0.0,-31.0,0.0,-218.0,0.0,14.0,-1.0,10.0,0.0,46.170852,216.606557,1443.99294,1242.162422,5,1,0,0,3,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1044,599.0,2,1,64.0,6.0,344.0,0.0,88.0,268.0,167.0,4.0,46.0,0.0,899.0,0.0,73.0,185.0,85.0,7.0,18.0,6.0,-555.0,0.0,15.0,83.0,82.0,-3.0,1248.83247,897.182721,996.457435,562.700024,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [0]:
def lgb_cv(num_leaves, learning_rate, n_estimators, subsample, colsample_bytree, reg_alpha, reg_lambda, bagging_fraction, feature_fraction, x_data=None, y_data=None, n_splits=5, output='score'):
    score = 0
    kf = KFold(n_splits=n_splits)
    models = []
    for train_index, valid_index in kf.split(x_data):
        x_train, y_train = x_data.iloc[train_index], y_data[train_index]
        x_valid, y_valid = x_data.iloc[valid_index], y_data[valid_index]
        
        model = lgb.LGBMClassifier(
            num_leaves = int(num_leaves), 
            learning_rate = learning_rate, 
            n_estimators = int(n_estimators), 
            subsample = np.clip(subsample, 0, 1), 
            colsample_bytree = np.clip(colsample_bytree, 0, 1), 
            reg_alpha = reg_alpha, 
            reg_lambda = reg_lambda,
            bagging_fraction = bagging_fraction,
            feature_fraction = feature_fraction,
        )
        
        model.fit(x_train, y_train)
        models.append(model)
        
        pred = model.predict_proba(x_valid)[:, 1]
        true = y_valid
        score += roc_auc_score(true, pred)/n_splits
    
    if output == 'score':
        return score
    if output == 'model':
        return models

In [49]:
# 모델과 관련없는 변수 고정
func_fixed = partial(lgb_cv, x_data=x_train, y_data=y_train, n_splits=5, output='score') 
# 베이지안 최적화 범위 설정
lgbBO = BayesianOptimization(
    func_fixed, 
    {
        'num_leaves': (16, 128),        # num_leaves,       범위(16~1024)
        'learning_rate': (0.0001, 0.1),  # learning_rate,    범위(0.0001~0.1)
        'n_estimators': (16, 1024),      # n_estimators,     범위(16~1024)
        'subsample': (0, 0.2),             # subsample,        범위(0~1)
        'colsample_bytree': (0, 1),      # colsample_bytree, 범위(0~1)
        'reg_alpha': (0, 10),            # reg_alpha,        범위(0~10)
        'reg_lambda': (0, 50),           # reg_lambda,       범위(0~50)
        'bagging_fraction': (0.1, 1.0),
        'feature_fraction': (0.1, 1.0),
    }, 
    random_state=4321                    # 시드 고정
)
lgbBO.maximize(init_points=5, n_iter=30) # 처음 5회 랜덤 값으로 score 계산 후 30회 최적화

# 이 예제에서는 7개 하이퍼 파라미터에 대해 30회 조정을 시도했습니다.
# 다양한 하이퍼 파라미터, 더 많은 iteration을 시도하여 최상의 모델을 얻어보세요!
# LightGBM Classifier: https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html

|   iter    |  target   | baggin... | colsam... | featur... | learni... | n_esti... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.6321  [0m | [0m 0.1637  [0m | [0m 0.8151  [0m | [0m 0.7911  [0m | [0m 0.02871 [0m | [0m 210.6   [0m | [0m 125.6   [0m | [0m 4.062   [0m | [0m 37.89   [0m | [0m 0.01783 [0m |
| [95m 2       [0m | [95m 0.6535  [0m | [95m 0.3789  [0m | [95m 0.6189  [0m | [95m 0.5139  [0m | [95m 0.02191 [0m | [95m 684.8   [0m | [95m 92.01   [0m | [95m 9.503   [0m | [95m 14.06   [0m | [95m 0.124   [0m |
| [0m 3       [0m | [0m 0.6422  [0m | [0m 0.445   [0m | [0m 0.4004  [0m | [0m 0.9484  [0m | [0m 0.093   [0m | [0m 972.0   [0m | [0m 58.05   [0m | [0m 3.423   [0m | [0m 33.24   [0m | [0m 0.008464[0m |
| [0m 4       [0m | [0m 0.6127  [0m | [0m 0.309   

# 모델 학습 및 검증

In [0]:
params = lgbBO.max['params']
models = lgb_cv(
    params['num_leaves'], 
    params['learning_rate'], 
    params['n_estimators'], 
    params['subsample'], 
    params['colsample_bytree'], 
    params['reg_alpha'], 
    params['reg_lambda'],
    params['bagging_fraction'],
    params['feature_fraction'],
    x_data=x_train, y_data=y_train, n_splits=5, output='model')

In [60]:
draw = pd.DataFrame({'name':x_train.columns,'importance':models[0].feature_importances_})
draw = draw.sort_values(by=['importance'],ascending=False)
draw.head(20)

Unnamed: 0,name,importance
27,p0_camera_x_var,286
11,p1_event_Ability,74
7,p0_event_GetControlGroup,69
19,delta_event_Ability,43
23,delta_event_GetControlGroup,1
18,p1_event_SetControlGroup,1
0,time,1
508,p1_ability_(15A4) - TrainVoidRay,0
509,p1_ability_(16E5) - BuildHydraliskDen,0
529,p1_ability_(1340) - LandStarport,0


In [55]:
test = pd.read_csv('data/test.csv')
x_test = prepare_x_data(test)

HBox(children=(FloatProgress(value=0.0, max=28714849.0), HTML(value='')))




In [56]:
preds = []
for model in models:
    pred = model.predict_proba(x_test)[:, 1]
    preds.append(pred)
pred = np.mean(preds, axis=0)

submission = pd.read_csv('data/sample_submission.csv', index_col=0)
submission['winner'] = submission['winner'] + pred
submission.to_csv('mini_submission.csv')
submission.head()

Unnamed: 0_level_0,winner
game_id,Unnamed: 1_level_1
38872,0.546101
38873,0.475787
38874,0.481688
38875,0.458655
38876,0.4717
