## 종족전 분리된 데이터로 예측

In [2]:
import pandas as pd

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [13]:
from tqdm import tqdm 

In [59]:
PT_train = pd.read_csv("PT.csv")

In [10]:
#Feature Engineering
def species_converter(string):
    if string == 'T':
        return 0
    elif string == 'P':
        return 1
    elif string == 'Z':
        return 2
    else:
        raise ValueError

def convert(time):
    second = int(time * 100) % 100
    minutes = int(time)
    tot_sec = minutes * 60 + second

    return tot_sec

def convert_outlier(time):
    if time > 29.0:
        return 50.0
    return time


In [70]:
PT_train

Unnamed: 0,game_id,winner,time,player,species,event,event_contents
0,1,1,0.00,1,T,Camera,"at (28.0, 18.5078125)"
1,1,1,0.00,0,P,Camera,"at (140.0, 162.0078125)"
2,1,1,0.00,1,T,Selection,['OrbitalCommand [3340001]']
3,1,1,0.01,0,P,Selection,['Nexus [3000001]']
4,1,1,0.01,1,T,Ability,(1360) - TrainSCV
...,...,...,...,...,...,...,...
14785834,38868,0,9.58,0,T,Camera,"at (28.0, 33.62109375)"
14785835,38868,0,9.59,0,T,Camera,"at (28.0, 35.234375)"
14785836,38868,0,9.59,0,T,Camera,"at (28.0, 38.6640625)"
14785837,38868,0,9.59,0,T,Camera,"at (28.0, 39.15234375)"


In [81]:
winners = PT_train.groupby(['game_id']).winner.max()

In [71]:
species = PT_train.groupby(['game_id', 'player']).species.unique()

In [82]:
winners

game_id
1        1
3        0
6        1
10       0
16       0
        ..
38860    0
38861    0
38863    0
38867    1
38868    0
Name: winner, Length: 8691, dtype: int64

In [83]:
species

game_id  player
1        0         [P]
         1         [T]
3        0         [T]
         1         [P]
6        0         [P]
                  ... 
38863    1         [T]
38867    0         [T]
         1         [P]
38868    0         [T]
         1         [P]
Name: species, Length: 17382, dtype: object

In [69]:
winners.unique()

array(['T'], dtype=object)

In [104]:
def data_preparation_species(df,species_dict={0: 'T',1: 'P'},covert_dict={'T': 0,'P': 1} ,answer=False):
    df['time'] = df['time'].apply(convert)
    df['time_diff'] = df.sort_values(['game_id', 'species', 'time']).groupby(['game_id', 'species'])['time'].diff()
    df['time_diff'] = df['time_diff'].fillna(0.0)
    df['time_diff'] = df['time_diff'].apply(convert_outlier)
    df['time_diff'] = df['time_diff'].astype(int).astype(str)


    game_ids = df['game_id'].unique()
    events = ['Ability', 'AddToControlGroup', 'Camera', 'ControlGroup', 'GetControlGroup', 'Right Click', 'Selection', 'SetControlGroup']
    times = []
    for i in range(30):
        times.append(str(i))
    times.append(str(50))

    #print(times)

    unique_event_S0, unique_event_S1, delta_event = {}, {}, {}
    
    for event in events:
        unique_event_S0[species_dict[0]+'_' + event] = 0
        unique_event_S1[species_dict[1]+'_' + event] = 0
        delta_event['delta_' + event] = 0
        
    for time in times:
        unique_event_S0[species_dict[0]+'_' + time] = 0
        unique_event_S1[species_dict[1]+'_' + time] = 0
        delta_event['delta_' + time] = 0
        
   
    event_count = df.groupby(['game_id', 'species']).event.value_counts()
    time_count = df.groupby(['game_id', 'species']).time_diff.value_counts()

    #print(time_count[:50])
    if answer:
        winners = df.groupby(['game_id']).winner.max()
        species_player=df.groupby(['game_id', 'player']).species.unique()
    player_dict = df.groupby(['game_id', 'species']).player.unique()
    x_data, y_data  = [], []
    for game_id in tqdm(game_ids):
        df_event_count = event_count[game_id].unstack(level=-1)
        df_time_count = time_count[game_id].unstack(level=-1)
        df = pd.concat([df_event_count, df_time_count], axis=1)   
        df = df.fillna(0)
        df_S0_event = unique_event_S0.copy()
        for column in df.columns:
            df_S0_event[species_dict[0]+'_' + column] = df.loc[species_dict[0]][column]
        df_S0_event = pd.DataFrame(pd.Series(df_S0_event)).T

        df_S1_event = unique_event_S1.copy()
        for column in df.columns:
            df_S1_event[species_dict[1]+'_' + column] = df.loc[species_dict[1]][column]
        df_S1_event = pd.DataFrame(pd.Series(df_S1_event)).T
        
        df_delta_event = delta_event.copy()
        for column in df.columns:
            df_delta_event['delta_' + column] = df_S0_event[species_dict[0]+'_'+ column][0] - df_S1_event[species_dict[1]+'_' + column][0]
        df_delta_event = pd.DataFrame(pd.Series(df_delta_event)).T

        out = pd.concat([df_S0_event, df_S1_event, df_delta_event], axis=1)
        out.index = [game_id]
        out.index.name = 'game_id'
        
        x_data.append(out)
        if answer:
            y_data.append(covert_dict[species_player[game_id][winners[game_id]][0]])  
            
    x_data = pd.concat(x_data)
    y_data = np.array(y_data)
    return x_data, y_data, player_dict

In [97]:
species_dict={0: 'T',1: 'P'}

In [105]:
x_data, y_data, player = data_preparation_species(PT_train,answer=True)

100%|█████████████████████████████████████████████████████████████████████████████| 8691/8691 [00:55<00:00, 155.25it/s]


In [107]:
y_data

array([0, 0, 0, ..., 1, 1, 0])

In [108]:
player

game_id  species
1        P          [0]
         T          [1]
3        P          [1]
         T          [0]
6        P          [0]
                   ... 
38863    T          [1]
38867    P          [1]
         T          [0]
38868    P          [1]
         T          [0]
Name: player, Length: 17382, dtype: object

In [110]:
PT_train[PT_train['game_id']==3]

Unnamed: 0,game_id,winner,time,player,species,event,event_contents,time_diff
2434,3,0,0,0,T,Camera,"at (127.25, 27.5078125)",0
2435,3,0,0,1,P,Camera,"at (24.75, 137.0078125)",0
2436,3,0,777600000,0,T,Selection,['OrbitalCommand [3380001]'],50
2437,3,0,777600000,0,T,Ability,(1360) - TrainSCV,0
2438,3,0,777600000,1,P,Selection,['Nexus [36C0001]'],50
...,...,...,...,...,...,...,...,...
5440,3,0,465004800000,0,T,Right Click,"Location: (117.689208984375, 44.470947265625, ...",50
5441,3,0,465004800000,0,T,Right Click,Target: SupplyDepotLowered [04200001]; Locatio...,0
5442,3,0,465782400000,0,T,Camera,"at (112.55859375, 42.0546875)",50
5443,3,0,465782400000,0,T,Camera,"at (112.55859375, 43.28515625)",0


In [113]:
from catboost import CatBoostClassifier

In [114]:
from sklearn.model_selection import train_test_split

In [115]:
x_train,x_valid ,y_train, y_valid = train_test_split(x_data,y_data)

In [117]:
y_valid

array([0, 0, 1, ..., 1, 1, 0])

In [119]:
d_model= CatBoostClassifier(task_type="GPU")

In [120]:
d_model.fit(x_train,y_train)

Learning rate set to 0.030409
0:	learn: 0.6909465	total: 39.4ms	remaining: 39.3s
1:	learn: 0.6886313	total: 76.3ms	remaining: 38.1s
2:	learn: 0.6864709	total: 113ms	remaining: 37.4s
3:	learn: 0.6845135	total: 151ms	remaining: 37.7s
4:	learn: 0.6827499	total: 188ms	remaining: 37.5s
5:	learn: 0.6811470	total: 226ms	remaining: 37.5s
6:	learn: 0.6795409	total: 262ms	remaining: 37.2s
7:	learn: 0.6777860	total: 300ms	remaining: 37.2s
8:	learn: 0.6760805	total: 337ms	remaining: 37.1s
9:	learn: 0.6745062	total: 373ms	remaining: 37s
10:	learn: 0.6728768	total: 411ms	remaining: 37s
11:	learn: 0.6715107	total: 449ms	remaining: 37s
12:	learn: 0.6703141	total: 486ms	remaining: 36.9s
13:	learn: 0.6690515	total: 524ms	remaining: 36.9s
14:	learn: 0.6678502	total: 561ms	remaining: 36.8s
15:	learn: 0.6667651	total: 597ms	remaining: 36.7s
16:	learn: 0.6656854	total: 636ms	remaining: 36.8s
17:	learn: 0.6645386	total: 673ms	remaining: 36.7s
18:	learn: 0.6635365	total: 710ms	remaining: 36.7s
19:	learn: 0.66

160:	learn: 0.6148404	total: 5.92s	remaining: 30.9s
161:	learn: 0.6146250	total: 5.96s	remaining: 30.8s
162:	learn: 0.6144228	total: 6s	remaining: 30.8s
163:	learn: 0.6141964	total: 6.04s	remaining: 30.8s
164:	learn: 0.6139810	total: 6.07s	remaining: 30.7s
165:	learn: 0.6138952	total: 6.11s	remaining: 30.7s
166:	learn: 0.6137758	total: 6.15s	remaining: 30.7s
167:	learn: 0.6136285	total: 6.18s	remaining: 30.6s
168:	learn: 0.6135903	total: 6.21s	remaining: 30.6s
169:	learn: 0.6133023	total: 6.25s	remaining: 30.5s
170:	learn: 0.6132650	total: 6.28s	remaining: 30.4s
171:	learn: 0.6131124	total: 6.32s	remaining: 30.4s
172:	learn: 0.6129998	total: 6.35s	remaining: 30.4s
173:	learn: 0.6127345	total: 6.38s	remaining: 30.3s
174:	learn: 0.6125729	total: 6.42s	remaining: 30.3s
175:	learn: 0.6124435	total: 6.45s	remaining: 30.2s
176:	learn: 0.6122161	total: 6.49s	remaining: 30.2s
177:	learn: 0.6120234	total: 6.53s	remaining: 30.1s
178:	learn: 0.6117389	total: 6.56s	remaining: 30.1s
179:	learn: 0.6

319:	learn: 0.5977076	total: 11.3s	remaining: 24s
320:	learn: 0.5976530	total: 11.3s	remaining: 23.9s
321:	learn: 0.5976478	total: 11.4s	remaining: 23.9s
322:	learn: 0.5976437	total: 11.4s	remaining: 23.9s
323:	learn: 0.5974136	total: 11.4s	remaining: 23.9s
324:	learn: 0.5971733	total: 11.5s	remaining: 23.8s
325:	learn: 0.5971720	total: 11.5s	remaining: 23.8s
326:	learn: 0.5969088	total: 11.5s	remaining: 23.7s
327:	learn: 0.5968786	total: 11.6s	remaining: 23.7s
328:	learn: 0.5967667	total: 11.6s	remaining: 23.7s
329:	learn: 0.5967643	total: 11.6s	remaining: 23.6s
330:	learn: 0.5965930	total: 11.7s	remaining: 23.6s
331:	learn: 0.5963992	total: 11.7s	remaining: 23.6s
332:	learn: 0.5963822	total: 11.7s	remaining: 23.5s
333:	learn: 0.5963801	total: 11.8s	remaining: 23.5s
334:	learn: 0.5963760	total: 11.8s	remaining: 23.4s
335:	learn: 0.5961710	total: 11.8s	remaining: 23.4s
336:	learn: 0.5961110	total: 11.9s	remaining: 23.4s
337:	learn: 0.5959957	total: 11.9s	remaining: 23.3s
338:	learn: 0.

478:	learn: 0.5787268	total: 17s	remaining: 18.4s
479:	learn: 0.5785150	total: 17s	remaining: 18.4s
480:	learn: 0.5785124	total: 17s	remaining: 18.4s
481:	learn: 0.5785099	total: 17.1s	remaining: 18.3s
482:	learn: 0.5784054	total: 17.1s	remaining: 18.3s
483:	learn: 0.5781862	total: 17.1s	remaining: 18.3s
484:	learn: 0.5781837	total: 17.2s	remaining: 18.2s
485:	learn: 0.5781255	total: 17.2s	remaining: 18.2s
486:	learn: 0.5781228	total: 17.2s	remaining: 18.1s
487:	learn: 0.5781205	total: 17.2s	remaining: 18.1s
488:	learn: 0.5781181	total: 17.3s	remaining: 18.1s
489:	learn: 0.5778404	total: 17.3s	remaining: 18s
490:	learn: 0.5776156	total: 17.4s	remaining: 18s
491:	learn: 0.5776135	total: 17.4s	remaining: 18s
492:	learn: 0.5775190	total: 17.4s	remaining: 17.9s
493:	learn: 0.5775045	total: 17.5s	remaining: 17.9s
494:	learn: 0.5775024	total: 17.5s	remaining: 17.8s
495:	learn: 0.5773139	total: 17.5s	remaining: 17.8s
496:	learn: 0.5771748	total: 17.6s	remaining: 17.8s
497:	learn: 0.5771729	to

637:	learn: 0.5669197	total: 22.6s	remaining: 12.8s
638:	learn: 0.5669193	total: 22.6s	remaining: 12.8s
639:	learn: 0.5669190	total: 22.6s	remaining: 12.7s
640:	learn: 0.5668706	total: 22.7s	remaining: 12.7s
641:	learn: 0.5668700	total: 22.7s	remaining: 12.7s
642:	learn: 0.5667204	total: 22.8s	remaining: 12.6s
643:	learn: 0.5667201	total: 22.8s	remaining: 12.6s
644:	learn: 0.5665230	total: 22.8s	remaining: 12.6s
645:	learn: 0.5664303	total: 22.9s	remaining: 12.5s
646:	learn: 0.5662089	total: 22.9s	remaining: 12.5s
647:	learn: 0.5661541	total: 22.9s	remaining: 12.5s
648:	learn: 0.5661532	total: 23s	remaining: 12.4s
649:	learn: 0.5658681	total: 23s	remaining: 12.4s
650:	learn: 0.5657777	total: 23s	remaining: 12.4s
651:	learn: 0.5657770	total: 23.1s	remaining: 12.3s
652:	learn: 0.5657765	total: 23.1s	remaining: 12.3s
653:	learn: 0.5656258	total: 23.1s	remaining: 12.2s
654:	learn: 0.5652791	total: 23.2s	remaining: 12.2s
655:	learn: 0.5649020	total: 23.2s	remaining: 12.2s
656:	learn: 0.5647

796:	learn: 0.5531291	total: 28.3s	remaining: 7.2s
797:	learn: 0.5530251	total: 28.3s	remaining: 7.17s
798:	learn: 0.5528303	total: 28.4s	remaining: 7.13s
799:	learn: 0.5528235	total: 28.4s	remaining: 7.1s
800:	learn: 0.5527805	total: 28.4s	remaining: 7.06s
801:	learn: 0.5527375	total: 28.5s	remaining: 7.03s
802:	learn: 0.5527150	total: 28.5s	remaining: 6.99s
803:	learn: 0.5526259	total: 28.5s	remaining: 6.96s
804:	learn: 0.5526067	total: 28.6s	remaining: 6.92s
805:	learn: 0.5525595	total: 28.6s	remaining: 6.89s
806:	learn: 0.5524916	total: 28.7s	remaining: 6.85s
807:	learn: 0.5524854	total: 28.7s	remaining: 6.82s
808:	learn: 0.5523778	total: 28.7s	remaining: 6.78s
809:	learn: 0.5520668	total: 28.8s	remaining: 6.75s
810:	learn: 0.5518624	total: 28.8s	remaining: 6.71s
811:	learn: 0.5518146	total: 28.8s	remaining: 6.68s
812:	learn: 0.5517955	total: 28.9s	remaining: 6.64s
813:	learn: 0.5517572	total: 28.9s	remaining: 6.61s
814:	learn: 0.5516172	total: 29s	remaining: 6.57s
815:	learn: 0.55

955:	learn: 0.5405083	total: 34.2s	remaining: 1.57s
956:	learn: 0.5404919	total: 34.2s	remaining: 1.54s
957:	learn: 0.5402763	total: 34.2s	remaining: 1.5s
958:	learn: 0.5402698	total: 34.3s	remaining: 1.46s
959:	learn: 0.5401016	total: 34.3s	remaining: 1.43s
960:	learn: 0.5400375	total: 34.3s	remaining: 1.39s
961:	learn: 0.5399354	total: 34.4s	remaining: 1.36s
962:	learn: 0.5396357	total: 34.4s	remaining: 1.32s
963:	learn: 0.5394929	total: 34.5s	remaining: 1.29s
964:	learn: 0.5394889	total: 34.5s	remaining: 1.25s
965:	learn: 0.5393216	total: 34.5s	remaining: 1.22s
966:	learn: 0.5393048	total: 34.6s	remaining: 1.18s
967:	learn: 0.5392006	total: 34.6s	remaining: 1.14s
968:	learn: 0.5390362	total: 34.6s	remaining: 1.11s
969:	learn: 0.5390317	total: 34.7s	remaining: 1.07s
970:	learn: 0.5390182	total: 34.7s	remaining: 1.04s
971:	learn: 0.5387451	total: 34.7s	remaining: 1s
972:	learn: 0.5386877	total: 34.8s	remaining: 965ms
973:	learn: 0.5385504	total: 34.8s	remaining: 929ms
974:	learn: 0.53

<catboost.core.CatBoostClassifier at 0x22f447b57c8>

In [140]:
pred_y =d_model.predict_proba(x_valid)

In [141]:
pred_y

array([[0.49136233, 0.50863767],
       [0.58460855, 0.41539145],
       [0.38518977, 0.61481023],
       ...,
       [0.46256217, 0.53743783],
       [0.40178034, 0.59821966],
       [0.52065713, 0.47934287]])

In [125]:
from sklearn import metrics

In [142]:
fpr, tpr, thresholds = metrics.roc_curve(y_valid, pred_y[:,1], pos_label=1)

In [145]:
metrics.auc(fpr, tpr)

0.6586088591832067

In [137]:
thresholds

array([1.97395526, 0.97395526, 0.0100682 ])