In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.decomposition import LatentDirichletAllocation, NMF, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import f1_score, fbeta_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import warnings
from datetime import datetime
import lightgbm as lgb

warnings.filterwarnings('ignore')

In [None]:
d = pd.read_pickle('train_total_2.pkl')
df_ = pd.read_csv('label.csv')
pos_uin = list(df_[df_['label'] == 1]['uin'])
neg_uin = list(df_[df_['label'] == 0]['uin'])
sample_uin = neg_uin[18723:26723]
df = d[d['uin'].isin(sample_uin)]
del d

In [5]:
f = 'pitch_r'
df_temp = df.groupby(['uin', 'kill_time'])[f'{f}'].agg([
    (f'{f}_range_diff', 'diff'), 
     ])
df[f'{f}_range_diff'] = df_temp[f'{f}_range_diff']
agg_list = []
for uin in tqdm(set(df['uin'])):
    df__ = df[df['uin'] == uin][['uin', 'kill_time', f'{f}_range_diff']]
    df_temp = df__.groupby(['kill_time'])[f'{f}_range_diff'].agg([
    (f'{f}_range_max', 'max'),     
    (f'{f}_range_min', 'min'),
    (f'{f}_range_mean', 'mean'),
    (f'{f}_range_std', 'std'),
    (f'{f}_range_skew', 'skew'),
        ]).reset_index()
    df_temp['uin'] = uin
    agg_list.append(df_temp)
del df[f'{f}_range_diff']
df_temp = pd.concat(agg_list, axis=0)
del agg_list
df_ = pd.merge(df_, df_temp, on=['uin'], how='left')
del df_temp

100%|██████████████████████████████████████| 8000/8000 [06:40<00:00, 19.99it/s]


In [6]:
f = 'yaw_r'
df_temp = df.groupby(['uin', 'kill_time'])[f'{f}'].agg([
    (f'{f}_range_diff', 'diff'), 
     ])
df[f'{f}_range_diff'] = df_temp[f'{f}_range_diff']
agg_list = []
for uin in tqdm(set(df['uin'])):
    df__ = df[df['uin'] == uin][['uin', 'kill_time', f'{f}_range_diff']]
    df_temp = df__.groupby(['kill_time'])[f'{f}_range_diff'].agg([
    (f'{f}_range_max', 'max'),     
    (f'{f}_range_min', 'min'),
    (f'{f}_range_mean', 'mean'),
    (f'{f}_range_std', 'std'),
    (f'{f}_range_skew', 'skew'),
        ]).reset_index()
    df_temp['uin'] = uin
    agg_list.append(df_temp)
del df[f'{f}_range_diff']

df_temp = pd.concat(agg_list, axis=0)
del agg_list
df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
del df_temp

100%|██████████████████████████████████████| 8000/8000 [07:16<00:00, 18.32it/s]


In [7]:
agg_list = []
for uin in tqdm(set(df['uin'])):
    df__ = df[df['uin'] == uin][['kill_time', 'weapon_id']]
    df_temp = df__.groupby(['kill_time'])['weapon_id'].agg([
        ('weapon_id_nunique', 'nunique'),     
        ])
    df_temp['uin'] = uin
    df_temp = pd.merge(df_temp, df__.drop_duplicates('kill_time'), on=['kill_time'], how='left')
    agg_list.append(df_temp)
df_temp = pd.concat(agg_list, axis=0)
del agg_list
df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
del df_temp

100%|██████████████████████████████████████| 8000/8000 [07:12<00:00, 18.50it/s]


In [8]:
for f in ['yaw_r', 'pitch_r']:
    agg_list = []
    for uin in tqdm(set(df['uin'])):
        df__ = df[df['uin'] == uin][['uin', 'kill_time', f]]
        df_temp = df__.groupby(['kill_time'])[f].agg([
        (f'{f}_max', 'max'),     
        (f'{f}_min', 'min'),
        (f'{f}_mean', 'mean'),
        (f'{f}_std', 'std'),
        (f'{f}_skew', 'skew'),
            ]).reset_index()
        df_temp['uin'] = uin
        agg_list.append(df_temp)
    df_temp = pd.concat(agg_list, axis=0)
    del agg_list
    df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
    del df_temp

100%|██████████████████████████████████████| 8000/8000 [07:17<00:00, 18.30it/s]
100%|██████████████████████████████████████| 8000/8000 [07:16<00:00, 18.32it/s]


In [9]:
agg_list = []
for uin in tqdm(set(df['uin'])):
    df__ = df[df['uin'] == uin][['kill_time', 'type', 'index']]
    df__.reset_index(drop=True, inplace=True)
    
    df_temp = df__.groupby(['kill_time'])['type'].agg([
        ('type_nunique', 'nunique'),     
        ])
    df_temp['uin'] = uin
    agg_list.append(df_temp)
df_temp = pd.concat(agg_list, axis=0)
del agg_list
df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
del df_temp

100%|██████████████████████████████████████| 8000/8000 [06:39<00:00, 20.04it/s]


In [10]:
target = 'type'
df_w2v = pd.read_csv('type_w2v.csv')
df = pd.merge(df, df_w2v, on=target, how='left')

agg_list = []
for uin in tqdm(set(df['uin'])):
    df__ = df[df['uin'] == uin][['kill_time'] +['w2v_%s_%d'%(target, x) for x in range(10)]]
    df__.reset_index(drop=True, inplace=True)
    df_temp = df__.drop_duplicates('kill_time')[['kill_time']]
    for i in range(10):
        d = df__.groupby(['kill_time'])['w2v_%s_%d'%(target, i)].agg([
            (f'{target}_w2v_mean_{i}', 'mean'),     
            ])
        df_temp = pd.merge(df_temp, d, on='kill_time', how='left')
    df_temp['uin'] = uin
    agg_list.append(df_temp)
df_temp = pd.concat(agg_list, axis=0)
del agg_list
df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
del df_temp

100%|██████████████████████████████████████| 8000/8000 [12:20<00:00, 10.80it/s]


In [11]:
df_temp = df_.groupby('uin')['kill_time'].agg([
        ('kill_time_max', 'max'),
        ('kill_time_min', 'min'),    
        ('kill_time_range_max', lambda x: x.diff().max()), 
        ('kill_time_range_min', lambda x: x.diff().min()),
        ('kill_time_range_mean', lambda x: x.diff().mean()),
        ('kill_time_range_std', lambda x: x.diff().std()),
        ('kill_time_range_skew', lambda x: x.diff().skew()),
        ])
df_ = pd.merge(df_, df_temp, on='uin', how='left')
df_['kill_time_diff'] = df_['kill_time_max'] - df_['kill_time_min']
df_['kill_count'] = df_.groupby('uin')['kill_time'].transform('count')
df_['kill_time_ratio'] = df_['kill_time_diff'] / df_['kill_count']
del df_temp

In [12]:
df['yaw_difference'] = df['yaw'] - df['yaw_r']
f = 'yaw'
df_temp = df.groupby(['uin', 'kill_time'])[f'{f}'].agg([
    (f'{f}_range_diff', 'diff'), 
     ])
df[f'{f}_range_diff'] = df_temp[f'{f}_range_diff']
agg_list = []
for uin in tqdm(set(df['uin'])):
    df__ = df[df['uin'] == uin][['uin', 'kill_time', f'{f}_range_diff']]
    df_temp = df__.groupby(['kill_time'])[f'{f}_range_diff'].agg([
    (f'{f}_range_max', 'max'),     
    (f'{f}_range_min', 'min'),
    (f'{f}_range_mean', 'mean'),
    (f'{f}_range_std', 'std'),
    (f'{f}_range_skew', 'skew'),
        ]).reset_index()
    df_temp['uin'] = uin
    agg_list.append(df_temp)
del df[f'{f}_range_diff']

df_temp = pd.concat(agg_list, axis=0)
del agg_list
df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
del df_temp

for f in ['yaw', 'yaw_difference']:
    agg_list = []
    for uin in tqdm(set(df['uin'])):
        df__ = df[df['uin'] == uin][['uin', 'kill_time', f]]
        df_temp = df__.groupby(['kill_time'])[f].agg([
        (f'{f}_max', 'max'),     
        (f'{f}_min', 'min'),
        (f'{f}_mean', 'mean'),
        (f'{f}_std', 'std'),
        (f'{f}_skew', 'skew'),
            ]).reset_index()
        df_temp['uin'] = uin
        agg_list.append(df_temp)
    df_temp = pd.concat(agg_list, axis=0)
    del agg_list
    df_ = pd.merge(df_, df_temp, on=['uin', 'kill_time'], how='left')
    del df_temp

100%|██████████████████████████████████████| 8000/8000 [07:24<00:00, 18.01it/s]
100%|██████████████████████████████████████| 8000/8000 [07:20<00:00, 18.16it/s]
100%|██████████████████████████████████████| 8000/8000 [06:48<00:00, 19.60it/s]


In [13]:
df_.to_pickle('train_feature_30000_2.pkl')