# 변수 모으기

user_id 기반으로 유의미한 feature를 하나씩 모아보겠습니다. 다 합쳐서 써보죠 뭐.

# IMPORT & LOAD DATA

In [1]:
import load_dtypes as ld
import warnings
warnings.filterwarnings(action='ignore')

import os, sys
import time
import datetime as dt

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import auc, roc_auc_score, roc_curve, precision_recall_curve, recall_score, precision_score

import lightgbm as lgb
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import StratifiedKFold

In [2]:
#-------------------------------------------------------------------------------------
# validation auc score를 확인하기 위해 정의
def f_pr_auc(probas_pred, y_true):
    """Custom evaluation metric: area under the precision-recall curve.

    Args:
        probas_pred: Predicted scores, forwarded to ``precision_recall_curve``.
        y_true: Object exposing ``get_label()`` that returns the ground-truth
            labels (presumably a ``lightgbm.Dataset`` -- confirm against the
            training call that uses this metric).

    Returns:
        The tuple ``("pr_auc", score, True)``. NOTE(review): the trailing
        ``True`` presumably tells the booster that higher is better -- confirm.
    """
    precision, recall, _thresholds = precision_recall_curve(
        y_true.get_label(), probas_pred
    )
    return "pr_auc", auc(recall, precision), True
#-------------------------------------------------------------------------------------

In [3]:
# Base directory holding every CSV used by this notebook. It is defined once so
# that relocating the data only requires editing this single line.
DATA_DIR = r'C:\Users\Wyatt\wyatt37/Data/systemError'

# Training tables (problem reports, quality log, error log).
TRAIN_P_PATH = f'{DATA_DIR}/train_problem_data.csv'
TRAIN_Q_PATH = f'{DATA_DIR}/train_quality_data.csv'
TRAIN_E_PATH = f'{DATA_DIR}/train_err_data.csv'
# Test tables (quality log, error log).
TEST_Q_PATH = f'{DATA_DIR}/test_quality_data.csv'
TEST_E_PATH = f'{DATA_DIR}/test_err_data.csv'
# Submission template.
SUBMISSION_PATH = f'{DATA_DIR}/sample_submission.csv'

In [4]:
%%time
# Load the problem, quality and error tables through the project helper
# ld.load_dtypes. NOTE(review): presumably the helper reads each CSV with
# compact dtypes -- confirm in load_dtypes.py.
train_p = ld.load_dtypes(TRAIN_P_PATH)
train_q = ld.load_dtypes(TRAIN_Q_PATH)
train_e = ld.load_dtypes(TRAIN_E_PATH)
test_q = ld.load_dtypes(TEST_Q_PATH)
test_e = ld.load_dtypes(TEST_E_PATH)
# The submission template is read with plain pandas.
submission = pd.read_csv(SUBMISSION_PATH)

C:\Users\Wyatt\wyatt37/Data/systemError/train_problem_data.csv
C:\Users\Wyatt\wyatt37/Data/systemError/train_quality_data.csv
C:\Users\Wyatt\wyatt37/Data/systemError/train_err_data.csv
C:\Users\Wyatt\wyatt37/Data/systemError/test_quality_data.csv
C:\Users\Wyatt\wyatt37/Data/systemError/test_err_data.csv
Wall time: 1min 17s


In [5]:
# Shapes (rows, columns) of every loaded table, as a quick sanity check.
train_p.shape, train_q.shape, train_e.shape, test_q.shape, test_e.shape, submission.shape

((5429, 2),
 (828624, 16),
 (16554663, 6),
 (747972, 16),
 (16532648, 6),
 (14999, 2))

In [6]:
def preprocessing_problem(df, object_='binary', id_start=10000, id_stop=25000):
    """Convert the train_problem table into one target row per user_id.

    Args:
        df: DataFrame with a ``user_id`` column (and a ``time`` column when
            ``object_='multi'``); one row per reported problem.
        object_: ``'binary'`` -> target is 1 for every user that appears in
            ``df`` and 0 otherwise; ``'multi'`` -> target is the number of
            non-null ``time`` rows per user (0 for users absent from ``df``).
        id_start: First user_id of the output, inclusive (default 10000).
        id_stop: End of the user_id range, exclusive (default 25000).

    Returns:
        DataFrame with columns ``user_id`` and ``target`` holding one row for
        every id in ``range(id_start, id_stop)``, in ascending order.

    Raises:
        ValueError: If ``object_`` is neither ``'binary'`` nor ``'multi'``.
    """
    if object_ not in ('binary', 'multi'):
        # Previously an unknown mode silently returned an all-zero frame with
        # a different layout (user_id left in the index).
        raise ValueError(
            "object_ must be 'binary' or 'multi', got {!r}".format(object_)
        )

    # Index covers id_start .. id_stop - 1 (10000 .. 24999 by default).
    user_id_idx = np.arange(id_start, id_stop)
    new_p = pd.DataFrame({'target': 0}, index=user_id_idx)

    if object_ == 'binary':
        # Label-based match instead of a hard-coded positional offset, so ids
        # outside the range are ignored rather than wrapping around or raising.
        reported = new_p.index.isin(df['user_id'].unique())
        new_p.loc[reported, 'target'] = 1
    else:
        # count() aligns on the user_id index; users without a report become
        # NaN and are then set to 0.
        new_p['target'] = df.groupby('user_id')['time'].count()
        new_p['target'] = new_p['target'].fillna(0)

    # Expose the index as a regular ``user_id`` column.
    return new_p.reset_index().rename(columns={'index': 'user_id'})

In [7]:
# Build both target encodings from the problem table: a 0/1 flag per user
# (binary) and a per-user problem count (multi).
train_b_p = preprocessing_problem(train_p, 'binary')
train_m_p = preprocessing_problem(train_p, 'multi')

In [8]:
def preprocessing_quality(df):
    """Clean a quality table in place and return it.

    Steps:
        1. Drop ``quality_3`` and ``quality_4`` (single-valued according to
           the original author's EDA note). Missing columns are ignored so the
           cell can be re-run.
        2. For every remaining column whose name contains ``quality``:
           numeric columns get NaN replaced by -2; any other column is treated
           as text, NaN becomes ``'-2'``, thousands separators (``,``) are
           removed and the result is cast to ``float32``.
        3. ``fwver`` NaN values become the category ``'missing'`` and the
           column is stored as ``category``.

    Args:
        df: Quality DataFrame (train or test). It is modified in place.

    Returns:
        The same DataFrame object, after cleaning.
    """
    df.drop(['quality_3', 'quality_4'], axis=1, inplace=True, errors='ignore')

    # Take the column list from the frame being processed, not from the global
    # ``train_q``, so the function works on any quality table.
    quality_cols = df.columns[df.columns.str.contains('quality')]

    for col in quality_cols:
        if pd.api.types.is_numeric_dtype(df[col]):
            # Already numeric (any int/float width): only fill the gaps.
            df[col] = df[col].fillna(-2)
            continue

        cleaned = (
            df[col]
            .astype('object')
            .fillna('-2')
            .astype(str)
            .str.replace(',', '', regex=False)
        )
        try:
            df[col] = cleaned.astype(np.float32)
        except ValueError:
            # Best effort, as before: a column that still holds non-numeric
            # text is left untouched instead of being half-converted.
            continue

    # fwver has a fair number of nulls; label them explicitly.
    df['fwver'] = (
        df['fwver'].astype('object').fillna('missing').astype('category')
    )

    return df

In [9]:
# Clean both quality tables. preprocessing_quality modifies its argument in
# place and returns it, so the reassignment keeps the same objects.
train_q = preprocessing_quality(train_q)
test_q = preprocessing_quality(test_q)

In [10]:
def preprocessing_fwver(df):
    """Relabel the firmware version ``'10'`` as ``'8.5.2'`` in ``df.fwver``.

    Per the original author's note, the error tables contain a ``fwver`` value
    of ``'10'`` that collides with an errtype value, and that firmware shares
    its model_nm with version 8.5.3, hence the neighbouring label.

    Args:
        df: DataFrame with a ``fwver`` column; the column is overwritten.

    Returns:
        The same DataFrame object.
    """
    ambiguous_version = '10'
    replacement_version = '8.5.2'

    relabelled = df.fwver.replace(ambiguous_version, replacement_version)
    df.fwver = relabelled

    return df

In [11]:
# Relabel the ambiguous fwver value '10' in both error tables.
train_e = preprocessing_fwver(train_e)
test_e = preprocessing_fwver(test_e)

In [12]:
def make_datetime(df, column_name):
    """Replace a ``YYYYMMDDHH...`` column with an hour-resolution datetime.

    The first ten characters of each value are split into the string columns
    ``year``, ``month``, ``day`` and ``hour``; ``minute`` is fixed to ``'00'``.
    Anything after the hour is discarded. The source column is then
    overwritten with the parsed timestamp.

    Args:
        df: DataFrame to modify in place.
        column_name: Name of the column holding the raw timestamp. A column
            that is already datetime-typed is accepted too, so the cell can be
            re-run without corrupting the data.

    Returns:
        The same DataFrame object, with the five helper columns added.
    """
    stamp = df[column_name]
    if pd.api.types.is_datetime64_any_dtype(stamp):
        # Already converted: rebuild the digit string from the timestamp.
        digits = stamp.dt.strftime('%Y%m%d%H')
    else:
        digits = stamp.astype(str)

    # Vectorised slicing instead of one Python-level lambda call per row.
    df['year'] = digits.str[:4]
    df['month'] = digits.str[4:6]
    df['day'] = digits.str[6:8]
    df['hour'] = digits.str[8:10]
    # A minute part is required to complete the timestamp; use 00.
    df['minute'] = '00'

    # An explicit format avoids per-value format inference and any ambiguity
    # in how a 12-digit string is interpreted.
    df[column_name] = pd.to_datetime(
        df['year'] + df['month'] + df['day'] + df['hour'] + df['minute'],
        format='%Y%m%d%H%M',
    )

    return df

In [13]:
# Convert the raw `time` column of every table to an hour-resolution datetime.
# make_datetime also adds year/month/day/hour/minute string columns to each
# table; later cells group by `hour`.
train_p = make_datetime(train_p, 'time')
train_q = make_datetime(train_q, 'time')
test_q = make_datetime(test_q, 'time')
train_e = make_datetime(train_e, 'time')
test_e = make_datetime(test_e, 'time')

# Feature Collection

## err_count based on user_id by train_e

유저 당 err가 몇 번이나 떴는가에 대한 단순 집계입니다. 시리즈로 뽑겠습니다.

In [15]:
# Per-user number of error-log rows, as a Series indexed by user_id.
# NOTE(review): count() skips rows whose errcode is null; if errcode can be
# missing, size() would match "number of errors per user" more closely -- confirm.
train_err_count = train_e.groupby('user_id')['errcode'].count()
test_err_count = test_e.groupby('user_id')['errcode'].count()

In [16]:
# Display the per-user error counts for the training set.
train_err_count

user_id
10000     317
10001    2365
10002     306
10003     306
10004     777
         ... 
24995     194
24996       4
24997     826
24998     155
24999     570
Name: errcode, Length: 15000, dtype: int64

In [17]:
# Display the per-user error counts for the test set.
test_err_count

user_id
30000     2750
30001      284
30002      941
30003      371
30004      881
         ...  
44994     1115
44995      515
44996     2233
44997    24671
44998      873
Name: errcode, Length: 14998, dtype: int64

## EDA: model_nm, fwver, errtype 집계

In [22]:
# Non-null row counts per (user_id, model_nm) pair, with all-NaN rows removed.
# NOTE(review): the float counts and the gaps in the row index of the recorded
# output suggest unobserved pairs were materialised first (presumably model_nm
# is categorical); groupby(..., observed=True) would avoid that -- confirm.
train_e.groupby(['user_id', 'model_nm']).count().reset_index().dropna()

Unnamed: 0,user_id,model_nm,time,fwver,errtype,errcode,year,month,day,hour,minute
3,10000,model_3,317.0,317.0,317.0,317.0,317.0,317.0,317.0,317.0,317.0
11,10001,model_2,2365.0,2365.0,2365.0,2365.0,2365.0,2365.0,2365.0,2365.0,2365.0
21,10002,model_3,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0
29,10003,model_2,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0
36,10004,model_0,777.0,777.0,777.0,777.0,777.0,777.0,777.0,777.0,777.0
...,...,...,...,...,...,...,...,...,...,...,...
134957,24995,model_2,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0,194.0
134967,24996,model_3,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
134973,24997,model_0,826.0,826.0,826.0,826.0,826.0,826.0,826.0,826.0,826.0
134982,24998,model_0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0


In [29]:
# Non-null row counts per (fwver, model_nm) pair in the training error log,
# keeping only pairs without NaN counts.
train_e.groupby(['fwver', 'model_nm']).count().dropna().reset_index()

Unnamed: 0,fwver,model_nm,user_id,time,errtype,errcode,year,month,day,hour,minute
0,03.11.1149,model_4,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0,2114.0
1,03.11.1167,model_4,1505659.0,1505659.0,1505659.0,1505659.0,1505659.0,1505659.0,1505659.0,1505659.0,1505659.0
2,04.16.3553,model_1,5237816.0,5237816.0,5237816.0,5237816.0,5237816.0,5237816.0,5237816.0,5237816.0,5237816.0
3,04.16.3571,model_1,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0
4,04.22.1684,model_0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0
5,04.22.1750,model_0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0
6,04.22.1778,model_0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0
7,04.33.1149,model_2,3272.0,3272.0,3272.0,3272.0,3272.0,3272.0,3272.0,3272.0,3272.0
8,04.33.1171,model_2,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0
9,04.33.1185,model_2,963736.0,963736.0,963736.0,963736.0,963736.0,963736.0,963736.0,963736.0,963736.0


In [44]:
# Sum of the user_id values per (fwver, model_nm) pair in the test error log.
# NOTE(review): adding up identifier values is not a meaningful statistic;
# presumably count() or nunique() was intended -- confirm.
test_e.groupby(['fwver', 'model_nm'])['user_id'].sum().dropna().reset_index()

Unnamed: 0,fwver,model_nm,user_id
0,03.11.1167,model_4,46781890000.0
1,04.16.3553,model_1,199892600000.0
2,04.16.3571,model_1,5543469000.0
3,04.22.1684,model_0,178120000.0
4,04.22.1750,model_0,111028400000.0
5,04.22.1778,model_0,51416340000.0
6,04.33.1149,model_2,198294700.0
7,04.33.1185,model_2,36810170000.0
8,04.33.1261,model_2,97021490000.0
9,05.15.2120,model_3,16776930.0


In [70]:
# Non-null row counts per (model_nm, fwver) pair in the training error log,
# sorted by model_nm to list the firmware versions seen for each model.
train_e.groupby(['model_nm', 'fwver']).count().dropna().reset_index().sort_values('model_nm')

Unnamed: 0,model_nm,fwver,user_id,time,errtype,errcode,year,month,day,hour,minute
0,model_0,04.22.1684,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0,5554.0
1,model_0,04.22.1750,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0,2874213.0
2,model_0,04.22.1778,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0,1293946.0
3,model_0,04.22.1666,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
4,model_0,04.22.1442,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0
5,model_0,04.22.1656,39.0,39.0,39.0,39.0,39.0,39.0,39.0,39.0,39.0
11,model_1,04.16.3345,101.0,101.0,101.0,101.0,101.0,101.0,101.0,101.0,101.0
9,model_1,04.16.3569,980.0,980.0,980.0,980.0,980.0,980.0,980.0,980.0,980.0
10,model_1,04.16.2641,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0,296.0
7,model_1,04.16.3571,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0,145156.0


In [32]:
# Non-null row counts per (model_nm, fwver) pair in the test error log, for
# comparison with the training set.
test_e.groupby(['model_nm', 'fwver']).count().dropna().reset_index()

Unnamed: 0,model_nm,fwver,user_id,time,errtype,errcode,year,month,day,hour,minute
0,model_0,04.22.1684,4967.0,4967.0,4967.0,4967.0,4967.0,4967.0,4967.0,4967.0,4967.0
1,model_0,04.22.1750,2969638.0,2969638.0,2969638.0,2969638.0,2969638.0,2969638.0,2969638.0,2969638.0,2969638.0
2,model_0,04.22.1778,1368279.0,1368279.0,1368279.0,1368279.0,1368279.0,1368279.0,1368279.0,1368279.0,1368279.0
3,model_0,04.22.1478,535.0,535.0,535.0,535.0,535.0,535.0,535.0,535.0,535.0
4,model_0,04.22.1656,835.0,835.0,835.0,835.0,835.0,835.0,835.0,835.0,835.0
5,model_0,04.22.1666,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0
6,model_0,04.22.1608,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0
7,model_0,04.22.1448,840.0,840.0,840.0,840.0,840.0,840.0,840.0,840.0,840.0
8,model_0,10.22.1770,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0
9,model_0,10.22.1780,3804.0,3804.0,3804.0,3804.0,3804.0,3804.0,3804.0,3804.0,3804.0


In [46]:
# errtype occurrence counts per model_nm, pivoted to one column per errtype.
train_e.groupby(['model_nm', 'errtype'])['errtype'].count().unstack()

errtype,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,38,39,40,41,42
model_nm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
model_0,108,3775,0,2596,395563,10164,12037,47,47,20325,85231,87733,2911,54128,545830,394076,5340,958,249,841,248,617067,574183,10960,645,158050,1621,1625,123,737022,4402,68781,71144,3391,4249,4253,1432,8222,243220,35289,8393
model_1,69,655,3,2720,220544,8985,9807,10,23,15059,56414,59574,8439,37596,272336,218642,25268,340,108,292,108,993653,1490911,16651,223,78966,943,947,53,521305,1110834,42544,47470,2359,960,961,1450,2175,120399,14446,249
model_2,78,18161,0,284,303689,14847,15759,43,26,6670,72628,76331,6983,75087,519375,373806,7160,1034,242,920,243,249246,206188,3713,305,211279,982,988,101,718178,13039,66566,44839,4390,4041,4056,1499,5370,365314,54086,25708
model_3,208,3363,29483,998177,14762,12606,12068,33,12,85283,74734,78023,3689,71637,212525,212155,3994,308,57,0,0,0,0,7666,0,83154,1703,1705,93,0,0,0,0,0,0,0,0,0,0,0,0
model_4,21082,0,881,282871,1081,3637,2031,31,1,5584,15161,15530,548,3499,39393,38094,3778,0,0,0,0,0,0,0,0,0,535547,539033,20,0,0,0,0,0,0,0,0,0,0,0,0
model_5,0,122,0,16,4766,88,104,0,0,21,699,726,30,161,5106,3725,68,6,1,6,1,1734,989,7,0,1376,8,8,4,7981,318,519,1640,40,40,39,150,53,1359,301,21
model_7,0,0,326,5733,5118,111,142,2,0,375,1413,1496,101,670,5656,4790,123,17,6,15,6,2025,2905,30,8,2035,0,0,4,510,1347,36,262,3,15,15,4,0,38,16,3
model_8,0,30,65,4353,5651,44,45,0,0,86,750,768,142,9237,4730,4088,278,8,1,6,1,1636,1339,44,3,1541,0,0,4,298,624,31,181,4,12,12,4,0,48,0,0
model_6,0,0,0,0,1746,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [47]:
# errtype occurrence counts per fwver, pivoted to one column per errtype.
train_e.groupby(['fwver', 'errtype'])['errtype'].count().unstack()

errtype,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,38,39,40,41,42
fwver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
03.11.1149,90,0,0,399,26,29,11,0,0,0,0,3,0,14,0,0,0,0,0,0,0,0,0,0,0,0,767,775,0,0,0,0,0,0,0,0,0,0,0,0,0
03.11.1167,20989,0,881,282472,1054,3605,2018,31,1,5584,15161,15527,548,3482,39393,38094,3778,0,0,0,0,0,0,0,0,0,534772,538250,19,0,0,0,0,0,0,0,0,0,0,0,0
04.16.3553,69,627,0,2596,215494,8868,9686,10,23,14731,54873,57994,8404,35899,264147,212222,24619,282,82,251,82,970609,1468296,16589,171,74951,931,934,47,506653,1073268,41282,36267,2304,452,453,1371,1633,116527,13965,154
04.16.3571,0,28,0,0,5049,113,114,0,0,324,1506,1544,34,1683,8085,6319,648,58,26,41,26,22716,22251,62,52,3960,10,11,6,14550,37367,1250,11193,54,508,508,77,542,3868,481,92
04.22.1684,1,0,0,7,1321,22,28,0,0,20,23,29,9,291,139,134,8,1,0,2,0,22,21,0,1,15,11,11,0,2930,0,4,54,15,0,0,131,0,273,31,0
04.22.1750,83,2461,0,2075,279747,7255,8961,40,26,14541,58650,60456,1988,41722,378368,273167,3051,645,165,574,164,418553,406403,8171,439,83823,987,993,85,510162,3104,46718,54132,2408,83,90,1009,5632,173898,23316,68
04.22.1778,24,1314,0,24,114488,2873,3037,7,21,5763,26498,27179,900,12098,167034,120600,2281,310,84,264,84,197832,167060,2789,204,74159,622,620,38,223930,1298,22059,16958,968,4166,4163,291,2590,69049,11942,8325
04.33.1149,1,0,0,0,316,14,14,0,0,4,88,94,6,44,534,279,5,23,1,23,1,131,68,0,0,122,0,0,2,654,0,182,188,3,0,10,32,6,332,95,0
04.33.1171,0,0,0,0,0,0,1,0,0,0,1,1,0,5,6,4,0,1,0,1,0,0,0,0,0,2,0,0,0,28,0,3,0,0,0,0,0,0,4,1,0
04.33.1185,11,10869,0,13,89893,4724,5076,7,18,2122,19547,20751,1783,19287,142533,101166,1636,247,33,223,33,71001,66507,502,56,34059,311,315,28,197251,4078,17509,11380,1367,195,201,478,1206,122581,14724,15


In [73]:
# Display the binary target table (one row per user_id).
train_b_p

Unnamed: 0,user_id,target
0,10000,0
1,10001,1
2,10002,0
3,10003,0
4,10004,1
...,...,...
14995,24995,0
14996,24996,0
14997,24997,1
14998,24998,1


In [89]:
# Display the training error log after the datetime conversion.
train_e

Unnamed: 0,user_id,time,model_nm,fwver,errtype,errcode,year,month,day,hour,minute
0,10000,2020-11-01 02:00:00,model_3,05.15.2138,15,1,2020,11,01,02,00
1,10000,2020-11-01 03:00:00,model_3,05.15.2138,12,1,2020,11,01,03,00
2,10000,2020-11-01 03:00:00,model_3,05.15.2138,11,1,2020,11,01,03,00
3,10000,2020-11-01 05:00:00,model_3,05.15.2138,16,1,2020,11,01,05,00
4,10000,2020-11-01 05:00:00,model_3,05.15.2138,4,0,2020,11,01,05,00
...,...,...,...,...,...,...,...,...,...,...,...
16554658,24999,2020-11-30 16:00:00,model_3,05.15.2138,15,1,2020,11,30,16,00
16554659,24999,2020-11-30 17:00:00,model_3,05.15.2138,16,1,2020,11,30,17,00
16554660,24999,2020-11-30 17:00:00,model_3,05.15.2138,4,0,2020,11,30,17,00
16554661,24999,2020-11-30 17:00:00,model_3,05.15.2138,4,0,2020,11,30,17,00


In [122]:
# errtype occurrence counts per hour-of-day string, one column per errtype;
# (hour, errtype) combinations that never occur are shown as 0.
train_e.groupby(['hour','errtype'])['errtype'].count().unstack().fillna(0)

errtype,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,38,39,40,41,42
hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
0,91.0,1640.0,1025.0,46989.0,40875.0,1750.0,1712.0,5.0,1.0,31690.0,4708.0,4846.0,873.0,10081.0,73849.0,25269.0,1272.0,23.0,16.0,21.0,16.0,67577.0,90924.0,1489.0,18.0,146101.0,14544.0,14735.0,0.0,47978.0,52679.0,1098.0,4077.0,391.0,38.0,38.0,177.0,1131.0,28712.0,3894.0,17.0
1,48.0,1575.0,1061.0,43735.0,26982.0,1889.0,1711.0,5.0,4.0,3169.0,4014.0,4347.0,813.0,8229.0,45575.0,16540.0,539.0,22.0,12.0,13.0,12.0,54669.0,72663.0,1181.0,13.0,21237.0,12947.0,13036.0,0.0,33218.0,54191.0,817.0,2544.0,291.0,20.0,20.0,111.0,844.0,20156.0,2039.0,10.0
2,4874.0,926.0,1033.0,49389.0,20453.0,2000.0,1989.0,0.0,1.0,9564.0,45338.0,45709.0,937.0,8608.0,121253.0,12350.0,392.0,12.0,4.0,8.0,4.0,94828.0,70801.0,1232.0,15.0,10299.0,15679.0,15786.0,0.0,42861.0,49223.0,32287.0,1340.0,253.0,1769.0,1769.0,96.0,815.0,17053.0,1193.0,9025.0
3,4814.0,777.0,1204.0,49491.0,15700.0,2307.0,2063.0,0.0,0.0,3693.0,49176.0,49649.0,817.0,10429.0,120837.0,11390.0,532.0,4.0,1.0,4.0,1.0,94043.0,67488.0,1157.0,3.0,8231.0,17487.0,17665.0,0.0,41694.0,53642.0,34695.0,1246.0,207.0,2120.0,2117.0,79.0,404.0,11506.0,757.0,9530.0
4,5047.0,634.0,1361.0,51680.0,13469.0,1951.0,1917.0,0.0,1.0,2950.0,50550.0,51066.0,993.0,10758.0,120680.0,14945.0,570.0,4.0,1.0,3.0,1.0,91066.0,61076.0,1320.0,0.0,10485.0,17693.0,17934.0,0.0,43906.0,44430.0,35735.0,1170.0,98.0,2263.0,2265.0,91.0,353.0,11232.0,615.0,10111.0
5,2600.0,499.0,1748.0,54072.0,12153.0,2517.0,2503.0,0.0,2.0,2786.0,28990.0,30197.0,1454.0,11112.0,73448.0,26291.0,1154.0,5.0,4.0,4.0,4.0,62097.0,51706.0,1090.0,0.0,19031.0,19819.0,19918.0,0.0,45654.0,39647.0,19494.0,1339.0,83.0,1418.0,1421.0,88.0,312.0,14260.0,569.0,4857.0
6,141.0,385.0,1488.0,55515.0,12162.0,2193.0,2163.0,3.0,3.0,2751.0,5536.0,6596.0,1462.0,10557.0,29009.0,48502.0,2665.0,16.0,12.0,7.0,5.0,39487.0,51203.0,1107.0,4.0,34024.0,17649.0,17851.0,1.0,62428.0,37075.0,2307.0,3154.0,118.0,176.0,176.0,98.0,268.0,21807.0,763.0,10.0
7,186.0,582.0,1560.0,64503.0,21078.0,2323.0,2280.0,0.0,1.0,2881.0,6941.0,8048.0,1580.0,10937.0,46004.0,78625.0,4580.0,19.0,6.0,9.0,6.0,50128.0,65812.0,1459.0,20.0,51775.0,21606.0,21756.0,0.0,104694.0,39719.0,3512.0,5422.0,158.0,162.0,164.0,160.0,435.0,32080.0,2032.0,18.0
8,233.0,897.0,1050.0,61840.0,30404.0,1751.0,1661.0,2.0,2.0,3028.0,7068.0,7541.0,902.0,9965.0,64069.0,79518.0,4258.0,67.0,28.0,46.0,26.0,64395.0,86004.0,1589.0,36.0,44162.0,21387.0,21479.0,1.0,111260.0,42441.0,3248.0,7517.0,244.0,117.0,116.0,154.0,524.0,35753.0,3749.0,33.0
9,186.0,598.0,1120.0,58227.0,39128.0,1859.0,1750.0,8.0,1.0,2973.0,6724.0,7157.0,1073.0,10828.0,61565.0,64650.0,3052.0,147.0,33.0,114.0,32.0,67380.0,89938.0,1496.0,86.0,29165.0,24723.0,24922.0,32.0,96318.0,40916.0,3141.0,7357.0,387.0,131.0,132.0,176.0,573.0,33654.0,4069.0,40.0


In [80]:
# Non-null errcode count per (user_id, fwver) pair, flattened to plain columns.
temp = train_e.groupby(['user_id', 'fwver'])['errcode'].count().reset_index()

In [83]:
# Keep only the (user_id, fwver) pairs that have at least one counted error.
temp_2 = temp.loc[temp['errcode'].ne(0)]

In [90]:
# Attach the binary target to every error-log row; the outer join also keeps
# users that appear in only one of the two tables.
temp_3 = train_e.merge(train_b_p, how='outer', on='user_id')

In [105]:
# Per (fwver, user_id) pair, the maximum target over the merged rows, i.e.
# whether that user (on that firmware) is flagged in train_b_p.
# NOTE(review): if fwver is categorical, every fwver x user_id combination may
# be produced, with NaN targets for unobserved pairs -- confirm; observed=True
# would restrict the result to pairs that actually occur.
temp_4 = temp_3.groupby(['fwver', 'user_id'])['target'].max().reset_index()

In [117]:
# Per (model_nm, user_id) pair, the maximum target over the merged rows.
# NOTE(review): if model_nm is categorical, every model_nm x user_id
# combination may be produced, with NaN targets for unobserved pairs --
# confirm; observed=True would restrict the result to pairs that occur.
temp_5 = temp_3.groupby(['model_nm', 'user_id'])['target'].max().reset_index()

In [111]:
# Column sums per fwver. The `target` sum is the number of (fwver, user_id)
# pairs whose maximum target is 1.
# NOTE(review): the `user_id` sum is not meaningful; the recorded output shows
# the same total for every fwver, so presumably each group contains every
# user_id. Selecting ['target'] before sum() would drop it -- confirm.
temp_4.groupby('fwver').sum()

Unnamed: 0_level_0,user_id,target
fwver,Unnamed: 1_level_1,Unnamed: 2_level_1
03.11.1149,262492500,0.0
03.11.1167,262492500,141.0
04.16.3553,262492500,1314.0
04.16.3571,262492500,295.0
04.22.1684,262492500,28.0
04.22.1750,262492500,1630.0
04.22.1778,262492500,1586.0
04.33.1149,262492500,112.0
04.33.1171,262492500,1.0
04.33.1185,262492500,1128.0


In [118]:
# Column sums per model_nm. The `target` sum is the number of
# (model_nm, user_id) pairs whose maximum target is 1.
# NOTE(review): the `user_id` sum is not meaningful; the recorded output shows
# the same total for every model_nm, so presumably each group contains every
# user_id. Selecting ['target'] before sum() would drop it -- confirm.
temp_5.groupby('model_nm').sum()

Unnamed: 0_level_0,user_id,target
model_nm,Unnamed: 1_level_1,Unnamed: 2_level_1
model_0,262492500,1637.0
model_1,262492500,1318.0
model_2,262492500,1744.0
model_3,262492500,720.0
model_4,262492500,141.0
model_5,262492500,20.0
model_7,262492500,32.0
model_8,262492500,22.0
model_6,262492500,9.0
