In [1]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
# Raise pandas' display limits to 1000 columns and 1000 rows.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, 1000)

%matplotlib inline

In [None]:
# Load the processed train/test tables, using the "id" column as the index.
train_df = pd.read_csv("../../data/Processed/train2.csv", index_col='id')
test_df = pd.read_csv("../../data/Processed/test2.csv", index_col='id')

# Row counts of each split and of both splits together.
train_size, test_size = len(train_df), len(test_df)
all_size = train_size + test_size

# Fraction of all rows that falls into each split.
train_ratio = train_size / all_size
test_ratio = test_size / all_size

print("train data size : ", train_size)
print("test data size : ", test_size)
print("the ratio of train and test  ", train_ratio, " : ", test_ratio)
train_df.head()

## カラムの確認

In [None]:
# Print the column labels of the processed training frame.
print(train_df.columns)

In [None]:
# Parse the "period" column of both splits into datetimes (overwrites the column).
for frame in (train_df, test_df):
    frame["period"] = pd.to_datetime(frame["period"])

# Each line prints True when the two splits share the same latest / earliest period.
train_period, test_period = train_df["period"], test_df["period"]
print(train_period.max() == test_period.max())
print(train_period.min() == test_period.min())

In [None]:
# Histogram of the "period" column of the training split.
train_df["period"].hist()

In [None]:
# Histogram of the "period" column of the test split.
test_df["period"].hist()

In [None]:
# Maximum, minimum and standard deviation of "A1-level" in the training split,
# printed one per line in that order.
a1_level = train_df["A1-level"]
for stat_name in ("max", "min", "std"):
    print(getattr(a1_level, stat_name)())

In [None]:
# Distinct values of "category1-A1" in the training split.
train_df["category1-A1"].unique()

In [None]:
# Distinct values of "category2-A1" in the training split.
train_df["category2-A1"].unique()

In [None]:
# First rows of the two A1 category columns, shown side by side.
train_df[["category1-A1", "category2-A1"]].head()

In [None]:
# Shape of the array of distinct "stage" values, i.e. (number of distinct stages,).
train_df["stage"].unique().shape

In [None]:
# For every game mode, print the share of its training rows whose target y equals 1.
for mode in train_df["mode"].unique():
    # Filter once and reuse the subset for both the numerator and the denominator.
    mode_rows = train_df[train_df["mode"] == mode]
    # Count y == 1 with a boolean mask instead of value_counts().loc[1]: the latter
    # raises KeyError when a mode has no y == 1 rows, whereas this yields a rate of 0.
    rate = (mode_rows["y"] == 1).sum() / mode_rows.shape[0]
    print("{} : {}".format(mode, rate))

In [2]:
# Read the raw train/test CSVs and stack them (train rows first) under a fresh
# 0..N-1 index.
train = pd.read_csv("../../data/Raw/train_data.csv")
test = pd.read_csv("../../data/Raw/test_data.csv")
data = pd.concat([train, test], ignore_index=True)

In [3]:
def identify_A1(df1, df2, threshold=15):
    """Attach a heuristic "a1-player" label to every row of two match tables.

    Both frames are stacked, ordered by ``period`` and then ``A1-level``, and the
    resulting level sequence is split into chains in which the level either stays
    the same or rises by exactly one. Every chain receives its own integer label
    (1, 2, 3, ...), which is stored in a new ``a1-player`` column.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Tables that both contain ``period`` and ``A1-level`` columns. They are
        concatenated into a new frame; the inputs themselves are not modified.
    threshold : int, default 15
        Minimum number of rows a chain must have collected at its current level
        before a row with ``level + 1`` is accepted as a level-up of that chain.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The rows of ``df1`` and of ``df2`` (in their original order) taken from
        the combined frame, so each has the union of both inputs' columns plus
        ``a1-player``. Both results are indexed from 0.
    """
    all_df = pd.concat([df1, df2]).reset_index(drop=True)

    def get_seq_labels(seq, threshold=0):
        """Label a chronologically ordered level sequence chain by chain.

        seq       : list of levels in chronological order
        threshold : minimum number of matches at a level before a level-up
                    is accepted

        Example with threshold=0:
        [3,3,3,4,4,4,4,4,7,7,7,7,2,2,2,1,1,1,2,8,8,8] : level
         => [1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,4,4,4,3,2,2,2] : player id

        Returns a numpy int array with one label (>= 1) per element of seq;
        an empty seq gives an empty array.
        """
        n = len(seq)
        labels = [0] * n  # 0 means "not labelled yet"
        count = 0         # label of the chain currently being built
        start = 0         # every position before `start` is already labelled

        # One pass per chain, repeated until every row is labelled. There is no
        # fixed cap on the number of passes, so no row is ever left with label 0.
        while True:
            while start < n and labels[start] != 0:
                start += 1
            if start == n:
                break

            count += 1
            s = seq[start]  # level the current chain is at
            # The seed row always joins its own chain. Labelling it explicitly
            # guarantees progress even when its level does not compare equal to
            # itself (NaN).
            labels[start] = count
            games_at_level = 1  # rows of this chain seen at level `s`

            for i in range(start + 1, n):
                if labels[i] != 0:
                    continue
                if seq[i] == s:
                    labels[i] = count
                    games_at_level += 1
                elif seq[i] == s + 1 and games_at_level >= threshold:
                    # Level-up: the row that triggers it is not counted towards
                    # the new level's tally.
                    s += 1
                    labels[i] = count
                    games_at_level = 0

        return np.array(labels, dtype=int)

    all_df = all_df.sort_values(["period", "A1-level"])
    levels = all_df["A1-level"].tolist()  # A1 levels in chronological order

    all_df["a1-player"] = get_seq_labels(levels, threshold)
    all_df = all_df.sort_index()  # back to the stacked df1-then-df2 row order

    n_first = df1.shape[0]
    first = all_df[:n_first]
    second = all_df[n_first:].reset_index(drop=True)
    return first, second

In [4]:
# Replace train/test with the frames returned by identify_A1, which carry the
# "a1-player" column.
train, test = identify_A1(train, test)

In [None]:
# Training rows labelled a1-player 1, ordered by period and then A1 level.
train[train["a1-player"] == 1].sort_values(["period", "A1-level"])

In [None]:
# Test rows labelled a1-player 1, ordered by period and then A1 level.
test[test["a1-player"] == 1].sort_values(["period", "A1-level"])

In [None]:
# Number of missing values in the "y" column of train.
train["y"].isnull().sum()

In [None]:
# First train.shape[0] rows of the combined raw frame `data`, which stacks the raw
# train rows before the raw test rows. `all_df` is only a local variable inside
# identify_A1, so referencing it at notebook level raised NameError on a fresh
# kernel. Note that `data` was built before identify_A1 ran, so it has no
# "a1-player" column.
data.iloc[:train.shape[0]]

In [5]:
# Display the test frame.
test

Unnamed: 0,id,period,game-ver,lobby-mode,lobby,mode,stage,A1-weapon,A1-rank,A1-level,A2-weapon,A2-rank,A2-level,A3-weapon,A3-rank,A3-level,A4-weapon,A4-rank,A4-level,B1-weapon,B1-rank,B1-level,B2-weapon,B2-rank,B2-level,B3-weapon,B3-rank,B3-level,B4-weapon,B4-rank,B4-level,y,a1-player
0,1,2019-12-17T12:00:00+00:00,5.0.1,gachi,standard,area,hakofugu,prime_collabo,x,174,herospinner_replica,x,130.0,nzap89,x,127.0,nova_becchu,x,233.0,furo,x,160,nautilus47,x,151.0,l3reelgun_d,x,213.0,nzap89,x,306.0,,34
1,2,2019-11-25T16:00:00+00:00,5.0.1,gachi,standard,asari,anchovy,prime_becchu,s+,363,nzap89,s+,59.0,dynamo_becchu,s+,36.0,rapid_becchu,s+,225.0,furo,s+,326,dualsweeper_custom,s+,289.0,prime_becchu,s+,147.0,splatroller,s+,156.0,,25
2,3,2019-10-22T08:00:00+00:00,5.0.1,gachi,standard,area,mutsugoro,furo_deco,s,116,bold,s,125.0,nzap85,s,124.0,wakaba,s,181.0,splatroller,s,76,momiji,s,232.0,nzap83,s,183.0,promodeler_pg,s,105.0,,2
3,4,2019-12-30T04:00:00+00:00,5.0.1,gachi,standard,asari,devon,prime_becchu,s+,192,splatspinner_collabo,s+,115.0,pablo,s+,171.0,dualsweeper_custom,s+,170.0,momiji,s+,90,quadhopper_black,s+,119.0,soytuber,s+,172.0,prime,s+,70.0,,83
4,5,2019-10-15T16:00:00+00:00,5.0.1,gachi,standard,yagura,anchovy,l3reelgun_d,x,267,rapid_becchu,x,223.0,heroroller_replica,x,240.0,hydra_custom,x,190.0,nzap83,x,259,sshooter_becchu,x,198.0,splatscope,x,50.0,screwslosher_becchu,x,287.0,,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28335,28336,2019-10-11T16:00:00+00:00,5.0.1,gachi,standard,yagura,battera,prime_becchu,x,67,splatroller,x,62.0,bold,x,136.0,furo,x,131.0,longblaster_custom,x,139,hissen,x,152.0,prime_becchu,x,240.0,nzap83,x,150.0,,14
28336,28337,2019-10-20T12:00:00+00:00,5.0.1,gachi,standard,yagura,anchovy,longblaster_necro,c-,25,splatroller_collabo,c-,18.0,quadhopper_black,c,19.0,bold,c-,18.0,hokusai,c-,10,bamboo14mk1,c,18.0,nzap89,c-,17.0,dualsweeper,c+,35.0,,74
28337,28338,2019-12-14T00:00:00+00:00,5.0.1,gachi,standard,yagura,zatou,furo,x,386,kugelschreiber,x,136.0,heroroller_replica,x,198.0,clashblaster_neo,x,512.0,rapid_becchu,x,62,bamboo14mk1,x,250.0,carbon_deco,x,257.0,l3reelgun_d,x,243.0,,127
28338,28339,2019-10-12T04:00:00+00:00,5.0.1,gachi,standard,asari,hokke,maneuver_becchu,s+,185,wakaba,s+,103.0,furo,s+,202.0,pablo,s+,477.0,dualsweeper_custom,s+,89,bold,s+,298.0,hokusai,s+,152.0,ochiba,s+,130.0,,83


In [7]:
# Load the Shift-JIS encoded specials CSV and display it.
sp = pd.read_csv("../../data/Raw/specialのコピー.csv", encoding="shiftjis")
sp

Unnamed: 0,special_,damage_min-special,damage_max-special,duration-special,good-special
0,マルチミサイル,30.0,150.0,10,173
1,ハイパープレッサー,2.0,2.0,7,116
2,ジェットパック,30.0,120.0,8,102
3,スーパーチャクチ,55.0,180.0,0,176
4,インクアーマー,0.0,0.0,8,132
5,ボムピッチャー,0.0,0.0,6,66
6,アメフラシ,24.0,24.0,9,118
7,イカスフィア,55.0,180.0,7,78
8,バブルランチャー,30.0,30.0,8,79
9,ナイスダマ,0.0,0.0,9,43
