In [36]:
%load_ext autoreload
%matplotlib inline
%autoreload 
import matplotlib.pyplot as plt
import sklearn
import pandas as pd
import numpy as np
import librosa
from librosa.core.spectrum import _spectrogram
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [37]:
# load data
df_resampled = pd.read_pickle("./data_for_modeling.pickle")

In [38]:
df_resampled = df_resampled[df_resampled['tp'] < (0.01 / 1000) / 3600]

In [39]:
df_copy = df_resampled.copy()
# 飛沫データのラベリング
spray_all = np.array(df_copy['SPC_flux[mm]'])
# 飛沫発生ならlabel=1, 飛沫0.0ならlabel=0
spc_label = [1 if i > 0.0 else 0 for i in spray_all]
df_copy['label'] = spc_label

In [40]:
print(len(df_copy), len(df_copy[df_copy['label'] != 0.0]), len(df_copy[df_copy['label'] != 0.0]) / len(df_copy) * 100)

944088 46491 4.9244350102956505


# 普通にlightgbm

In [124]:
all_columns = list(df_copy.columns)
all_columns.remove("label")
all_columns.remove('SPC_flux[mm]')

# 用いる特徴量
for_modeling = ['ratio', 'encounter cycle', 'swh', 'relative_wind_y', 'Speed[knot]']
#for_modeling = all_columns
for_modeling.extend([i+1 for i in range(30)])
print(for_modeling)
X = df_copy.loc[:, for_modeling]
X = np.array(X)
y = np.array(df_copy['label'])

['ratio', 'encounter cycle', 'swh', 'relative_wind_y', 'Speed[knot]', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]


In [125]:
import lightgbm as lgb
from sklearn.metrics import roc_auc_score

def imbalanced_data_split(X, y, test_size=0.2):
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=3)
    for train_index, test_index in sss.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = imbalanced_data_split(X, y, test_size=0.2)
# for validation
X_train2, X_valid, y_train2, y_valid = imbalanced_data_split(X_train, y_train, test_size=0.2)

In [126]:
lgbm_params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
}

def lgbm_train(X_train_df, X_valid_df, y_train_df, y_valid_df, lgbm_params):
    lgb_train = lgb.Dataset(X_train_df, y_train_df)
    lgb_eval = lgb.Dataset(X_valid_df, y_valid_df, reference=lgb_train)

    # 上記のパラメータでモデルを学習する
    model = lgb.train(lgbm_params, lgb_train,
                      # モデルの評価用データを渡す
                      valid_sets=lgb_eval,
                      # 最大で 1000 ラウンドまで学習する
                      num_boost_round=1000,
                      # 10 ラウンド経過しても性能が向上しないときは学習を打ち切る
                      early_stopping_rounds=10)
    
    return model

model_normal = lgbm_train(X_train2, X_valid, y_train2, y_valid, lgbm_params)

y_pred = model_normal.predict(X_test, num_iteration=model_normal.best_iteration)
y_pred_binary = []
for p in y_pred:
    if p >= 0.2:
        y_pred_binary.append(1)
    else:
        y_pred_binary.append(0)

from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score
cm = confusion_matrix(y_test, y_pred_binary)
print(cm)
print("accuracy: ", accuracy_score(y_pred_binary, y_test))
print("recall:", recall_score(y_pred_binary, y_test))
print("precision:", precision_score(y_pred_binary, y_test))
print("f1_score:", f1_score(y_pred_binary, y_test))

[1]	valid_0's binary_logloss: 0.178744
Training until validation scores don't improve for 10 rounds
[2]	valid_0's binary_logloss: 0.169778
[3]	valid_0's binary_logloss: 0.163621
[4]	valid_0's binary_logloss: 0.158414
[5]	valid_0's binary_logloss: 0.154586
[6]	valid_0's binary_logloss: 0.151064
[7]	valid_0's binary_logloss: 0.148209
[8]	valid_0's binary_logloss: 0.145998
[9]	valid_0's binary_logloss: 0.143955
[10]	valid_0's binary_logloss: 0.142164
[11]	valid_0's binary_logloss: 0.140645
[12]	valid_0's binary_logloss: 0.139189
[13]	valid_0's binary_logloss: 0.137952
[14]	valid_0's binary_logloss: 0.136812
[15]	valid_0's binary_logloss: 0.135659
[16]	valid_0's binary_logloss: 0.134627
[17]	valid_0's binary_logloss: 0.133822
[18]	valid_0's binary_logloss: 0.132994
[19]	valid_0's binary_logloss: 0.132215
[20]	valid_0's binary_logloss: 0.131353
[21]	valid_0's binary_logloss: 0.130744
[22]	valid_0's binary_logloss: 0.129933
[23]	valid_0's binary_logloss: 0.129284
[24]	valid_0's binary_loglos

[210]	valid_0's binary_logloss: 0.11529
[211]	valid_0's binary_logloss: 0.115285
[212]	valid_0's binary_logloss: 0.115193
[213]	valid_0's binary_logloss: 0.115173
[214]	valid_0's binary_logloss: 0.115123
[215]	valid_0's binary_logloss: 0.115067
[216]	valid_0's binary_logloss: 0.11504
[217]	valid_0's binary_logloss: 0.11504
[218]	valid_0's binary_logloss: 0.115001
[219]	valid_0's binary_logloss: 0.115005
[220]	valid_0's binary_logloss: 0.114982
[221]	valid_0's binary_logloss: 0.114967
[222]	valid_0's binary_logloss: 0.11498
[223]	valid_0's binary_logloss: 0.114942
[224]	valid_0's binary_logloss: 0.114927
[225]	valid_0's binary_logloss: 0.114915
[226]	valid_0's binary_logloss: 0.114921
[227]	valid_0's binary_logloss: 0.11491
[228]	valid_0's binary_logloss: 0.114898
[229]	valid_0's binary_logloss: 0.114892
[230]	valid_0's binary_logloss: 0.114878
[231]	valid_0's binary_logloss: 0.114875
[232]	valid_0's binary_logloss: 0.114866
[233]	valid_0's binary_logloss: 0.114862
[234]	valid_0's binar

[415]	valid_0's binary_logloss: 0.112564
[416]	valid_0's binary_logloss: 0.112552
[417]	valid_0's binary_logloss: 0.112552
[418]	valid_0's binary_logloss: 0.112501
[419]	valid_0's binary_logloss: 0.112499
[420]	valid_0's binary_logloss: 0.112502
[421]	valid_0's binary_logloss: 0.112485
[422]	valid_0's binary_logloss: 0.112462
[423]	valid_0's binary_logloss: 0.112459
[424]	valid_0's binary_logloss: 0.112461
[425]	valid_0's binary_logloss: 0.112451
[426]	valid_0's binary_logloss: 0.112453
[427]	valid_0's binary_logloss: 0.112452
[428]	valid_0's binary_logloss: 0.112447
[429]	valid_0's binary_logloss: 0.112455
[430]	valid_0's binary_logloss: 0.112451
[431]	valid_0's binary_logloss: 0.112444
[432]	valid_0's binary_logloss: 0.112446
[433]	valid_0's binary_logloss: 0.112444
[434]	valid_0's binary_logloss: 0.112445
[435]	valid_0's binary_logloss: 0.112405
[436]	valid_0's binary_logloss: 0.112397
[437]	valid_0's binary_logloss: 0.112382
[438]	valid_0's binary_logloss: 0.11236
[439]	valid_0's b

[623]	valid_0's binary_logloss: 0.110837
[624]	valid_0's binary_logloss: 0.110838
[625]	valid_0's binary_logloss: 0.110824
[626]	valid_0's binary_logloss: 0.110772
[627]	valid_0's binary_logloss: 0.110775
[628]	valid_0's binary_logloss: 0.110763
[629]	valid_0's binary_logloss: 0.110753
[630]	valid_0's binary_logloss: 0.110748
[631]	valid_0's binary_logloss: 0.110736
[632]	valid_0's binary_logloss: 0.110748
[633]	valid_0's binary_logloss: 0.110751
[634]	valid_0's binary_logloss: 0.110746
[635]	valid_0's binary_logloss: 0.110751
[636]	valid_0's binary_logloss: 0.110748
[637]	valid_0's binary_logloss: 0.110742
[638]	valid_0's binary_logloss: 0.11073
[639]	valid_0's binary_logloss: 0.11072
[640]	valid_0's binary_logloss: 0.110706
[641]	valid_0's binary_logloss: 0.110703
[642]	valid_0's binary_logloss: 0.11069
[643]	valid_0's binary_logloss: 0.110704
[644]	valid_0's binary_logloss: 0.110705
[645]	valid_0's binary_logloss: 0.110697
[646]	valid_0's binary_logloss: 0.110664
[647]	valid_0's bin

In [127]:
importance = pd.DataFrame(model_normal.feature_importance(), index=for_modeling, columns=['importance'])
display(importance.sort_values('importance', ascending=False))

Unnamed: 0,importance
relative_wind_y,1984
encounter cycle,1910
Speed[knot],1880
ratio,1503
swh,1370
30,762
1,714
2,595
3,527
4,465


In [96]:
X_train, X_test, y_train, y_test = imbalanced_data_split(X, y, seed1, test_size=0.3)
oss = OneSidedSelection()
X_resampled, y_resampled = oss.fit_sample(X_train, y_train)

# undersampling + bagging

In [103]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.under_sampling import OneSidedSelection
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score

all_columns = list(df_copy.columns)
all_columns.remove("label")
all_columns.remove('SPC_flux[mm]')

# 用いる特徴量
#for_modeling = ['ratio', 'encounter cycle', 'swh']
for_modeling = all_columns
#for_modeling.extend([i+1 for i in range(30)])
print(for_modeling)
X = df_copy.loc[:, for_modeling]
X = np.array(X)
y = np.array(df_copy['label'])

from datetime import datetime
date = datetime.now()
seed1 = date.minute + date.second + date.hour
print(seed1)

X_train, X_test, y_train, y_test = imbalanced_data_split(X, y, seed1, test_size=0.3)
# for validation
X_train2, X_valid, y_train2, y_valid = imbalanced_data_split(X_train, y_train, seed1, test_size=0.3)

lgbm_params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
}

def imbalanced_data_split(X, y, seed, test_size=0.2):
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=seed)
    for train_index, test_index in sss.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        return X_train, X_test, y_train, y_test

def bagging(seed):
    sampler = RandomUnderSampler(random_state=seed, replacement=True)
    X_resampled, y_resampled = sampler.fit_resample(X_train, y_train)
    X_train2, X_valid, y_train2, y_valid = imbalanced_data_split(X_resampled, y_resampled, seed, test_size=0.3)
    model_bagging = lgbm_train(X_train2, X_valid, y_train2, y_valid, lgbm_params)
    return model_bagging

# def bagging(seed):
#     oss = OneSidedSelection()
#     X_resampled, y_resampled = oss.fit_sample(X_train, y_train)
#     X_train2, X_valid, y_train2, y_valid = imbalanced_data_split(X_resampled, y_resampled, seed, test_size=0.3)
#     model_bagging = lgbm_train(X_train2, X_valid, y_train2, y_valid, lgbm_params)
#     return model_bagging

models = []

from datetime import datetime
date = datetime.now()
seed2 = date.minute + date.second + date.hour + date.year
print(seed2)

for i in range(10):
    models.append(bagging(seed2))
    
y_preds = []

for m in models:
    y_preds.append(m.predict(X_test, num_iteration=m.best_iteration))
    
y_preds_bagging = sum(y_preds)/len(y_preds)

['ピッチ (度)', 'Relative wind speed[m/s]', '相対風速 (m/s)_cos', '相対風速 (m/s)_sin', 'Course[deg]', 'Speed[knot]', 'mwp', 't2m', 'sst', 'mwd', 'tp', 'swh', 'u10', 'v10', 'relative_wind_y', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 'Relative_wind_direction[deg]', 'Relative wave direction[deg]', 'Speed[m/s]', 'encounter cycle', 'wave_length', 'ratio']
8
2028
[1]	valid_0's binary_logloss: 0.649435
Training until validation scores don't improve for 10 rounds
[2]	valid_0's binary_logloss: 0.613408
[3]	valid_0's binary_logloss: 0.584365
[4]	valid_0's binary_logloss: 0.559393
[5]	valid_0's binary_logloss: 0.538627
[6]	valid_0's binary_logloss: 0.518553
[7]	valid_0's binary_logloss: 0.500944
[8]	valid_0's binary_logloss: 0.485994
[9]	valid_0's binary_logloss: 0.471535
[10]	valid_0's binary_logloss: 0.458312
[11]	valid_0's binary_logloss: 0.447738
[12]	valid_0's binary_logloss: 0.43796
[13]	valid_0's binary_logloss: 0.429369
[14]	valid

[222]	valid_0's binary_logloss: 0.317276
[223]	valid_0's binary_logloss: 0.317235
[224]	valid_0's binary_logloss: 0.316992
[225]	valid_0's binary_logloss: 0.316957
[226]	valid_0's binary_logloss: 0.316841
[227]	valid_0's binary_logloss: 0.316746
[228]	valid_0's binary_logloss: 0.316671
[229]	valid_0's binary_logloss: 0.316588
[230]	valid_0's binary_logloss: 0.31649
[231]	valid_0's binary_logloss: 0.316425
[232]	valid_0's binary_logloss: 0.316258
[233]	valid_0's binary_logloss: 0.316222
[234]	valid_0's binary_logloss: 0.316271
[235]	valid_0's binary_logloss: 0.316175
[236]	valid_0's binary_logloss: 0.316166
[237]	valid_0's binary_logloss: 0.316222
[238]	valid_0's binary_logloss: 0.316047
[239]	valid_0's binary_logloss: 0.315846
[240]	valid_0's binary_logloss: 0.315835
[241]	valid_0's binary_logloss: 0.315832
[242]	valid_0's binary_logloss: 0.315668
[243]	valid_0's binary_logloss: 0.31562
[244]	valid_0's binary_logloss: 0.315568
[245]	valid_0's binary_logloss: 0.315396
[246]	valid_0's bi

[449]	valid_0's binary_logloss: 0.309959
[450]	valid_0's binary_logloss: 0.309892
[451]	valid_0's binary_logloss: 0.30986
[452]	valid_0's binary_logloss: 0.309815
[453]	valid_0's binary_logloss: 0.309916
[454]	valid_0's binary_logloss: 0.309904
[455]	valid_0's binary_logloss: 0.309847
[456]	valid_0's binary_logloss: 0.309822
[457]	valid_0's binary_logloss: 0.309804
[458]	valid_0's binary_logloss: 0.309861
[459]	valid_0's binary_logloss: 0.309778
[460]	valid_0's binary_logloss: 0.309769
[461]	valid_0's binary_logloss: 0.309789
[462]	valid_0's binary_logloss: 0.309738
[463]	valid_0's binary_logloss: 0.30976
[464]	valid_0's binary_logloss: 0.30969
[465]	valid_0's binary_logloss: 0.30967
[466]	valid_0's binary_logloss: 0.309606
[467]	valid_0's binary_logloss: 0.309573
[468]	valid_0's binary_logloss: 0.30954
[469]	valid_0's binary_logloss: 0.309582
[470]	valid_0's binary_logloss: 0.309548
[471]	valid_0's binary_logloss: 0.309498
[472]	valid_0's binary_logloss: 0.309458
[473]	valid_0's binar

[127]	valid_0's binary_logloss: 0.324966
[128]	valid_0's binary_logloss: 0.324906
[129]	valid_0's binary_logloss: 0.324887
[130]	valid_0's binary_logloss: 0.324605
[131]	valid_0's binary_logloss: 0.324459
[132]	valid_0's binary_logloss: 0.324389
[133]	valid_0's binary_logloss: 0.324356
[134]	valid_0's binary_logloss: 0.324321
[135]	valid_0's binary_logloss: 0.324046
[136]	valid_0's binary_logloss: 0.323794
[137]	valid_0's binary_logloss: 0.32374
[138]	valid_0's binary_logloss: 0.32361
[139]	valid_0's binary_logloss: 0.323555
[140]	valid_0's binary_logloss: 0.323518
[141]	valid_0's binary_logloss: 0.323479
[142]	valid_0's binary_logloss: 0.323351
[143]	valid_0's binary_logloss: 0.323352
[144]	valid_0's binary_logloss: 0.323297
[145]	valid_0's binary_logloss: 0.323184
[146]	valid_0's binary_logloss: 0.323087
[147]	valid_0's binary_logloss: 0.322947
[148]	valid_0's binary_logloss: 0.32299
[149]	valid_0's binary_logloss: 0.322931
[150]	valid_0's binary_logloss: 0.322497
[151]	valid_0's bin

[358]	valid_0's binary_logloss: 0.31187
[359]	valid_0's binary_logloss: 0.311847
[360]	valid_0's binary_logloss: 0.311855
[361]	valid_0's binary_logloss: 0.311849
[362]	valid_0's binary_logloss: 0.31185
[363]	valid_0's binary_logloss: 0.311764
[364]	valid_0's binary_logloss: 0.311746
[365]	valid_0's binary_logloss: 0.311745
[366]	valid_0's binary_logloss: 0.311688
[367]	valid_0's binary_logloss: 0.311648
[368]	valid_0's binary_logloss: 0.311584
[369]	valid_0's binary_logloss: 0.311608
[370]	valid_0's binary_logloss: 0.311629
[371]	valid_0's binary_logloss: 0.31167
[372]	valid_0's binary_logloss: 0.311649
[373]	valid_0's binary_logloss: 0.311559
[374]	valid_0's binary_logloss: 0.311602
[375]	valid_0's binary_logloss: 0.311636
[376]	valid_0's binary_logloss: 0.311613
[377]	valid_0's binary_logloss: 0.311561
[378]	valid_0's binary_logloss: 0.311493
[379]	valid_0's binary_logloss: 0.311505
[380]	valid_0's binary_logloss: 0.311499
[381]	valid_0's binary_logloss: 0.311498
[382]	valid_0's bin

[28]	valid_0's binary_logloss: 0.360999
[29]	valid_0's binary_logloss: 0.358904
[30]	valid_0's binary_logloss: 0.356871
[31]	valid_0's binary_logloss: 0.355083
[32]	valid_0's binary_logloss: 0.353766
[33]	valid_0's binary_logloss: 0.352061
[34]	valid_0's binary_logloss: 0.350578
[35]	valid_0's binary_logloss: 0.349217
[36]	valid_0's binary_logloss: 0.348251
[37]	valid_0's binary_logloss: 0.347031
[38]	valid_0's binary_logloss: 0.346348
[39]	valid_0's binary_logloss: 0.345312
[40]	valid_0's binary_logloss: 0.344453
[41]	valid_0's binary_logloss: 0.343678
[42]	valid_0's binary_logloss: 0.343089
[43]	valid_0's binary_logloss: 0.342444
[44]	valid_0's binary_logloss: 0.341969
[45]	valid_0's binary_logloss: 0.341458
[46]	valid_0's binary_logloss: 0.340856
[47]	valid_0's binary_logloss: 0.340641
[48]	valid_0's binary_logloss: 0.339923
[49]	valid_0's binary_logloss: 0.339351
[50]	valid_0's binary_logloss: 0.338924
[51]	valid_0's binary_logloss: 0.338748
[52]	valid_0's binary_logloss: 0.338494


[236]	valid_0's binary_logloss: 0.316166
[237]	valid_0's binary_logloss: 0.316222
[238]	valid_0's binary_logloss: 0.316047
[239]	valid_0's binary_logloss: 0.315846
[240]	valid_0's binary_logloss: 0.315835
[241]	valid_0's binary_logloss: 0.315832
[242]	valid_0's binary_logloss: 0.315668
[243]	valid_0's binary_logloss: 0.31562
[244]	valid_0's binary_logloss: 0.315568
[245]	valid_0's binary_logloss: 0.315396
[246]	valid_0's binary_logloss: 0.315347
[247]	valid_0's binary_logloss: 0.315286
[248]	valid_0's binary_logloss: 0.315205
[249]	valid_0's binary_logloss: 0.315227
[250]	valid_0's binary_logloss: 0.315194
[251]	valid_0's binary_logloss: 0.315106
[252]	valid_0's binary_logloss: 0.315046
[253]	valid_0's binary_logloss: 0.315053
[254]	valid_0's binary_logloss: 0.315039
[255]	valid_0's binary_logloss: 0.314872
[256]	valid_0's binary_logloss: 0.314879
[257]	valid_0's binary_logloss: 0.314827
[258]	valid_0's binary_logloss: 0.314719
[259]	valid_0's binary_logloss: 0.314697
[260]	valid_0's b

[443]	valid_0's binary_logloss: 0.310111
[444]	valid_0's binary_logloss: 0.310144
[445]	valid_0's binary_logloss: 0.310061
[446]	valid_0's binary_logloss: 0.310131
[447]	valid_0's binary_logloss: 0.310056
[448]	valid_0's binary_logloss: 0.310018
[449]	valid_0's binary_logloss: 0.309959
[450]	valid_0's binary_logloss: 0.309892
[451]	valid_0's binary_logloss: 0.30986
[452]	valid_0's binary_logloss: 0.309815
[453]	valid_0's binary_logloss: 0.309916
[454]	valid_0's binary_logloss: 0.309904
[455]	valid_0's binary_logloss: 0.309847
[456]	valid_0's binary_logloss: 0.309822
[457]	valid_0's binary_logloss: 0.309804
[458]	valid_0's binary_logloss: 0.309861
[459]	valid_0's binary_logloss: 0.309778
[460]	valid_0's binary_logloss: 0.309769
[461]	valid_0's binary_logloss: 0.309789
[462]	valid_0's binary_logloss: 0.309738
[463]	valid_0's binary_logloss: 0.30976
[464]	valid_0's binary_logloss: 0.30969
[465]	valid_0's binary_logloss: 0.30967
[466]	valid_0's binary_logloss: 0.309606
[467]	valid_0's bina

[132]	valid_0's binary_logloss: 0.324389
[133]	valid_0's binary_logloss: 0.324356
[134]	valid_0's binary_logloss: 0.324321
[135]	valid_0's binary_logloss: 0.324046
[136]	valid_0's binary_logloss: 0.323794
[137]	valid_0's binary_logloss: 0.32374
[138]	valid_0's binary_logloss: 0.32361
[139]	valid_0's binary_logloss: 0.323555
[140]	valid_0's binary_logloss: 0.323518
[141]	valid_0's binary_logloss: 0.323479
[142]	valid_0's binary_logloss: 0.323351
[143]	valid_0's binary_logloss: 0.323352
[144]	valid_0's binary_logloss: 0.323297
[145]	valid_0's binary_logloss: 0.323184
[146]	valid_0's binary_logloss: 0.323087
[147]	valid_0's binary_logloss: 0.322947
[148]	valid_0's binary_logloss: 0.32299
[149]	valid_0's binary_logloss: 0.322931
[150]	valid_0's binary_logloss: 0.322497
[151]	valid_0's binary_logloss: 0.322317
[152]	valid_0's binary_logloss: 0.322251
[153]	valid_0's binary_logloss: 0.322048
[154]	valid_0's binary_logloss: 0.321888
[155]	valid_0's binary_logloss: 0.321744
[156]	valid_0's bin

[365]	valid_0's binary_logloss: 0.311745
[366]	valid_0's binary_logloss: 0.311688
[367]	valid_0's binary_logloss: 0.311648
[368]	valid_0's binary_logloss: 0.311584
[369]	valid_0's binary_logloss: 0.311608
[370]	valid_0's binary_logloss: 0.311629
[371]	valid_0's binary_logloss: 0.31167
[372]	valid_0's binary_logloss: 0.311649
[373]	valid_0's binary_logloss: 0.311559
[374]	valid_0's binary_logloss: 0.311602
[375]	valid_0's binary_logloss: 0.311636
[376]	valid_0's binary_logloss: 0.311613
[377]	valid_0's binary_logloss: 0.311561
[378]	valid_0's binary_logloss: 0.311493
[379]	valid_0's binary_logloss: 0.311505
[380]	valid_0's binary_logloss: 0.311499
[381]	valid_0's binary_logloss: 0.311498
[382]	valid_0's binary_logloss: 0.311555
[383]	valid_0's binary_logloss: 0.311545
[384]	valid_0's binary_logloss: 0.311449
[385]	valid_0's binary_logloss: 0.311437
[386]	valid_0's binary_logloss: 0.311518
[387]	valid_0's binary_logloss: 0.311454
[388]	valid_0's binary_logloss: 0.311435
[389]	valid_0's b

[37]	valid_0's binary_logloss: 0.347031
[38]	valid_0's binary_logloss: 0.346348
[39]	valid_0's binary_logloss: 0.345312
[40]	valid_0's binary_logloss: 0.344453
[41]	valid_0's binary_logloss: 0.343678
[42]	valid_0's binary_logloss: 0.343089
[43]	valid_0's binary_logloss: 0.342444
[44]	valid_0's binary_logloss: 0.341969
[45]	valid_0's binary_logloss: 0.341458
[46]	valid_0's binary_logloss: 0.340856
[47]	valid_0's binary_logloss: 0.340641
[48]	valid_0's binary_logloss: 0.339923
[49]	valid_0's binary_logloss: 0.339351
[50]	valid_0's binary_logloss: 0.338924
[51]	valid_0's binary_logloss: 0.338748
[52]	valid_0's binary_logloss: 0.338494
[53]	valid_0's binary_logloss: 0.338313
[54]	valid_0's binary_logloss: 0.337806
[55]	valid_0's binary_logloss: 0.337626
[56]	valid_0's binary_logloss: 0.337286
[57]	valid_0's binary_logloss: 0.337179
[58]	valid_0's binary_logloss: 0.336928
[59]	valid_0's binary_logloss: 0.336818
[60]	valid_0's binary_logloss: 0.336753
[61]	valid_0's binary_logloss: 0.336173


[261]	valid_0's binary_logloss: 0.314611
[262]	valid_0's binary_logloss: 0.314475
[263]	valid_0's binary_logloss: 0.314499
[264]	valid_0's binary_logloss: 0.314381
[265]	valid_0's binary_logloss: 0.31435
[266]	valid_0's binary_logloss: 0.314297
[267]	valid_0's binary_logloss: 0.314257
[268]	valid_0's binary_logloss: 0.314294
[269]	valid_0's binary_logloss: 0.314307
[270]	valid_0's binary_logloss: 0.314235
[271]	valid_0's binary_logloss: 0.314272
[272]	valid_0's binary_logloss: 0.314279
[273]	valid_0's binary_logloss: 0.314278
[274]	valid_0's binary_logloss: 0.314315
[275]	valid_0's binary_logloss: 0.314258
[276]	valid_0's binary_logloss: 0.314182
[277]	valid_0's binary_logloss: 0.31413
[278]	valid_0's binary_logloss: 0.314185
[279]	valid_0's binary_logloss: 0.314169
[280]	valid_0's binary_logloss: 0.314148
[281]	valid_0's binary_logloss: 0.314023
[282]	valid_0's binary_logloss: 0.314106
[283]	valid_0's binary_logloss: 0.314034
[284]	valid_0's binary_logloss: 0.313846
[285]	valid_0's bi

[478]	valid_0's binary_logloss: 0.309352
[479]	valid_0's binary_logloss: 0.309333
[480]	valid_0's binary_logloss: 0.309268
[481]	valid_0's binary_logloss: 0.309315
[482]	valid_0's binary_logloss: 0.309252
[483]	valid_0's binary_logloss: 0.309216
[484]	valid_0's binary_logloss: 0.309183
[485]	valid_0's binary_logloss: 0.309185
[486]	valid_0's binary_logloss: 0.309027
[487]	valid_0's binary_logloss: 0.309033
[488]	valid_0's binary_logloss: 0.30899
[489]	valid_0's binary_logloss: 0.308926
[490]	valid_0's binary_logloss: 0.308939
[491]	valid_0's binary_logloss: 0.308757
[492]	valid_0's binary_logloss: 0.308825
[493]	valid_0's binary_logloss: 0.308817
[494]	valid_0's binary_logloss: 0.308788
[495]	valid_0's binary_logloss: 0.308809
[496]	valid_0's binary_logloss: 0.308849
[497]	valid_0's binary_logloss: 0.308809
[498]	valid_0's binary_logloss: 0.308829
[499]	valid_0's binary_logloss: 0.308789
[500]	valid_0's binary_logloss: 0.308666
[501]	valid_0's binary_logloss: 0.308595
[502]	valid_0's b

[129]	valid_0's binary_logloss: 0.324887
[130]	valid_0's binary_logloss: 0.324605
[131]	valid_0's binary_logloss: 0.324459
[132]	valid_0's binary_logloss: 0.324389
[133]	valid_0's binary_logloss: 0.324356
[134]	valid_0's binary_logloss: 0.324321
[135]	valid_0's binary_logloss: 0.324046
[136]	valid_0's binary_logloss: 0.323794
[137]	valid_0's binary_logloss: 0.32374
[138]	valid_0's binary_logloss: 0.32361
[139]	valid_0's binary_logloss: 0.323555
[140]	valid_0's binary_logloss: 0.323518
[141]	valid_0's binary_logloss: 0.323479
[142]	valid_0's binary_logloss: 0.323351
[143]	valid_0's binary_logloss: 0.323352
[144]	valid_0's binary_logloss: 0.323297
[145]	valid_0's binary_logloss: 0.323184
[146]	valid_0's binary_logloss: 0.323087
[147]	valid_0's binary_logloss: 0.322947
[148]	valid_0's binary_logloss: 0.32299
[149]	valid_0's binary_logloss: 0.322931
[150]	valid_0's binary_logloss: 0.322497
[151]	valid_0's binary_logloss: 0.322317
[152]	valid_0's binary_logloss: 0.322251
[153]	valid_0's bin

[345]	valid_0's binary_logloss: 0.312216
[346]	valid_0's binary_logloss: 0.312177
[347]	valid_0's binary_logloss: 0.312206
[348]	valid_0's binary_logloss: 0.312139
[349]	valid_0's binary_logloss: 0.312107
[350]	valid_0's binary_logloss: 0.312054
[351]	valid_0's binary_logloss: 0.312005
[352]	valid_0's binary_logloss: 0.311959
[353]	valid_0's binary_logloss: 0.311973
[354]	valid_0's binary_logloss: 0.311971
[355]	valid_0's binary_logloss: 0.311954
[356]	valid_0's binary_logloss: 0.311998
[357]	valid_0's binary_logloss: 0.311941
[358]	valid_0's binary_logloss: 0.31187
[359]	valid_0's binary_logloss: 0.311847
[360]	valid_0's binary_logloss: 0.311855
[361]	valid_0's binary_logloss: 0.311849
[362]	valid_0's binary_logloss: 0.31185
[363]	valid_0's binary_logloss: 0.311764
[364]	valid_0's binary_logloss: 0.311746
[365]	valid_0's binary_logloss: 0.311745
[366]	valid_0's binary_logloss: 0.311688
[367]	valid_0's binary_logloss: 0.311648
[368]	valid_0's binary_logloss: 0.311584
[369]	valid_0's bi

[1]	valid_0's binary_logloss: 0.649435
Training until validation scores don't improve for 10 rounds
[2]	valid_0's binary_logloss: 0.613408
[3]	valid_0's binary_logloss: 0.584365
[4]	valid_0's binary_logloss: 0.559393
[5]	valid_0's binary_logloss: 0.538627
[6]	valid_0's binary_logloss: 0.518553
[7]	valid_0's binary_logloss: 0.500944
[8]	valid_0's binary_logloss: 0.485994
[9]	valid_0's binary_logloss: 0.471535
[10]	valid_0's binary_logloss: 0.458312
[11]	valid_0's binary_logloss: 0.447738
[12]	valid_0's binary_logloss: 0.43796
[13]	valid_0's binary_logloss: 0.429369
[14]	valid_0's binary_logloss: 0.420761
[15]	valid_0's binary_logloss: 0.413411
[16]	valid_0's binary_logloss: 0.407384
[17]	valid_0's binary_logloss: 0.401435
[18]	valid_0's binary_logloss: 0.395525
[19]	valid_0's binary_logloss: 0.390051
[20]	valid_0's binary_logloss: 0.385447
[21]	valid_0's binary_logloss: 0.381366
[22]	valid_0's binary_logloss: 0.377836
[23]	valid_0's binary_logloss: 0.374753
[24]	valid_0's binary_logloss

[217]	valid_0's binary_logloss: 0.317302
[218]	valid_0's binary_logloss: 0.317351
[219]	valid_0's binary_logloss: 0.317335
[220]	valid_0's binary_logloss: 0.317218
[221]	valid_0's binary_logloss: 0.317245
[222]	valid_0's binary_logloss: 0.317276
[223]	valid_0's binary_logloss: 0.317235
[224]	valid_0's binary_logloss: 0.316992
[225]	valid_0's binary_logloss: 0.316957
[226]	valid_0's binary_logloss: 0.316841
[227]	valid_0's binary_logloss: 0.316746
[228]	valid_0's binary_logloss: 0.316671
[229]	valid_0's binary_logloss: 0.316588
[230]	valid_0's binary_logloss: 0.31649
[231]	valid_0's binary_logloss: 0.316425
[232]	valid_0's binary_logloss: 0.316258
[233]	valid_0's binary_logloss: 0.316222
[234]	valid_0's binary_logloss: 0.316271
[235]	valid_0's binary_logloss: 0.316175
[236]	valid_0's binary_logloss: 0.316166
[237]	valid_0's binary_logloss: 0.316222
[238]	valid_0's binary_logloss: 0.316047
[239]	valid_0's binary_logloss: 0.315846
[240]	valid_0's binary_logloss: 0.315835
[241]	valid_0's b

[429]	valid_0's binary_logloss: 0.31019
[430]	valid_0's binary_logloss: 0.310156
[431]	valid_0's binary_logloss: 0.310113
[432]	valid_0's binary_logloss: 0.310166
[433]	valid_0's binary_logloss: 0.310176
[434]	valid_0's binary_logloss: 0.310093
[435]	valid_0's binary_logloss: 0.310082
[436]	valid_0's binary_logloss: 0.310077
[437]	valid_0's binary_logloss: 0.310068
[438]	valid_0's binary_logloss: 0.310097
[439]	valid_0's binary_logloss: 0.310047
[440]	valid_0's binary_logloss: 0.310063
[441]	valid_0's binary_logloss: 0.310084
[442]	valid_0's binary_logloss: 0.310083
[443]	valid_0's binary_logloss: 0.310111
[444]	valid_0's binary_logloss: 0.310144
[445]	valid_0's binary_logloss: 0.310061
[446]	valid_0's binary_logloss: 0.310131
[447]	valid_0's binary_logloss: 0.310056
[448]	valid_0's binary_logloss: 0.310018
[449]	valid_0's binary_logloss: 0.309959
[450]	valid_0's binary_logloss: 0.309892
[451]	valid_0's binary_logloss: 0.30986
[452]	valid_0's binary_logloss: 0.309815
[453]	valid_0's bi

[84]	valid_0's binary_logloss: 0.330868
[85]	valid_0's binary_logloss: 0.330622
[86]	valid_0's binary_logloss: 0.330501
[87]	valid_0's binary_logloss: 0.330406
[88]	valid_0's binary_logloss: 0.330069
[89]	valid_0's binary_logloss: 0.329904
[90]	valid_0's binary_logloss: 0.329725
[91]	valid_0's binary_logloss: 0.329629
[92]	valid_0's binary_logloss: 0.329547
[93]	valid_0's binary_logloss: 0.329502
[94]	valid_0's binary_logloss: 0.329362
[95]	valid_0's binary_logloss: 0.329335
[96]	valid_0's binary_logloss: 0.329319
[97]	valid_0's binary_logloss: 0.329123
[98]	valid_0's binary_logloss: 0.328708
[99]	valid_0's binary_logloss: 0.328618
[100]	valid_0's binary_logloss: 0.328515
[101]	valid_0's binary_logloss: 0.328521
[102]	valid_0's binary_logloss: 0.328446
[103]	valid_0's binary_logloss: 0.328475
[104]	valid_0's binary_logloss: 0.328225
[105]	valid_0's binary_logloss: 0.328071
[106]	valid_0's binary_logloss: 0.327688
[107]	valid_0's binary_logloss: 0.327641
[108]	valid_0's binary_logloss: 

[293]	valid_0's binary_logloss: 0.313795
[294]	valid_0's binary_logloss: 0.313738
[295]	valid_0's binary_logloss: 0.313768
[296]	valid_0's binary_logloss: 0.313778
[297]	valid_0's binary_logloss: 0.313865
[298]	valid_0's binary_logloss: 0.313738
[299]	valid_0's binary_logloss: 0.313735
[300]	valid_0's binary_logloss: 0.313705
[301]	valid_0's binary_logloss: 0.313774
[302]	valid_0's binary_logloss: 0.31373
[303]	valid_0's binary_logloss: 0.313702
[304]	valid_0's binary_logloss: 0.313643
[305]	valid_0's binary_logloss: 0.313635
[306]	valid_0's binary_logloss: 0.313581
[307]	valid_0's binary_logloss: 0.313453
[308]	valid_0's binary_logloss: 0.313445
[309]	valid_0's binary_logloss: 0.313489
[310]	valid_0's binary_logloss: 0.313524
[311]	valid_0's binary_logloss: 0.313389
[312]	valid_0's binary_logloss: 0.313342
[313]	valid_0's binary_logloss: 0.313376
[314]	valid_0's binary_logloss: 0.313339
[315]	valid_0's binary_logloss: 0.313329
[316]	valid_0's binary_logloss: 0.313302
[317]	valid_0's b

[522]	valid_0's binary_logloss: 0.307777
[523]	valid_0's binary_logloss: 0.30773
[524]	valid_0's binary_logloss: 0.307656
[525]	valid_0's binary_logloss: 0.3076
[526]	valid_0's binary_logloss: 0.307588
[527]	valid_0's binary_logloss: 0.30747
[528]	valid_0's binary_logloss: 0.307443
[529]	valid_0's binary_logloss: 0.307393
[530]	valid_0's binary_logloss: 0.307376
[531]	valid_0's binary_logloss: 0.307354
[532]	valid_0's binary_logloss: 0.307323
[533]	valid_0's binary_logloss: 0.307351
[534]	valid_0's binary_logloss: 0.307341
[535]	valid_0's binary_logloss: 0.307376
[536]	valid_0's binary_logloss: 0.307374
[537]	valid_0's binary_logloss: 0.30726
[538]	valid_0's binary_logloss: 0.307223
[539]	valid_0's binary_logloss: 0.307201
[540]	valid_0's binary_logloss: 0.307155
[541]	valid_0's binary_logloss: 0.307072
[542]	valid_0's binary_logloss: 0.307048
[543]	valid_0's binary_logloss: 0.306993
[544]	valid_0's binary_logloss: 0.306969
[545]	valid_0's binary_logloss: 0.307021
[546]	valid_0's binar

[175]	valid_0's binary_logloss: 0.320327
[176]	valid_0's binary_logloss: 0.320196
[177]	valid_0's binary_logloss: 0.32011
[178]	valid_0's binary_logloss: 0.320067
[179]	valid_0's binary_logloss: 0.319838
[180]	valid_0's binary_logloss: 0.319715
[181]	valid_0's binary_logloss: 0.31961
[182]	valid_0's binary_logloss: 0.319506
[183]	valid_0's binary_logloss: 0.319442
[184]	valid_0's binary_logloss: 0.319419
[185]	valid_0's binary_logloss: 0.319289
[186]	valid_0's binary_logloss: 0.319216
[187]	valid_0's binary_logloss: 0.319224
[188]	valid_0's binary_logloss: 0.319145
[189]	valid_0's binary_logloss: 0.319102
[190]	valid_0's binary_logloss: 0.319073
[191]	valid_0's binary_logloss: 0.319008
[192]	valid_0's binary_logloss: 0.318756
[193]	valid_0's binary_logloss: 0.318614
[194]	valid_0's binary_logloss: 0.318639
[195]	valid_0's binary_logloss: 0.318616
[196]	valid_0's binary_logloss: 0.318632
[197]	valid_0's binary_logloss: 0.318617
[198]	valid_0's binary_logloss: 0.318573
[199]	valid_0's bi

[396]	valid_0's binary_logloss: 0.311235
[397]	valid_0's binary_logloss: 0.311218
[398]	valid_0's binary_logloss: 0.311177
[399]	valid_0's binary_logloss: 0.31114
[400]	valid_0's binary_logloss: 0.311076
[401]	valid_0's binary_logloss: 0.310995
[402]	valid_0's binary_logloss: 0.311005
[403]	valid_0's binary_logloss: 0.31098
[404]	valid_0's binary_logloss: 0.310961
[405]	valid_0's binary_logloss: 0.310917
[406]	valid_0's binary_logloss: 0.310801
[407]	valid_0's binary_logloss: 0.310728
[408]	valid_0's binary_logloss: 0.310712
[409]	valid_0's binary_logloss: 0.310744
[410]	valid_0's binary_logloss: 0.310651
[411]	valid_0's binary_logloss: 0.310449
[412]	valid_0's binary_logloss: 0.310482
[413]	valid_0's binary_logloss: 0.310397
[414]	valid_0's binary_logloss: 0.310418
[415]	valid_0's binary_logloss: 0.310409
[416]	valid_0's binary_logloss: 0.310368
[417]	valid_0's binary_logloss: 0.310357
[418]	valid_0's binary_logloss: 0.310282
[419]	valid_0's binary_logloss: 0.310283
[420]	valid_0's bi

[47]	valid_0's binary_logloss: 0.340641
[48]	valid_0's binary_logloss: 0.339923
[49]	valid_0's binary_logloss: 0.339351
[50]	valid_0's binary_logloss: 0.338924
[51]	valid_0's binary_logloss: 0.338748
[52]	valid_0's binary_logloss: 0.338494
[53]	valid_0's binary_logloss: 0.338313
[54]	valid_0's binary_logloss: 0.337806
[55]	valid_0's binary_logloss: 0.337626
[56]	valid_0's binary_logloss: 0.337286
[57]	valid_0's binary_logloss: 0.337179
[58]	valid_0's binary_logloss: 0.336928
[59]	valid_0's binary_logloss: 0.336818
[60]	valid_0's binary_logloss: 0.336753
[61]	valid_0's binary_logloss: 0.336173
[62]	valid_0's binary_logloss: 0.335533
[63]	valid_0's binary_logloss: 0.335156
[64]	valid_0's binary_logloss: 0.335047
[65]	valid_0's binary_logloss: 0.334364
[66]	valid_0's binary_logloss: 0.334129
[67]	valid_0's binary_logloss: 0.334043
[68]	valid_0's binary_logloss: 0.333924
[69]	valid_0's binary_logloss: 0.333736
[70]	valid_0's binary_logloss: 0.333227
[71]	valid_0's binary_logloss: 0.333132


[268]	valid_0's binary_logloss: 0.314294
[269]	valid_0's binary_logloss: 0.314307
[270]	valid_0's binary_logloss: 0.314235
[271]	valid_0's binary_logloss: 0.314272
[272]	valid_0's binary_logloss: 0.314279
[273]	valid_0's binary_logloss: 0.314278
[274]	valid_0's binary_logloss: 0.314315
[275]	valid_0's binary_logloss: 0.314258
[276]	valid_0's binary_logloss: 0.314182
[277]	valid_0's binary_logloss: 0.31413
[278]	valid_0's binary_logloss: 0.314185
[279]	valid_0's binary_logloss: 0.314169
[280]	valid_0's binary_logloss: 0.314148
[281]	valid_0's binary_logloss: 0.314023
[282]	valid_0's binary_logloss: 0.314106
[283]	valid_0's binary_logloss: 0.314034
[284]	valid_0's binary_logloss: 0.313846
[285]	valid_0's binary_logloss: 0.313855
[286]	valid_0's binary_logloss: 0.313827
[287]	valid_0's binary_logloss: 0.31382
[288]	valid_0's binary_logloss: 0.313877
[289]	valid_0's binary_logloss: 0.313918
[290]	valid_0's binary_logloss: 0.313908
[291]	valid_0's binary_logloss: 0.313838
[292]	valid_0's bi

[471]	valid_0's binary_logloss: 0.309498
[472]	valid_0's binary_logloss: 0.309458
[473]	valid_0's binary_logloss: 0.309465
[474]	valid_0's binary_logloss: 0.309374
[475]	valid_0's binary_logloss: 0.309374
[476]	valid_0's binary_logloss: 0.309395
[477]	valid_0's binary_logloss: 0.309346
[478]	valid_0's binary_logloss: 0.309352
[479]	valid_0's binary_logloss: 0.309333
[480]	valid_0's binary_logloss: 0.309268
[481]	valid_0's binary_logloss: 0.309315
[482]	valid_0's binary_logloss: 0.309252
[483]	valid_0's binary_logloss: 0.309216
[484]	valid_0's binary_logloss: 0.309183
[485]	valid_0's binary_logloss: 0.309185
[486]	valid_0's binary_logloss: 0.309027
[487]	valid_0's binary_logloss: 0.309033
[488]	valid_0's binary_logloss: 0.30899
[489]	valid_0's binary_logloss: 0.308926
[490]	valid_0's binary_logloss: 0.308939
[491]	valid_0's binary_logloss: 0.308757
[492]	valid_0's binary_logloss: 0.308825
[493]	valid_0's binary_logloss: 0.308817
[494]	valid_0's binary_logloss: 0.308788
[495]	valid_0's b

In [104]:
f_score = []
recall = []
precision = []
accuracy = []
th_list = []
ans = 0
for j in range(100):
    th = 0.01 + 0.01 * j
    th_list.append(th)
    y_pred_binary = []
    for p in y_preds_bagging:
        if p >= th:
            y_pred_binary.append(1)
        else:
            y_pred_binary.append(0)
            
            
    accuracy.append(accuracy_score(y_pred_binary, y_test))
    f_score.append(f1_score(y_pred_binary, y_test))
    recall.append(recall_score(y_pred_binary, y_test))
    precision.append(precision_score(y_pred_binary, y_test))

# plt.figure(figsize=(15, 5))
# plt.plot(th_list, presicion, label='precision')
# plt.plot(th_list, recall, label='recall')
# plt.plot(th_list, f_score, label='f1_score')
# plt.legend()
# plt.show()

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
print("One-sided selection")
print("th ", th_list[np.argmax(f_score)])
print("accuracy", accuracy[np.argmax(f_score)])
print("recall", recall[np.argmax(f_score)])
print("precision", precision[np.argmax(f_score)])
print("f_score", max(f_score))

lightgbmのみ：特徴量全部

['ratio', 'encounter cycle', 'swh', 'relative_wind_y', 'Speed[knot]', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]

- accuracy:  0.9449363937760171
- recall: 0.45710047622765243
- precision: 0.6297053129705313
- f1_score: 0.5296964762292486

Random Sampling
- th  0.86
- accuracy 0.9438877649376648
- recall 0.4472721359460119
- precision 0.5916326091632609
- f_score 0.509422604991434

One-sided selection
1. th  0.27
1. accuracy 0.9503543094408372
1. recall 0.4963905775075988
1. precision 0.5620563562056357
1. f_score 0.5271865227479068

In [None]:
for j in range(20):
    th = 0.01 + 0.01 * j
    y_pred_binary = []
    for p in y_pred:
        if p >= th:
            y_pred_binary.append(1)
        else:
            y_pred_binary.append(0)

    cm = confusion_matrix(y_test, y_pred_binary)
    print(th)
    print(cm)
    print("accuracy: ", accuracy_score(y_pred_binary, y_test))
    print("recall:", recall_score(y_pred_binary, y_test))
    print("precision:", precision_score(y_pred_binary, y_test))
    print("f1_score:", f1_score(y_pred_binary, y_test))

# xgboost

In [None]:
import xgboost as xgb
train = np.array(shirase_spc.loc[:, ['relative_wind_y', 'swh']])
test = np.array(shirase_spc.loc[:, 'label'])

dtrain = xgb.DMatrix(train, label=test)
params = {'objective': 'binary:logistic', 'silent':1, 'random_state':71}
num_round = 50
model = xgb.train(params, dtrain, num_round)

fscore = model.get_score(importance_type='total_gain')
fscore = sorted([(k, v) for k , v in fscore.items()], key=lambda tpl: tpl[1], reverse=True)
print('xgboost importance')
print(fscore[:5])

In [None]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# データ読み込み
X_train, X_test = train_test_split(train)
y_train, y_test = train_test_split(test)

# xgboostモデルの作成
clf = xgb.XGBClassifier()

# ハイパーパラメータ探索
clf_cv = GridSearchCV(clf, {'max_depth': 2, 'n_estimators': 50}, verbose=1)
clf_cv.fit(X_train, y_train)
print (clf_cv.best_params_, clf_cv.best_score_)

# 改めて最適パラメータで学習
clf = xgb.XGBClassifier(**clf_cv.best_params_)
clf.fit(X_train, y_train)

# 学習モデルの保存、読み込み
# import pickle
# pickle.dump(clf, open("model.pkl", "wb"))
# clf = pickle.load(open("model.pkl", "rb"))

# 学習モデルの評価
pred = clf.predict(X_test)
print (confusion_matrix(y_test, pred))
print (classification_report(y_test, pred))

# PCA

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import os

def plot_reduced_graph(data, label, image_name, model='tsne',
                                     random_sampling=False, random_state=0,
                                     file_path='/Users/nakamurataiki/Desktop/sprayf/dataset/cruise/jare60'):
    if random_sampling:
        data = data.sample(1000, random_state=random_state)
        label = label.sample(1000, random_state=random_state)
        
    data = np.array(data)
    label = np.array(label)
        
    if model == 'pca':
        pca = PCA(n_components=2, random_state=random_state)
        transformed = pca.fit_transform(data)
    elif model == 'tsne':
        transformed = TSNE(n_components=2, random_state=random_state).fit_transform(data)
    
    transformed_1 = transformed[label == 1]
    transformed_0 = transformed[label == 0]
    
    plt.figure(figsize=(7, 7))
    plt.scatter(transformed_0[:, 0], transformed_0[:, 1], color='black', label='0', alpha=0.1)
    plt.scatter(transformed_1[:, 0], transformed_1[:, 1], color='yellow', label='1')
    plt.title("PCA: FFT")
    plt.show()
    #plt.savefig(os.path.join(file_path, image_name+'.png'))

reduction_data = []
#reduction_data = ['swh', '速度 (kt)', '相対風速 (m/s)', 0, 1]
#reduction_data = ['Relative wind speed[m/s]', 'swh']
reduction_data.extend([i for i in range(1, 31)])
for_reduction = without_tp.loc[:, reduction_data]
label = without_tp.loc[:, 'label']
print(for_reduction.shape, label.shape)
plot_reduced_graph(for_reduction, label, 'pca_relativewind_swh', model='tsne', random_sampling=True)