In [26]:
import numpy as np, os
import pandas as pd

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import seaborn as sns

import tensorflow as tf
from tensorflow import keras

import warnings
# Silence every warning category for the rest of the notebook session.
warnings.filterwarnings(action='ignore')

# Korean font setup
import matplotlib.font_manager as fm

# Register each font file found under the Nanum font directory with matplotlib.
font_dirs = ['/usr/share/fonts/truetype/nanum']
font_files = fm.findSystemFonts(fontpaths=font_dirs)
for font_file in font_files:
    fm.fontManager.addfont(font_file)

# Select the font used to render Korean text in plots.
# "axes.unicode_minus": False keeps the minus sign from rendering as a broken glyph.
sns.set(
    style='darkgrid',
    font="NanumBarunGothic",
    rc={"axes.unicode_minus": False},
)

In [27]:
# Feature-importance table; later cells read its 'feature' and 'diff' columns.
fi = pd.read_csv('./data/df_all_best_type.csv')

# Sorted, de-duplicated ITEMID values. COLS[i] is used later as the label of
# feature column i of the input tensor.
item_ids = pd.read_csv('/project/LSH/total_data_7727.csv')['ITEMID']
COLS = list(item_ids.sort_values().unique())

# Trained model restored from its HDF5 checkpoint.
model = tf.keras.models.load_model(
    '/project/LSH/model/(LSTM_best_4068)seed42-05-0.5029.hdf5'
)

# FI에 ReLU 적용

In [28]:
# ReLU helper
def relu(x):
    """Return the element-wise maximum of 0 and ``x``, so negative values become 0.

    Accepts anything ``np.maximum`` accepts (a scalar or an array-like).
    """
    zero_floor = 0
    return np.maximum(zero_floor, x)

In [29]:
# Work on a copy so the loaded `fi` table stays untouched.
tmp = fi.copy()
# Clip negative importances to zero.
tmp['diff'] = tmp['diff'].apply(relu)
# Keep only the feature ID and its clipped importance, largest first.
relu_fi = tmp.loc[:, ['feature', 'diff']].sort_values(by='diff', ascending=False)

In [30]:
# Display the ReLU-clipped importance table, ordered by 'diff' descending.
relu_fi

Unnamed: 0,feature,diff
0,409606211,0.059180
1,74606211,0.036668
2,67434504,0.032467
3,54817525,0.029130
4,10019055302,0.027498
...,...,...
3469,78005303,0.000000
3470,17317001801,0.000000
3471,597003212,0.000000
3472,51435,0.000000


# Prepare Data

In [31]:
import random

# Fixed seed so the shuffled split below is the same on every run.
seed_num = 42
random.seed(seed_num)

path = '/project/LSH/'
X = np.load(path + 'x_(7727,10,4068).npy')
y = np.load(path + 'y_(7727,1).npy')

# Shuffle the sample positions, then cut at 80% of the rows.
idx = list(range(len(X)))
random.shuffle(idx)
i = round(X.shape[0]*0.8)
train_idx, test_idx = idx[:i], idx[i:]

X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((6182, 10, 4068), (6182,), (1545, 10, 4068), (1545,))

In [32]:
# Baseline: mean model output over the unmodified test set. Every perturbation
# result below is reported as a difference from this value.
test_pred = model.predict(X_test)
base_pred = np.mean(test_pred)
base_pred

0.63693285

# 0 to 1 - D-1~5

In [None]:
# For each feature, force it to 1 on the last five time steps (the D-1~5
# window named in the section header) and record how far the mean prediction
# moves away from `base_pred`.
result = []
for i in tqdm(range(X_test.shape[2])):
    # Snapshot the feature's original values across all time steps.
    save_cols = X_test[:, :, i].copy()
    try:
        X_test[:, -5:, i] = 1
        pred = model.predict(X_test)
    finally:
        # X_test is shared with every later cell and is edited in place here.
        # Restore it even when predict() raises or the cell is interrupted, so
        # an aborted run cannot leave one feature stuck at 1.
        X_test[:, :, i] = save_cols
    value = np.mean(pred) - base_pred
    result.append({'feature' : str(COLS[i]), 'diff' : value})

  0%|          | 0/4068 [00:00<?, ?it/s]

In [None]:
# Collect the per-feature deltas into a frame; `near_df_ori` is an untouched
# copy kept separate from the working frame that later cells modify.
near_df_ori = pd.DataFrame(result)
near_df = near_df_ori.copy()
near_df.sort_values(by='diff', ascending=False)

## Sign(부호) 추출

In [None]:
# Reduce a value to its sign, encoded as 1 or -1.
def only_sign(x):
    """Return 1 when ``x >= 0`` (zero included) and -1 in every other case."""
    return 1 if x >= 0 else -1

In [None]:
# Attach the direction (+1 / -1) of each feature's effect, then order the rows
# by feature ID. The IDs are strings here, so the order is lexicographic, and
# the original row labels are kept.
near_df = (
    near_df
    .assign(sign=near_df['diff'].apply(only_sign))
    .sort_values(by='feature')
)

near_df['sign'].value_counts()

## 최종 Score

In [None]:
# Final near-window score = ReLU-clipped importance x sign of the perturbation
# effect, computed per feature.
relu_fi_near = relu_fi.sort_values('feature').reset_index(drop=True)

# Look the importance up by feature ID instead of multiplying two Series
# directly. Series arithmetic aligns on row labels, and here the labels are
# unrelated: `relu_fi_near` has a fresh positional index, `near_df` still
# carries the loop index it was built with. A direct product is therefore only
# right when both tables happen to list the same features in the same order.
# NOTE(review): assumes `fi` has one row per feature and that str(feature)
# matches the str(COLS[i]) labels used in `near_df` - confirm.
relu_by_feature = (
    relu_fi_near
    .assign(feature=relu_fi_near['feature'].astype(str))
    .set_index('feature')['diff']
)

near_result = near_df[['feature']].copy()
# Features missing from `fi` get NaN rather than another feature's value.
near_result['score'] = near_df['feature'].map(relu_by_feature) * near_df['sign']
near_result = near_result.sort_values('score', ascending=False).reset_index(drop=True)

#save
near_result.to_csv('./data/method2_nearscore.csv', index=False)

near_result

# 0 to 1 - D-6~10

In [None]:
# For each feature, force it to 1 on the first five time steps (the D-6~10
# window named in the section header) and record how far the mean prediction
# moves away from `base_pred`.
result = []
for i in tqdm(range(X_test.shape[2])):
    # Snapshot the feature's original values across all time steps.
    save_cols = X_test[:, :, i].copy()
    try:
        X_test[:, :5, i] = 1
        pred = model.predict(X_test)
    finally:
        # X_test is shared across cells and is edited in place here. Restore it
        # even when predict() raises or the cell is interrupted, so an aborted
        # run cannot leave one feature stuck at 1.
        X_test[:, :, i] = save_cols
    value = np.mean(pred) - base_pred
    result.append({'feature' : str(COLS[i]), 'diff' : value})

In [None]:
# Collect the per-feature deltas into a frame; `away_df_ori` is an untouched
# copy kept separate from the working frame that later cells modify.
away_df_ori = pd.DataFrame(result)
away_df = away_df_ori.copy()
away_df.sort_values(by='diff', ascending=False)

## sign(부호) 추출

In [None]:
# Attach the direction (+1 / -1) of each feature's effect, then order the rows
# by feature ID. The IDs are strings here, so the order is lexicographic, and
# the original row labels are kept.
away_df = (
    away_df
    .assign(sign=away_df['diff'].apply(only_sign))
    .sort_values(by='feature')
)

away_df['sign'].value_counts()

## 최종 Score

In [None]:
# Final away-window score = ReLU-clipped importance x sign of the perturbation
# effect, computed per feature.
relu_fi_away = relu_fi.sort_values('feature').reset_index(drop=True)

# Look the importance up by feature ID instead of multiplying two Series
# directly. Series arithmetic aligns on row labels, and here the labels are
# unrelated: `relu_fi_away` has a fresh positional index, `away_df` still
# carries the loop index it was built with. A direct product is therefore only
# right when both tables happen to list the same features in the same order.
# NOTE(review): assumes `fi` has one row per feature and that str(feature)
# matches the str(COLS[i]) labels used in `away_df` - confirm.
relu_by_feature = (
    relu_fi_away
    .assign(feature=relu_fi_away['feature'].astype(str))
    .set_index('feature')['diff']
)

away_result = away_df[['feature']].copy()
# Features missing from `fi` get NaN rather than another feature's value.
away_result['score'] = away_df['feature'].map(relu_by_feature) * away_df['sign']
away_result = away_result.sort_values('score', ascending=False).reset_index(drop=True)

#save
away_result.to_csv('./data/method2_awayscore.csv', index=False)

away_result

# 역전되는 Feature 탐색
원거리(away_result['score']) - 근거리(near_result['score']) 

In [None]:
# Put both score tables in feature-ID order. sort_values() only reorders the
# rows; each row keeps the index label it already had.
near_result, away_result = (
    scores.sort_values(by='feature') for scores in (near_result, away_result)
)

In [None]:
# diff_score = away score - near score, computed per feature.
#
# The two tables are joined on the feature ID. Both still carry the index they
# got from reset_index() after being ranked by their own score, so subtracting
# the 'score' Series directly would pair the k-th ranked near feature with the
# k-th ranked away feature, which are generally different features.
final_result = near_result[['feature', 'score']].merge(
    away_result[['feature', 'score']],
    on='feature',
    how='left',
    suffixes=('_near', '_away'),
    validate='one_to_one',  # fail loudly if a feature ID appears twice
)
final_result['diff_score'] = final_result['score_away'] - final_result['score_near']
final_result = (
    final_result[['feature', 'diff_score']]
    .sort_values('diff_score', ascending=False)
)

In [None]:
# Display features ranked by diff_score (away score minus near score), largest first.
final_result