# Library

In [2]:
import numpy as np, os
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

# 결과 확인을 용이하게 하기 위한 코드
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

#한글설정
import matplotlib.font_manager as fm

font_dirs = ['/usr/share/fonts/truetype/nanum', ]
font_files = fm.findSystemFonts(fontpaths=font_dirs)

for font_file in font_files:
    fm.fontManager.addfont(font_file)
    
# 한글 출력을 위해서 폰트 옵션을 설정합니다.
# "axes.unicode_minus" : 마이너스가 깨질 것을 방지

sns.set(font="NanumBarunGothic", 
        rc={"axes.unicode_minus":False},
        style='darkgrid')

# Data load

In [79]:
# LSH
all_df = pd.read_csv('df_all_best.csv')
all_df = all_df.sort_values(by='diff',ascending=False)

df_d1d5 = pd.read_csv('df_d1d5_best.csv')
df_d1d5 = df_d1d5.sort_values(by='diff',ascending=False)

df_d6d10 = pd.read_csv('df_d6d10_best.csv')
df_d6d10 = df_d6d10.sort_values(by='diff',ascending=False)

# Method 1 : 일관성, 방향성
$$
FI^{our} = Relu(FI^{inverse}) * sign(FI^{0->1})
$$

$$
일관성 : Relu(FI^{inverse})
$$

$$
방향성 : sign(FI^{0->1})
$$

## 일관성

In [89]:
# relu 방법 1
def relu(x):
    return np.maximum(0, x)

일관성_df = all_df[['feature','diff']]
일관성_df['diff'] = 일관성_df['diff'].apply(relu)

# # relu 방법 2
일관성_df = all_df[['feature','diff']]
일관성_df.loc[(일관성_df['diff'] <= 0),'diff'] = 0
일관성_df = 일관성_df.sort_values(by='feature')
일관성_df.index = list(range(4068))
일관성_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  일관성_df['diff'] = 일관성_df['diff'].apply(relu)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,feature,diff
0,0,0.000000
1,50803,0.002118
2,50804,0.003495
3,50805,0.001575
4,50806,0.005405
...,...,...
4063,78112001103,0.002014
4064,79511050204,0.000000
4065,87701071218,0.013409
4066,87701083336,0.000000


## 방향성

In [27]:
# 1) DATA 
import random    
# ---------------------
seed_num = 42
# ---------------------
random.seed(seed_num)

x = np.load('/project/LSH/x_(7727,10,4068).npy')
y = np.load('/project/LSH/y_(7727,1).npy')

idx = list(range(len(x)))
random.shuffle(idx)

i = round(x.shape[0]*0.8)
X_train, y_train = x[idx[:i],:,:], y[idx[:i]]
X_test, y_test = x[idx[i:],:,:], y[idx[i:]]

# 2) MODEL
from keras.models import load_model
lstm2 = load_model('./model/(LSTM_best_4068)seed42-05-0.5029.hdf5')

In [28]:
# 3)
# FEATURES
a = pd.read_csv('total_data_7727.csv')
features = list(a['ITEMID'].sort_values().unique()) # 4068개 ITEMID 

# No Change에 대한 평균예측값 (baseline)
preds = lstm2.predict(X_test)
pred_base = np.mean(preds)

# COMPUTE BASELINE
results = []
results.append({'feature':'BASELINE','pred_base':pred_base}) 

for k in tqdm(range(len(features))):

    # REVERSE ALL FEATURE K
    save_col = X_test[:,:,k].copy()
    X_test[:,:,k] = np.where(X_test[:,:,k]==0, 1, X_test[:,:,k])

    # COMPUTE BCE WITH FEATURE K REVERSED
    pred2 = lstm2.predict(X_test)
    pred2_mean = np.mean(pred2)
    
    results.append({'feature':features[k],'pred2':pred2_mean})
    X_test[:,:,k] = save_col

100%|██████████| 4068/4068 [39:59<00:00,  1.70it/s]


In [77]:
# 4) diff 구해서 sign함수에 적용

방향성_df = pd.DataFrame(results)
pred_base = 방향성_df.iloc[0,1]
방향성_df = 방향성_df.drop(index=0, columns=['pred_base'])
방향성_df['diff'] = 방향성_df['pred2']-pred_base

방향성_df.loc[(방향성_df['diff']>0),'diff'] = 1
방향성_df.loc[(방향성_df['diff']<-0),'diff'] = -1
방향성_df = 방향성_df.sort_values(ascending=True, by='feature')
# 방향성_df.to_csv('m1_방향성df.csv',index=False)

## 일관성 * 방향성

In [90]:
방향성_df = pd.read_csv('m1_방향성df.csv')
방향성_df.index = list(range(4068))

FI_our = 일관성_df.copy()
FI_our['mul'] = 일관성_df['diff'] * 방향성_df['diff']

FI_our = FI_our.sort_values(ascending=False, by='mul')
FI_our

Unnamed: 0,feature,diff,mul
2195,409606211,0.059180,0.059180
1276,74606211,0.036668,0.036668
1081,67434504,0.032467,0.032467
982,54817525,0.029130,0.029130
2744,10019055302,0.027498,0.027498
...,...,...,...
162,51200,0.016966,-0.016966
1815,182138167,0.017109,-0.017109
37,50889,0.018246,-0.018246
469,4003822,0.019987,-0.019987


In [91]:
FI_our.head(10)
FI_our.tail(10)

Unnamed: 0,feature,diff,mul
2195,409606211,0.05918,0.05918
1276,74606211,0.036668,0.036668
1081,67434504,0.032467,0.032467
982,54817525,0.02913,0.02913
2744,10019055302,0.027498,0.027498
1011,54858516,0.023033,0.023033
1306,74706811,0.021105,0.021105
845,45006701,0.020763,0.020763
392,228125,0.018433,0.018433
3820,63323016501,0.018043,0.018043


Unnamed: 0,feature,diff,mul
2662,904585461,0.013519,-0.013519
3852,63323038810,0.014429,-0.014429
1722,172572810,0.014614,-0.014614
3818,63323016101,0.016792,-0.016792
2860,25021011210,0.016798,-0.016798
162,51200,0.016966,-0.016966
1815,182138167,0.017109,-0.017109
37,50889,0.018246,-0.018246
469,4003822,0.019987,-0.019987
383,227194,0.025418,-0.025418


# Method 2 : 변동성
$$
FI^{역전} = FI^{원시간} - FI^{근시간}
$$ 

$$
FI^{근시간} = Relu(FI^{d1d5,inverse}) * sign(FI^{d1d5, 0->1})
$$
$$
FI^{원시간} = Relu(FI^{d6d10,inverse}) * sign(FI^{d6d10, 0->1})
$$

$$
FI^{our} = Relu(FI^{inverse}) * sign(FI^{0->1})
$$

## 근시간에 대한 FI^our
$$
FI^{근시간} = Relu(FI^{d1d5,inverse}) * sign(FI^{d1d5, 0->1})
$$
$$
FI^{원시간} = Relu(FI^{d6d10,inverse}) * sign(FI^{d6d10, 0->1})
$$

### 일관성

In [84]:
# 1. 일관성
m2_일관성_df1 = df_d1d5[['feature','diff']]
m2_일관성_df1.loc[(m2_일관성_df1['diff'] <= 0),'diff'] = 0
m2_일관성_df1 = m2_일관성_df1.sort_values(by='feature')
m2_일관성_df1.index = list(range(4068))
m2_일관성_df1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,feature,diff
0,0,0.000000
1,50803,0.000739
2,50804,0.003245
3,50805,0.000276
4,50806,0.004817
...,...,...
4063,78112001103,0.000263
4064,79511050204,0.000000
4065,87701071218,0.009628
4066,87701083336,0.000000


### 방향성

In [9]:
# 2. 방향성

# 1) DATA 
import random    
# ---------------------
seed_num = 42
# ---------------------
random.seed(seed_num)

x = np.load('/project/LSH/x_(7727,10,4068).npy')
y = np.load('/project/LSH/y_(7727,1).npy')

idx = list(range(len(x)))
random.shuffle(idx)

i = round(x.shape[0]*0.8)
X_train, y_train = x[idx[:i],:,:], y[idx[:i]]
X_test, y_test = x[idx[i:],:,:], y[idx[i:]]

# 2) MODEL
from keras.models import load_model
lstm2 = load_model('./model/(LSTM_best_4068)seed42-05-0.5029.hdf5')

# 3)
# FEATURES
a = pd.read_csv('total_data_7727.csv')
features = list(a['ITEMID'].sort_values().unique()) # 4068개 ITEMID 

# No Change에 대한 평균예측값 (baseline)
preds = lstm2.predict(X_test)
pred_base = np.mean(preds)

# COMPUTE BASELINE
results = []
results.append({'feature':'BASELINE','pred_base':pred_base}) 

for k in tqdm(range(len(features))):

    # REVERSE ALL FEATURE K
    save_col = X_test[:,:,k].copy()
    X_test[:,5:,k] = np.where(X_test[:,5:,k]==0, 1, X_test[:,5:,k])

    # COMPUTE BCE WITH FEATURE K REVERSED
    pred2 = lstm2.predict(X_test)
    pred2_mean = np.mean(pred2)
    
    results.append({'feature':features[k],'pred2':pred2_mean})
    X_test[:,:,k] = save_col

100%|██████████| 4068/4068 [41:25<00:00,  1.64it/s] 


In [11]:
# 4) diff 구해서 sign함수에 적용

방향성_df = pd.DataFrame(results)
pred_base = 방향성_df.iloc[0,1]
방향성_df = 방향성_df.drop(index=0, columns=['pred_base'])
방향성_df['diff'] = 방향성_df['pred2']-pred_base

방향성_df.loc[(방향성_df['diff']>0),'diff'] = 1
방향성_df.loc[(방향성_df['diff']<-0),'diff'] = -1
방향성_df = 방향성_df.sort_values(ascending=True, by='feature')
# 방향성_df.to_csv('m2_방향성df.csv',index=False)

In [98]:
# 2. 방향성
m2_방향성_df1 = pd.read_csv('m2_방향성df.csv')
m2_방향성_df1.index = list(range(4068))

### 일관성 * 방향성

In [102]:
# 3. 일관성 * 방향성
FI_our_근시간 = m2_일관성_df1.copy()
FI_our_근시간['mul'] = m2_일관성_df1['diff'] * m2_방향성_df1['diff']

FI_our_근시간 = FI_our_근시간.sort_values(ascending=False, by='mul')
FI_our_근시간

Unnamed: 0,feature,diff,mul
2195,409606211,0.044651,0.044651
1276,74606211,0.028581,0.028581
1081,67434504,0.024894,0.024894
982,54817525,0.022155,0.022155
2744,10019055302,0.021081,0.021081
...,...,...,...
1815,182138167,0.008059,-0.008059
3818,63323016101,0.008225,-0.008225
37,50889,0.008520,-0.008520
469,4003822,0.009620,-0.009620


In [103]:
FI_our_근시간.head(10)
FI_our_근시간.tail(10)

Unnamed: 0,feature,diff,mul
2195,409606211,0.044651,0.044651
1276,74606211,0.028581,0.028581
1081,67434504,0.024894,0.024894
982,54817525,0.022155,0.022155
2744,10019055302,0.021081,0.021081
1011,54858516,0.017398,0.017398
1306,74706811,0.015787,0.015787
845,45006701,0.015386,0.015386
392,228125,0.014006,0.014006
3820,63323016501,0.01384,0.01384


Unnamed: 0,feature,diff,mul
2662,904585461,0.006203,-0.006203
3852,63323038810,0.006388,-0.006388
1722,172572810,0.006786,-0.006786
162,51200,0.007664,-0.007664
2860,25021011210,0.008035,-0.008035
1815,182138167,0.008059,-0.008059
3818,63323016101,0.008225,-0.008225
37,50889,0.00852,-0.00852
469,4003822,0.00962,-0.00962
383,227194,0.011878,-0.011878


## 원시간에 대한 FI^our

### 일관성

In [104]:
# 1. 일관성
m2_일관성_df2 = df_d6d10[['feature','diff']]
m2_일관성_df2.loc[(m2_일관성_df2['diff'] <= 0),'diff'] = 0
m2_일관성_df2 = m2_일관성_df2.sort_values(by='feature')
m2_일관성_df2.index = list(range(4068))
m2_일관성_df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,feature,diff
0,0,0.000000
1,50803,0.000890
2,50804,0.000546
3,50805,0.000613
4,50806,0.000000
...,...,...
4063,78112001103,0.000454
4064,79511050204,0.000011
4065,87701071218,0.000000
4066,87701083336,0.000000
