In [1]:
import pandas as pd
import numpy as np
from model_team14 import select_features

## Load the imputed & transformed predictors (X) and targets (y), indexed by date.
X, y = (
    pd.read_csv(csv_path, index_col='date', parse_dates=True)
    for csv_path in ('../data/X_data_tr.csv', '../data/y_data_tr.csv')
)

# Settings forwarded to select_features below.
threshold, criteria = 0.2, None

metadata = pd.read_csv('../data/full_info.csv')
df_feature = select_features(metadata, X, threshold, criteria=criteria)

# Keep the names of the variables whose `select` flag equals 1.
selected_features = list(df_feature.loc[df_feature['select'] == 1, 'variable'])

def get_data(y_type, test_year, features):
    """Split the module-level ``X`` / ``y`` frames into train and test parts.

    The last ``test_year * 12`` rows form the test set and every row before
    them forms the training set (12 rows per year -- presumably monthly data;
    TODO confirm against the CSV frequency).

    Parameters
    ----------
    y_type : str
        Column of ``y`` to use as the target.
    test_year : int
        Number of trailing 12-row "years" to hold out. ``0`` keeps every row
        in the training set and returns empty test objects.
    features : list of str
        Columns of ``X`` to keep.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    ValueError
        If ``test_year`` is negative.
    """
    n_test = test_year * 12
    if n_test < 0:
        raise ValueError("test_year must be non-negative, got %r" % (test_year,))

    X_sel = X[features]
    y_sel = y[y_type]

    # Split on an explicit position instead of a negative slice: ``[:-0]`` is
    # ``[:0]``, which would leave the training set empty when test_year is 0.
    # Clamping at 0 keeps the old result when the hold-out exceeds the data.
    x_split = max(len(X_sel) - n_test, 0)
    y_split = max(len(y_sel) - n_test, 0)

    X_train, X_test = X_sel.iloc[:x_split], X_sel.iloc[x_split:]
    y_train, y_test = y_sel.iloc[:y_split], y_sel.iloc[y_split:]

    return X_train, y_train, X_test, y_test

In [2]:
# Number of variables kept by the feature-selection step.
len(selected_features)

12

In [3]:
# NOTE(review): exact duplicate of the previous cell -- safe to delete.
len(selected_features)

12

In [4]:
# !pip install numpy==1.23.5
# !pip install protobuf==3.19.*

In [5]:
#!pip install protobuf==3.19.0
import tensorflow as tf
from keras.utils.np_utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Recall, Precision  ##, F1Score

In [6]:
def create_dataset(X, y, time_steps=1):
    """Turn row-aligned features and targets into sliding-window samples.

    Sample ``i`` consists of rows ``i .. i + time_steps - 1`` of ``X`` and is
    paired with the target at row ``i + time_steps`` (the row right after the
    window).

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows; accessed through ``.iloc`` and ``.values``.
    y : pandas.Series
        Targets, positionally aligned with ``X``; accessed through ``.iloc``.
    time_steps : int, default 1
        Number of consecutive rows in each window. Must be at least 1.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Windows of shape ``(n_windows, time_steps, *X.shape[1:])`` and the
        ``n_windows`` matching targets, where
        ``n_windows = max(len(X) - time_steps, 0)``.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be >= 1, got %r" % (time_steps,))

    n_windows = len(X) - time_steps
    if n_windows <= 0:
        # Too few rows for a single window. Keep the trailing dimensions so
        # callers that read .shape[1] / .shape[2] still get sensible values
        # instead of an IndexError on a shape-(0,) array.
        return np.empty((0, time_steps) + tuple(X.shape[1:])), np.empty((0,))

    Xs = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    ys = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(Xs), np.array(ys)
# time_steps sets how many consecutive rows make up each input window
# TIME_STEPS = 5

# X_train, y_train = create_dataset(X_train, y_train, TIME_STEPS)
# X_test, y_test = create_dataset(X_test, y_test, TIME_STEPS)

# print(X_train.shape)

In [7]:
# inputs = tf.random.normal([510, 18, 12])
# lstm = tf.keras.layers.LSTM(100)
# output=lstm(inputs)
# print(output.shape)
# input=output
# dense=tf.keras.layers.Dense(3, activation='softmax')  #
# output=dense(input)

# print(output.shape)
# # np.array(output).sum(axis=1)

In [8]:


def _build_lstm_classifier(num_units, input_shape, num_classes, loss_type):
    """Return a compiled Sequential model: one LSTM layer feeding a softmax Dense layer."""
    model = Sequential()
    model.add(LSTM(num_units, input_shape=input_shape))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=loss_type, optimizer='adam',
                  metrics=['AUC', 'Accuracy', 'Recall', 'Precision'])
    return model


def grid_search(trainX, trainy, testX, testy, y_type, nl=[50,100], epochs=[100,300], batch_sizes=[10], time_steps=[18,24],
                n_layer=[50,100], verbose=1):
    """Train one LSTM classifier per hyper-parameter combination and score it.

    For every window length the train/test frames are converted to
    sliding-window samples with ``create_dataset`` and the targets are
    one-hot encoded. A fresh model is then fitted for every combination of
    epochs, batch size and LSTM width, and evaluated on the test windows.

    Parameters
    ----------
    trainX, testX : pandas.DataFrame
        Feature rows for the training and test periods.
    trainy, testy : pandas.Series
        Class labels aligned with ``trainX`` / ``testX``. They are passed to
        ``to_categorical`` with ``num_classes=len(trainy.unique())``, so the
        labels are assumed to be the integers ``0 .. n_classes - 1``
        (TODO confirm for every target column).
    y_type : str
        Not used by the current implementation; kept so existing calls work.
    nl : sequence of int
        Candidate numbers of LSTM units.
    epochs : sequence of int
        Candidate numbers of training epochs.
    batch_sizes : sequence of int
        Candidate batch sizes.
    time_steps : sequence of int
        Candidate window lengths; each must be shorter than both splits.
    n_layer : sequence of int
        Not used by the current implementation; kept so existing calls work.
    verbose : int, default 1
        Forwarded to ``model.fit``; pass 0 to silence per-epoch logging.

    Returns
    -------
    list of dict
        One entry per trained configuration with keys ``model``,
        ``time_steps``, ``num_epochs``, ``batch_size``, ``hiddenlayer``,
        ``auc``, ``acc``, ``recall`` and ``precision`` (metrics are formatted
        percentage strings). The list defaults are never mutated.

    Raises
    ------
    ValueError
        If a window length leaves no samples in the train or test split.
    """
    time_steps = list(time_steps)

    # Fail fast: create_dataset yields len(frame) - time_step windows, so a
    # window length that does not fit would otherwise only surface after
    # earlier (possibly very long) trainings have already run.
    shortest_split = min(len(trainX), len(testX))
    unusable = [ts for ts in time_steps if ts >= shortest_split]
    if unusable:
        raise ValueError(
            "time_steps %r leave no samples: the shortest split has %d rows"
            % (unusable, shortest_split)
        )

    results = []
    best_auc = 0
    best_params = None
    nc = len(trainy.unique())
    # The same loss is used for every target; hoisted out of the loops.
    loss_type = 'categorical_crossentropy'

    for time_step in time_steps:
        X_train, y_train = create_dataset(trainX, trainy, time_step)
        X_test, y_test = create_dataset(testX, testy, time_step)
        y_train = to_categorical(y_train, num_classes=nc)
        y_test = to_categorical(y_test, num_classes=nc)
        input_shape = (X_train.shape[1], X_train.shape[2])

        for num_epochs in epochs:
            for batch_size in batch_sizes:
                for num_layer in nl:
                    model = _build_lstm_classifier(num_layer, input_shape, nc, loss_type)
                    model.fit(X_train, y_train, epochs=num_epochs,
                              batch_size=batch_size, verbose=verbose)
                    # summary() prints the table itself and returns None, so
                    # it must not be wrapped in print().
                    model.summary()

                    # evaluate() returns the loss followed by the metrics in
                    # the order they were passed to compile().
                    scores = model.evaluate(X_test, y_test, verbose=0)
                    auc, acc, recall, precision = (score * 100 for score in scores[1:5])

                    current_result = {'model': model,
                                      'time_steps': time_step,
                                      'num_epochs': num_epochs,
                                      'batch_size': batch_size,
                                      'hiddenlayer': num_layer,
                                      'auc': "%.2f%%" % (auc),
                                      'acc': "%.2f%%" % (acc),
                                      'recall': "%.2f%%" % (recall),
                                      'precision': "%.2f%%" % (precision)
                                      }
                    results.append(current_result)

                    if auc > best_auc:
                        best_auc = auc
                        best_params = current_result

    # best_params stays None when no configuration scored an AUC above 0.
    print("Best AUC: ", best_params)
    return results


In [9]:
# y_type = 'y_agg'
# X_train, y_train, X_test, y_test = get_data(y_type, 5, selected_features)

# # print(grid_search(X_train, y_train, X_test, y_test, y_type, nl=[50,100],  epochs=[300,500], batch_sizes=[5], time_steps=[18,24]))
# ## ,  epochs=[100,300], batch_sizes=[5,10], time_steps=[18,24]

# result_dict_agg=grid_search(X_train, y_train, X_test, y_test, y_type, nl=[50,100],  epochs=[300,500], batch_sizes=[5], time_steps=[18,24])

In [10]:
# y_type = 'y_oecd'
# X_train, y_train, X_test, y_test = get_data(y_type, 5, selected_features)

# # print(grid_search(X_train, y_train, X_test, y_test, y_type, nl=[10,20],  epochs=[5,10], batch_sizes=[5], time_steps=[18]))
# ## ,  epochs=[100,300], batch_sizes=[5,10], time_steps=[18,24]

# result_dict_oecd=grid_search(X_train, y_train, X_test, y_test, y_type, nl=[50,100],  epochs=[300,500], batch_sizes=[5], time_steps=[18,24])

In [11]:
# pd.DataFrame(result_dict_agg)

In [12]:
# pd.DataFrame(result_dict_oecd)

In [13]:

# model2=result_dict[1]['model']

# print(model, model2)

# dir(model)
# model.summary()

In [14]:
# !pip uninstall shap
# !pip install shap

In [15]:
# import pickle
# with open('../result/result_dict_agg.pkl','wb') as f:
#     pickle.dump(result_dict_agg, f)

# with open('../result/result_dict_oecd.pkl','wb') as f:
#     pickle.dump(result_dict_oecd, f)


In [16]:
# !pip uninstall numba
# #!pip install --upgrade numba

In [17]:
#import shap
# import model_team14 
# from model_team14 import *  ##select_features, plot_pca, DTW

# import os
# import pandas as pd
# import numpy as np
# import pickle
# import matplotlib as mpl
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import label_binarize
# from sklearn.metrics import confusion_matrix,accuracy_score, recall_score, precision_score, f1_score,classification_report 
# from sklearn.metrics import roc_curve, auc

# import seaborn as sns


In [18]:
# model.predict(np.array(X_test_shap))
# model.fit(np.array(X_test_shap), np.array(y_train_shap))
# model.summary()
# X_train_shap.shape, X_train.shape

In [21]:
## Reference for the SHAP DeepExplainer / TensorFlow 2 error recorded in this cell's output:
## https://stackoverflow.com/questions/66814523/shap-deepexplainer-with-tensorflow-2-4-error?noredirect=1

import shap
import pickle

# Disabled alternatives kept for reference:
# import tensorflow.compat.v1.keras.backend as K
# import tensorflow as tf
# tf.compat.v1.disable_eager_execution()
# tf.compat.v1.disable_v2_behavior()
# tf version is 2.3.1
# kerase version is 2.4.0
# Shap version is 0.36

# NOTE(security): pickle.load can execute arbitrary code; only open result
# files that were written by this project.
with open('../result/result_dict_oecd.pkl', 'rb') as f:
    result_dict_oecd = pickle.load(f)

# Explain the first stored configuration. Entries written by grid_search carry
# the window length they were trained with; reuse it (falling back to 18) so
# the input shape always matches the model.
shap_entry = result_dict_oecd[0]
model = shap_entry['model']
shap_time_steps = shap_entry.get('time_steps', 18)
# One-hot width must equal the width of the model's softmax output.
shap_num_classes = model.output_shape[-1]

y_type = 'y_oecd'
X_train, y_train, X_test, y_test = get_data(y_type, 5, selected_features)

X_train_shap, y_train_shap = create_dataset(X_train, y_train, shap_time_steps)
# Build the evaluation windows from the held-out split, not the training
# split, so the SHAP values describe data the model is not fitted on below.
X_test_shap, y_test_shap = create_dataset(X_test, y_test, shap_time_steps)

# create_dataset and to_categorical already return NumPy arrays, so no extra
# np.array() conversion is needed.
y_train_shap = to_categorical(y_train_shap, num_classes=shap_num_classes)
y_test_shap = to_categorical(y_test_shap, num_classes=shap_num_classes)

model.fit(X_train_shap, y_train_shap, batch_size=5, epochs=5)

# Tensor copies of the windows; not used by the explainer calls below.
X_train_shap2 = tf.convert_to_tensor(X_train_shap)
X_test_shap2 = tf.convert_to_tensor(X_test_shap)

# NOTE(review): with the environment recorded in this notebook the explainer
# raised "AttributeError: 'TFDeep' object has no attribute 'between_tensors'"
# inside the LSTM layer. shap.GradientExplainer may be a workable alternative
# for this model -- TODO confirm before relying on the values.
explainer = shap.DeepExplainer(model, X_train_shap)

values = explainer.shap_values(X_test_shap)



Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-06-21 03:21:52         1822
metadata.json                                  2023-06-21 03:21:52           64
variables.h5                                   2023-06-21 03:21:52       185968


Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers\dense
......vars
.........0
.........1
...layers\lstm
......vars
...layers\lstm\cell
......vars
.........0
.........1
.........2
...metrics\auc
......vars
.........0
.........1
.........2
.........3
...metrics\mean
......vars
.........0
.........1
...metrics\mean_metric_wrapper
......vars
.........0
.........1
...metrics\precision
......vars
.........0
.........1
...metrics\recall
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-06-21 03:21:52         1823
metadata.json                                  2023-06-21 03:21:52           64
variables.h5                                   2023-06-21 03:21:52       578800
Kera

keras is no longer supported, please use tf.keras instead.
Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.
`tf.keras.backend.set_learning_phase` is deprecated and will be removed after 2020-10-11. To update it, simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model.


AttributeError: in user code:

    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 247, in grad_graph  *
        out = self.model(shap_rAnD)
    File "C:\Users\gredi\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 378, in custom_grad
        out = op_handlers[type_name](self, op, *grads) # we cut off the shap_ prefex before the lookup
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 667, in handler
        return linearity_with_excluded_handler(input_inds, explainer, op, *grads)
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 674, in linearity_with_excluded_handler
        assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 224, in _variable_inputs
        out[i] = t.name in self.between_tensors

    AttributeError: Exception encountered when calling layer 'lstm_8' (type LSTM).
    
    'TFDeep' object has no attribute 'between_tensors'
    
    Call arguments received by layer 'lstm_8' (type LSTM):
      • inputs=tf.Tensor(shape=(1020, 18, 12), dtype=float32)
      • mask=None
      • training=False
      • initial_state=None


In [None]:
import pickle

# result_dict_agg is not created by any active cell above (only by
# commented-out ones), so reload the pickled grid-search results to keep this
# cell runnable on a fresh kernel.
# NOTE(review): the path mirrors the commented-out save cell -- confirm the
# file exists. pickle.load can execute arbitrary code; only open trusted files.
with open('../result/result_dict_agg.pkl', 'rb') as f:
    result_dict_agg = pickle.load(f)

pd.DataFrame(result_dict_agg)

Unnamed: 0,model,time_steps,num_epochs,batch_size,hiddenlayer,auc,acc,recall,precision
0,<keras.engine.sequential.Sequential object at ...,18,300,5,50,78.56%,59.52%,59.52%,59.52%
1,<keras.engine.sequential.Sequential object at ...,18,300,5,100,79.56%,57.14%,57.14%,57.14%
2,<keras.engine.sequential.Sequential object at ...,18,500,5,50,85.98%,78.57%,78.57%,78.57%
3,<keras.engine.sequential.Sequential object at ...,18,500,5,100,79.51%,64.29%,64.29%,64.29%
4,<keras.engine.sequential.Sequential object at ...,24,300,5,50,89.37%,75.00%,75.00%,75.00%
5,<keras.engine.sequential.Sequential object at ...,24,300,5,100,71.76%,61.11%,61.11%,61.11%
6,<keras.engine.sequential.Sequential object at ...,24,500,5,50,79.67%,61.11%,61.11%,61.11%
7,<keras.engine.sequential.Sequential object at ...,24,500,5,100,76.02%,61.11%,61.11%,61.11%




`tf.keras.backend.set_learning_phase` is deprecated and will be removed after 2020-10-11. To update it, simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model.


AttributeError: in user code:

    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 247, in grad_graph  *
        out = self.model(shap_rAnD)
    File "C:\Users\gredi\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 378, in custom_grad
        out = op_handlers[type_name](self, op, *grads) # we cut off the shap_ prefex before the lookup
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 667, in handler
        return linearity_with_excluded_handler(input_inds, explainer, op, *grads)
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 674, in linearity_with_excluded_handler
        assert not explainer._variable_inputs(op)[i], str(i) + "th input to " + op.name + " cannot vary!"
    File "C:\Users\gredi\anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py", line 224, in _variable_inputs
        out[i] = t.name in self.between_tensors

    AttributeError: Exception encountered when calling layer 'lstm' (type LSTM).
    
    'TFDeep' object has no attribute 'between_tensors'
    
    Call arguments received by layer 'lstm' (type LSTM):
      • inputs=tf.Tensor(shape=(1020, 18, 12), dtype=float32)
      • mask=None
      • training=False
      • initial_state=None
