In [1]:
import os, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import wandb

from helpers.wandb_common import get_wandb_df
from helpers.gpu_selection import auto_gpu_selection, use_gpu
from models.loader import model_loader
from data.loaders import dataset_loader
from visualization.helpers import xai_plot
from configs.defaults import Globs
# Device selection: use_gpu(False) presumably forces CPU execution (project helper,
# semantics not visible here -- TODO confirm). Swap with the commented call below
# to let the helper choose a GPU instead.
use_gpu(False)
# auto_gpu_selection()

# Seed every RNG that is in scope, not just TensorFlow's: NumPy-based randomness
# (e.g. sampling done by NumPy-backed libraries) is otherwise different on each run,
# so a Restart & Run All would not reproduce the recorded outputs.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Train Models

In [9]:
# W&B public API client; used further down to fetch the full config of a run by id.
api = wandb.Api()
# Runs table for the project, built by the project helper. Later cells filter it with
# DataFrame.query on columns such as MODEL_NAME / DATASET / NUM_CLASSES and rank by 'f1'.
# NOTE(review): presumably one row per W&B run -- confirm against get_wandb_df.
df = get_wandb_df(Globs.PROJECT_NAME)

In [10]:
# DataFrame.query expressions, one per model that should be explained.
# Each expression is evaluated against the W&B runs table.
ROSSMANN_QUERY = "MODEL_NAME == 'fcn' & DATASET == 'rossmann'"
RULE_BASED_4_QUERY = "MODEL_NAME == 'fcn' & DATASET == 'rule_based' & NUM_CLASSES == 4"
VARIANCE_4_QUERY = "MODEL_NAME == 'fcn' & DATASET == 'variance_data' & NUM_CLASSES == 4 & label_type == 'standard'"

# Disabled 5-class variants, kept for reference:
# "MODEL_NAME == 'fcn' & DATASET == 'rule_based' & NUM_CLASSES == 5"
# "MODEL_NAME == 'fcn' & DATASET == 'variance_data' & NUM_CLASSES == 5 & label_type == 'standard'"

queries = [ROSSMANN_QUERY, RULE_BASED_4_QUERY, VARIANCE_4_QUERY]

In [11]:
# For every query, pick the run with the highest 'f1' and fetch its config from W&B.
# Result: model_configs maps run id -> a copy of that run's config dict.
WANDB_ENTITY = "oozyegen"  # W&B entity that owns the project; kept in one place so it is easy to change

model_configs = {}
for query in queries:
    # Best run first: descending sort on the 'f1' column.
    df_query = df.query(query).sort_values(by='f1', ascending=False)
    if len(df_query) == 0:
        # A query with no matching run used to be dropped without any trace, which
        # only showed up as a smaller count below. Report it so the gap is visible.
        print(f"No runs matched query, skipping: {query}")
        continue
    model_id = df_query.iloc[0]['id']
    run = api.run(f"{WANDB_ENTITY}/{Globs.PROJECT_NAME}/{model_id}")
    # Copy so later edits to the config do not touch the run object's own dict.
    model_configs[model_id] = run.config.copy()
print(len(model_configs))


3


In [5]:
# Train one model per selected run config; models maps run id -> trained model wrapper.
models = {}
for model_id, config in model_configs.items():
    # dataset_loader is indexed by dataset name; the selected loader is called with the
    # config and returns a pair whose second element is the dataset used for training.
    load_dataset = dataset_loader[config['DATASET']]
    _, dataset = load_dataset(config)
    model = model_loader(config)
    model.train(dataset)
    models[model_id] = model


Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.



Packing dataset
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch

In [2]:
# Root folder for persisted artifacts: model_configs.json plus one sub-directory per run id
# (this is how the save/load cells below build their paths).
SAVE_DIR = "models/xai/"

In [3]:
# Disabled one-off export: writes model_configs.json and saves every model in `models`
# under SAVE_DIR/<run id>. It needs `models` and `model_configs` from the training cells.
# NOTE(review): presumably left commented out so a re-run does not overwrite the saved
# artifacts -- confirm. If re-enabled, be aware that os.makedirs(SAVE_DIR) is called
# without exist_ok=True (raises when the folder already exists) and that the file object
# passed to json.dump is never closed explicitly.
# os.makedirs(SAVE_DIR)
# json.dump(model_configs, open(os.path.join(SAVE_DIR, 'model_configs.json'), 'w'))
# for model_id, _ in model_configs.items():
#     MODEL_SAVE_PATH = os.path.join(SAVE_DIR, model_id)
#     os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
#     models[model_id].save(MODEL_SAVE_PATH)

In [3]:
# Restore the persisted run configs and rebuild + load one model per saved run id.
# Reading through `with` guarantees the JSON file handle is closed; the previous
# json.load(open(...)) left it open until garbage collection.
with open(os.path.join(SAVE_DIR, 'model_configs.json'), 'r') as config_file:
    model_configs = json.load(config_file)

# models maps run id -> model wrapper with weights loaded from SAVE_DIR/<run id>.
models = {}
for model_id, config in model_configs.items():
    if config['NUM_CLASSES'] == 5:  # Skip 5 classes
        continue
    MODEL_SAVE_PATH = os.path.join(SAVE_DIR, model_id)
    models[model_id] = model_loader(config)
    print(MODEL_SAVE_PATH)
    print(model_id)
    models[model_id].load(MODEL_SAVE_PATH)

# Update LABELS
# Applied to every config in the file (including skipped 5-class ones). Configs that
# match no branch, e.g. dataset 'rule_based', are left exactly as stored.
for model_id, config in model_configs.items():
    if config['DATASET'] == 'rossmann':  # Temporary fix
        config.update({'LABELS': ['Low','Mid', 'High']})
    elif config['DATASET'] == 'variance_data' and config['NUM_CLASSES'] == 5:
        config.update({'LABELS': ['Promo', 'Phasing', 'POS', 'Other', 'NoComm']})
    elif config['DATASET'] == 'variance_data' and config['NUM_CLASSES'] == 4:
        config.update({'LABELS': ['Promo', 'Phasing', 'POS', 'Other']})

models/xai/3ofxbbdw
3ofxbbdw
models/xai/1bqhc7b6
1bqhc7b6
models/xai/29a0rz82
29a0rz82


# Generate CAM

In [4]:
from visualization.cam import cam_graph

In [23]:
# Compute CAM results on the test split of every loaded model and plot them
# into that model's folder under SAVE_DIR.
for model_id, model in models.items():
    config = model_configs[model_id]
    print(config)

    # The dataset is rebuilt from the config; only the test arrays are used here.
    load_dataset = dataset_loader[config['DATASET']]
    _, dataset = load_dataset(config)
    test_x, test_y = dataset['test_x'], dataset['test_y']

    # cam_graph receives the wrapped `model.model` object, not the wrapper itself.
    results = cam_graph(model.model, test_x, test_y, config['LABELS'], config)

    # The title contains a literal <br> and an indented second line.
    cam_title = f"""Dataset:{config['DATASET']}, NUM CLASSES:{config['NUM_CLASSES']} <br>
        Contribution of Months Predictions for CAM"""
    MODEL_SAVE_PATH = os.path.join(SAVE_DIR, model_id)
    xai_plot(results, config, title=cam_title, SAVE_DIR=MODEL_SAVE_PATH)

{'EPOCHS': 100, 'STRIDE': 1, 'DATASET': 'rossmann', 'PADDING': 'SAME', 'CONVERTER': 'EMA', 'NUM_ROUND': 5, 'N_FILTERS': [16, 16, 16], 'BATCH_NORM': True, 'BATCH_SIZE': 128, 'DOWNSAMPLE': True, 'MODEL_NAME': 'fcn', 'NUM_ROUNDS': 10, 'NUM_SERIES': 100, 'NUM_CLASSES': 3, 'TARGET_SIZE': 5, 'DROPOUT_RATE': 0.2, 'HISTORY_SIZE': 14, 'KERNEL_SIZES': [5, 7, 9], 'KEEP_WEEKDAYS': True, 'LABEL_HISTORY': 14, 'LABEL_GENERATOR': 'MULTI_CLASS_PROMO', 'LABELS': ['Low', 'Mid', 'High']}



Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.



Packing dataset


{'EPOCHS': 1000, 'LABELS': ['Promo', 'Phasing', 'POS', 'Other'], 'DATASET': 'rule_based', 'PADDING': 'SAME', 'UPSAMPLE': True, 'NUM_ROUND': 8, 'N_FILTERS': [16, 16, 16], 'BATCH_NORM': False, 'BATCH_SIZE': 26, 'DOWNSAMPLE': False, 'MODEL_NAME': 'fcn', 'NUM_ROUNDS': 10, 'NUM_CLASSES': 4, 'DROPOUT_RATE': 0.5, 'HISTORY_SIZE': 13, 'KERNEL_SIZES': [5, 7, 9], 'hyper_config': "{'N_FILTERS': [16, 16, 16], 'PADDING': 'SAME', 'KERNEL_SIZES': [5, 7, 9], 'BATCH_NORM': False, 'DROPOUT_RATE': 0.5}"}


{'EPOCHS': 1000, 'labels': ['Promo', 'Phasing', 'POS', 'Other'], 'DATASET': 'variance_data', 'PADDING': 'SAME', 'UPSAMPLE': True, 'NUM_ROUND': 1, 'N_FILTERS': [16, 16, 16], 'BATCH_NORM': True, 'BATCH_SIZE': 26, 'DOWNSAMPLE': False, 'MODEL_NAME': 'fcn', 'NUM_ROUNDS': 10, 'label_type': 'standard', 'NUM_CLASSES': 4, 'DROPOUT_RATE': 0.2, 'HISTORY_SIZE': 13, 'KERNEL_SIZES': [5, 7, 9], 'LABELS': ['Promo', 'Phasing', 'POS', 'Other']}


# Generate SHAP

In [5]:
from visualization.shap_explainer import get_kernel_shap_feature_importances, get_deepshap_feature_importances

In [52]:
# Kernel SHAP attributions for every loaded model, averaged over the explained
# samples and plotted into that model's folder under SAVE_DIR.
shap_nsamples = 200
for model_id, model in models.items():
    config = model_configs[model_id]
    print(config)

    load_dataset = dataset_loader[config['DATASET']]
    _, dataset = load_dataset(config)

    # Explains the model's predict_proba on the test inputs.
    shap_val = get_kernel_shap_feature_importances(
        model.predict_proba, dataset['test_x'], nsamples=shap_nsamples)
    print(shap_val.shape)

    # Average over axis 1, then swap the two remaining axes.
    # NOTE(review): recorded shapes such as (3, 200, 14) suggest (class, sample, time step),
    # which would make shap_avg (time step, class) -- confirm against the helper.
    sample_mean = shap_val.mean(axis=1)
    shap_avg = sample_mean.swapaxes(0, 1)

    shap_title = f"""Dataset:{config['DATASET']}, NUM CLASSES:{config['NUM_CLASSES']} <br>
        Contribution of Months Predictions for SHAP"""
    MODEL_SAVE_PATH = os.path.join(SAVE_DIR, model_id)
    xai_plot(shap_avg, config, title=shap_title, SAVE_DIR=MODEL_SAVE_PATH,
             plot_name=f'SHAP_{shap_nsamples}', color_scale='RdBu')


{'EPOCHS': 100, 'STRIDE': 1, 'DATASET': 'rossmann', 'PADDING': 'SAME', 'CONVERTER': 'EMA', 'NUM_ROUND': 5, 'N_FILTERS': [16, 16, 16], 'BATCH_NORM': True, 'BATCH_SIZE': 128, 'DOWNSAMPLE': True, 'MODEL_NAME': 'fcn', 'NUM_ROUNDS': 10, 'NUM_SERIES': 100, 'NUM_CLASSES': 3, 'TARGET_SIZE': 5, 'DROPOUT_RATE': 0.2, 'HISTORY_SIZE': 14, 'KERNEL_SIZES': [5, 7, 9], 'KEEP_WEEKDAYS': True, 'LABEL_HISTORY': 14, 'LABEL_GENERATOR': 'MULTI_CLASS_PROMO', 'LABELS': ['Low', 'Mid', 'High']}


Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.


Packing dataset


Using 200 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/200 [00:00<?, ?it/s]

(3, 200, 14)


Using 147 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


{'EPOCHS': 1000, 'LABELS': ['Promo', 'Phasing', 'POS', 'Other'], 'DATASET': 'rule_based', 'PADDING': 'SAME', 'UPSAMPLE': True, 'NUM_ROUND': 8, 'N_FILTERS': [16, 16, 16], 'BATCH_NORM': False, 'BATCH_SIZE': 26, 'DOWNSAMPLE': False, 'MODEL_NAME': 'fcn', 'NUM_ROUNDS': 10, 'NUM_CLASSES': 4, 'DROPOUT_RATE': 0.5, 'HISTORY_SIZE': 13, 'KERNEL_SIZES': [5, 7, 9], 'hyper_config': "{'N_FILTERS': [16, 16, 16], 'PADDING': 'SAME', 'KERNEL_SIZES': [5, 7, 9], 'BATCH_NORM': False, 'DROPOUT_RATE': 0.5}"}


  0%|          | 0/147 [00:00<?, ?it/s]

(4, 147, 13)


Using 110 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


{'EPOCHS': 1000, 'labels': ['Promo', 'Phasing', 'POS', 'Other'], 'DATASET': 'variance_data', 'PADDING': 'SAME', 'UPSAMPLE': True, 'NUM_ROUND': 1, 'N_FILTERS': [16, 16, 16], 'BATCH_NORM': True, 'BATCH_SIZE': 26, 'DOWNSAMPLE': False, 'MODEL_NAME': 'fcn', 'NUM_ROUNDS': 10, 'label_type': 'standard', 'NUM_CLASSES': 4, 'DROPOUT_RATE': 0.2, 'HISTORY_SIZE': 13, 'KERNEL_SIZES': [5, 7, 9], 'LABELS': ['Promo', 'Phasing', 'POS', 'Other']}


  0%|          | 0/110 [00:00<?, ?it/s]

(4, 110, 13)
