In [None]:
import sys
import os
import matplotlib.font_manager as font_manager
import viz_sequence as viz_sequence
import torch
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
import json
import tqdm
# NOTE(review): this instantiates a notebook progress bar without an iterable, and
# no other cell visible in this notebook uses tqdm. Presumably it is kept to
# initialise tqdm's notebook support -- confirm it is still needed. Newer tqdm
# releases deprecate `tqdm_notebook` in favour of `tqdm.notebook.tqdm`.
tqdm.tqdm_notebook()

In [None]:
# Load precomputed SHAP score arrays from ./shap (relative to the working directory).
# Later cells pass the `test_shap`-derived scores as the "no prior" condition and the
# `test_prior_shap`-derived scores as the "with Fourier prior" condition.
# NOTE(review): the `val_*` arrays are loaded but not used by any cell visible here;
# presumably they follow the same prior / no-prior naming -- confirm.
test_shap = np.load('./shap/test_shap.npy')
test_prior_shap = np.load('./shap/test_prior_shap.npy')
val_shap = np.load('./shap/val_shap.npy')
val_prior_shap = np.load('./shap/val_prior_shap.npy')

In [None]:
# Functions from https://github.com/amtseng/fourier_attribution_priors/blob/27b95141da26f1c7d388db9046e9d06b1a7b5df9/notebooks/view_profile_predictions.ipynb

def dft(signal):
    """
    Computes the discrete Fourier transform of `signal` and returns the first
    half of the spectrum (the bins starting at DC), since the magnitudes of a
    real-valued signal are symmetric.
    Returns a pair `(freqs, mags)`: the angular frequencies (radians per
    sample) of the kept bins, and the magnitudes of the Fourier coefficients
    at those bins.
    """
    spectrum = np.fft.fft(signal)
    # fftfreq gives cycles per sample; scale to radians per sample
    angular_freqs = np.fft.fftfreq(signal.size) * (2 * np.pi)

    # Keep only the first half of each array (DC and the positive frequencies)
    num_freqs = len(angular_freqs) // 2
    num_mags = len(spectrum) // 2
    return angular_freqs[:num_freqs], np.abs(spectrum)[:num_mags]


def fourier_highfreq_mags(imp_scores, freq_limit):
    """
    For an N x I x 4 array of actual importance scores, computes the share of
    each track's Fourier magnitude that lies in high frequencies.
    Each track is first reduced to an I-array by summing absolute scores over
    the last axis. Its Fourier magnitudes are taken over the first half of
    the spectrum with the DC bin removed, and normalized to sum to 1.
    Arguments:
        `imp_scores`: N x I x 4 array of importance scores
        `freq_limit`: index into the non-DC magnitudes; bins at this index
            and above count as "high frequency"
    Returns an N-array of Fourier scores (i.e. the sum of the normalized
    high-frequency magnitudes). A track whose non-DC magnitudes are all zero
    (e.g. an all-zero track) yields NaN from the 0 / 0 normalization.
    """
    # Collapse the last axis: N x I x 4 -> N x I
    tracks = np.sum(np.abs(imp_scores), axis=2)

    # One batched FFT over the position axis instead of a Python loop per track
    half = tracks.shape[1] // 2  # Magnitudes are symmetric; keep the first half
    mags = np.abs(np.fft.fft(tracks, axis=1))[:, 1:half]  # Cut off DC

    # Normalize each track's magnitudes to sum to 1
    mags = mags / np.sum(mags, axis=1, keepdims=True)
    return np.sum(mags[:, freq_limit:], axis=1)

def entropy(imp_scores, pseudocount=0.001):
    """
    For an N x I x 4 array of actual importance scores, computes the entropy
    (in bits) of each track. A track is the I-array of absolute scores summed
    over the last axis, plus `pseudocount` at every position, normalized to
    sum to 1. Returns an N-array of entropy values.
    """
    # N x I x 4 -> N x I, with the pseudocount keeping every entry positive
    per_position = np.abs(imp_scores).sum(axis=2) + pseudocount

    # Turn each track into a distribution over positions
    probs = per_position / per_position.sum(axis=1, keepdims=True)

    return -(probs * np.log2(probs)).sum(axis=1)


def _smoothness_title(headline, condition_name, num_samples):
    """Builds the multi-line figure title shared by both smoothness figures."""
    lines = [headline]
    if condition_name is not None:
        lines.append("%s profile models" % condition_name)
    lines.append("Computed on %d randomly drawn test peaks" % num_samples)
    return "\n".join(lines)


def _plot_paired_hist(ax, noprior_scores, prior_scores, xlabel, bin_num=20):
    """Overlays the no-prior and with-prior histograms on `ax` using shared bins."""
    all_vals = np.concatenate([noprior_scores, prior_scores])
    # nanmin/nanmax: a single NaN score would otherwise turn every bin edge into NaN
    bins = np.linspace(np.nanmin(all_vals), np.nanmax(all_vals), bin_num)
    ax.hist(noprior_scores, bins=bins, color="coral", label="No prior", alpha=0.7)
    ax.hist(prior_scores, bins=bins, color="slateblue", label="With Fourier prior", alpha=0.7)
    ax.set_xlabel(xlabel)


def _draw_xy_line(ax):
    """Draws the y = x diagonal on `ax` and makes both axes share the same limits."""
    limits = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
    ]
    ax.plot(limits, limits, "--", alpha=0.5, color="black")
    ax.set_aspect("equal")
    ax.set_xlim(limits)
    ax.set_ylim(limits)


def _print_smoothness_stats(header, noprior_scores, prior_scores):
    """Prints the means, standard errors and a one-sided paired Wilcoxon test for one metric."""
    print(header)
    print("Average without priors: %f" % np.nanmean(noprior_scores))
    print("Average with priors: %f" % np.nanmean(prior_scores))
    print("Standard error without priors: %f" % scipy.stats.sem(noprior_scores, nan_policy="omit"))
    print("Standard error with priors: %f" % scipy.stats.sem(prior_scores, nan_policy="omit"))
    w, p = scipy.stats.wilcoxon(noprior_scores, prior_scores, alternative="greater")
    print("One-sided Wilcoxon test: w = %f, p = %f" % (w, p))


def plot_global_smoothness(
    noprior_imp_fourier_scores, prior_imp_fourier_scores, noprior_imp_entropy_scores,
    prior_imp_entropy_scores, imp_type, condition_name=None
):
    """
    Compares the smoothness of importance scores from a model trained without
    the prior against one trained with the Fourier prior. Shows a figure of
    overlaid histograms and a figure of pairwise scatter plots (each with one
    panel for the high-frequency Fourier sum and one for entropy), then
    prints summary statistics and one-sided paired Wilcoxon tests.
    Arguments:
        `noprior_imp_fourier_scores`: array of high-frequency Fourier sums
            without the prior
        `prior_imp_fourier_scores`: matching array with the prior
        `noprior_imp_entropy_scores`: array of entropies without the prior
        `prior_imp_entropy_scores`: matching array with the prior
        `imp_type`: name of the importance scores, inserted into the titles
        `condition_name`: optional model/condition name for the titles; if
            omitted, a notebook-level `condition_name` variable is used when
            one exists, otherwise that title line is left out
    """
    if condition_name is None:
        # Backward compatibility: the existing call in this notebook sets a
        # notebook-level `condition_name` instead of passing it in
        condition_name = globals().get("condition_name")
    num_samples = len(noprior_imp_fourier_scores)

    # Figure 1: overlaid histograms of each metric
    fig, ax = plt.subplots(1, 2, figsize=(20, 6))
    _plot_paired_hist(
        ax[0], noprior_imp_fourier_scores, prior_imp_fourier_scores,
        "Sum of high-frequency Fourier magnitudes"
    )
    _plot_paired_hist(
        ax[1], noprior_imp_entropy_scores, prior_imp_entropy_scores, "Entropy"
    )
    ax[1].legend()
    fig.suptitle(_smoothness_title(
        "Histograms of smoothness of %s" % imp_type, condition_name, num_samples
    ))
    plt.subplots_adjust(top=0.80)
    plt.show()

    # Figure 2: per-example comparison; points below the diagonal have a
    # smaller value with the prior than without it
    fig, ax = plt.subplots(1, 2, figsize=(20, 6))
    ax[0].scatter(noprior_imp_fourier_scores, prior_imp_fourier_scores, color="mediumorchid", alpha=0.4)
    ax[0].set_xlabel("High frequency sum without prior")
    ax[0].set_ylabel("High frequency sum with Fourier prior")
    ax[1].scatter(noprior_imp_entropy_scores, prior_imp_entropy_scores, color="mediumorchid", alpha=0.4)
    ax[1].set_xlabel("Entropy without prior")
    ax[1].set_ylabel("Entropy with Fourier prior")
    _draw_xy_line(ax[0])
    _draw_xy_line(ax[1])
    fig.suptitle(_smoothness_title(
        "Pairwise comparison of %s smoothness" % imp_type, condition_name, num_samples
    ))
    plt.subplots_adjust(top=0.80)
    plt.show()

    _print_smoothness_stats(
        "High-frequency Fourier sum:", noprior_imp_fourier_scores, prior_imp_fourier_scores
    )
    _print_smoothness_stats(
        "Entropy:", noprior_imp_entropy_scores, prior_imp_entropy_scores
    )

In [None]:
## Statistical Tests and Visualizations

condition_name = "BPNet"
# Index into the non-DC Fourier magnitudes at and above which bins count as "high frequency"
FREQ_LIMIT = 200

test_input_seqs = np.load('./shap/test_input_seqs.npy')

# Multiply the SHAP arrays by the input sequences once and reuse the products
# (presumably this keeps only the scores of the bases actually present, assuming
# the input sequences are one-hot encoded -- TODO confirm)
test_scores = test_shap * test_input_seqs
test_scores_prior = test_prior_shap * test_input_seqs

test_entropy = entropy(test_scores)
test_prior_entropy = entropy(test_scores_prior)
test_mags = fourier_highfreq_mags(test_scores, FREQ_LIMIT)
test_prior_mags = fourier_highfreq_mags(test_scores_prior, FREQ_LIMIT)
plot_global_smoothness(
    test_mags, test_prior_mags, test_entropy,
    test_prior_entropy, "DeepSHAP scores"
)

In [None]:
## Specific Example

test_scores = test_shap*test_input_seqs
test_scores_prior = test_prior_shap*test_input_seqs
test_prof = np.load('./shap/test_prof.npy')

# Number of x positions shown in the plot
INPUT_LENGTH = 1346
# Zeros added on each side of the profile; 2 * 173 = 346, so this assumes the
# profile track is 1000 long and is being centred in the plot -- TODO confirm
PROFILE_PAD = 173

positions = np.arange(0, INPUT_LENGTH)
prof_padded = np.pad(test_prof[1], (PROFILE_PAD, PROFILE_PAD))
plt.figure(figsize=(20,2))
plt.xticks(np.arange(0, INPUT_LENGTH, 100))
plt.xlim((0, INPUT_LENGTH))
# Do not rebind the position array to plot()'s return value; plt.show() renders the figure
plt.plot(positions, prof_padded, color='blue')
plt.show()

In [None]:
# Test example 1, scores from the no-prior model, over the full sequence
# (subticks_frequency is interpreted by viz_sequence; presumably the x-tick spacing)
viz_sequence.plot_weights(test_scores[1], subticks_frequency=100)

In [None]:
# Test example 1, scores from the model with the Fourier prior, over the full sequence
# (subticks_frequency is interpreted by viz_sequence; presumably the x-tick spacing)
viz_sequence.plot_weights(test_scores_prior[1], subticks_frequency=100)

In [None]:
# Zoom on positions 650-749 of test example 1, scores from the no-prior model
viz_sequence.plot_weights(test_scores[1][650:750], subticks_frequency=10)

In [None]:
# Zoom on positions 650-749 of test example 1, scores from the model with the Fourier prior
viz_sequence.plot_weights(test_scores_prior[1][650:750], subticks_frequency=10)