#### Reg Compare Groups (with Arianna)
- Show the contribution of various properties (H3K27me3 target, TF, CTS) to the RNA decay rate in a linear model
- Add labels Beta0 - Beta7 to indicate which group is being shown

In [None]:
#Imports
import sys
import os
import pandas as pd
import seaborn as sns
import numpy as np
import statsmodels.api as sm
import itertools
import random
import math
from sklearn.preprocessing import PolynomialFeatures
from scipy.stats.mstats import winsorize
from collections import defaultdict

sys.path.append('../scripts')
from plot_helpers import *
from utilities import load_dataset
from matplotlib import lines
from plotting_fxns import Connector, PrettyBox

%load_ext autoreload
%autoreload 2

In [None]:
# Directory that every figure in this notebook is saved into; created if missing.
outdir = '../Figures/Reg/'
os.makedirs(outdir, exist_ok=True)

In [None]:
# Load the per-gene table (indexed by 'gene') and derive the regression target.
# The decay rates are log10-transformed and then winsorized at 1% on each tail.
# rate_df = load_dataset('../Figures/summary_files/INSPEcT_rates.csv', '../Figures/summary_files/brain4sU_passed.csv')
df = pd.read_csv('../Figures/Devreg/gene_cat_me3.csv', index_col='gene')
# me3 is True for genes whose 'category' equals 'updowngene'
df['me3'] = df['category'] == 'updowngene'
# NOTE(review): log10 gives -inf/NaN for non-positive rates -- presumably all deg_rate values are > 0; confirm
df['log_deg'] = df['deg_rate'].apply(np.log10)
df['log_deg_wins_1'] = winsorize(df['log_deg'], (0.01, 0.01))
# Response column and predictor columns used by every later cell
target_column = 'log_deg_wins_1'
predictors = ['TF', 'me3', 'CTS']

In [None]:
# Plot histograms of the untransformed and the log10-transformed (winsorized) decay rates.
# plt, dfig, color_dict, out_fmt and out_dpi are not defined in this notebook;
# presumably they come from `from plot_helpers import *` -- confirm.
# NOTE(review): the units here are written as '1 / TPM', while the group boxplot
# later labels the same rates as '1 / min' -- confirm which unit is correct.

# Untransformed
fig = plt.figure(figsize=(dfig, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
ax = sns.histplot(data=df, x='deg_rate', color = color_dict['grey'], label='decay rate\n(1 / TPM)', element='step')
ax.set_ylabel('number of genes')
ax.set_xlabel('decay rate (1 / TPM)')
# ax.set_xlabel('log'r'$_{10}$'' rate')
plt.savefig('%s.%s' % (os.path.join(outdir, 'decay_rate_untransformed'), out_fmt), dpi = out_dpi)

# Log10-transformed (the winsorized column, i.e. the regression target)
fig = plt.figure(figsize=(dfig, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
ax = sns.histplot(data=df, x='log_deg_wins_1', color = color_dict['grey'], label='decay rate\n(1 / TPM)', element='step')
ax.set_ylabel('number of genes')
ax.set_xlabel('decay rate log'r'$_{10}$'' (1 / TPM)')
plt.savefig('%s.%s' % (os.path.join(outdir, 'decay_rate_transformed'), out_fmt), dpi = out_dpi)



#### Part I: Qualitative assessment of the factor combinations
1) Find all possible combinations of factors -- assign combinations to different groups
2) Plot the decay rates of all groups
3) Plot the ratio of decay rate means between groups that differ by only one factor

In [None]:
# Get all combinations of groups using PolynomialFeatures()
def feat_2_group(arr):
    '''
    Convert feature names such as those returned by
    PolynomialFeatures.get_feature_names_out() ('1', 'x0', 'x0 x1', ...)
    into 0/1 arrays specifying which input features each name contains.

    arr = iterable of feature-name strings. Each name is either '1' (the
    intercept) or space-separated terms of the form 'x<index>'.
    returns a list with one integer array per name; position k is 1 if
    feature x<k> is part of that name, otherwise 0.

    It is understood that '1' = intercept, not group 0,0,0.
    However, it is assigned to the all-zero group for convenience,
    and this will map to Beta0.

    The array width is the larger of (highest feature index + 1) and the
    largest number of terms in any one name, so names that do not include the
    full-order interaction (e.g. only pairwise terms of three features) are
    handled instead of raising an IndexError. An empty input returns [].
    '''
    # Parse every name once into the list of feature indices it contains.
    index_lists = []
    for name in arr:
        if name == '1':
            index_lists.append([])
        else:
            index_lists.append([int(term.lstrip('x')) for term in name.split(' ')])
    if not index_lists:
        return []

    # Width must cover both the longest interaction and the highest index seen.
    most_terms = max(len(indices) for indices in index_lists)
    highest_index = max((max(indices) for indices in index_lists if indices), default=-1)
    n_feats = max(1, most_terms, highest_index + 1)

    group_ids = []
    for indices in index_lists:
        membership = np.zeros(n_feats, dtype=int)
        membership[indices] = 1
        group_ids.append(membership)
    return group_ids

# Expand the predictors into the intercept, main effects and all interaction terms,
# then turn the generated feature names into one 0/1 membership row per group.
X = df[predictors].values
y = df[target_column].values
poly = PolynomialFeatures(interaction_only=True, degree=len(predictors))
X_tr = poly.fit_transform(X)
feat_names = poly.get_feature_names_out()
group_ids = feat_2_group(feat_names)
# Make dataframe mapping group index -> group labels
group_df = pd.DataFrame(group_ids)
# Integer column labels of the membership columns (one per predictor)
idx = list(group_df.columns)
pred_array = np.array(predictors)
# '(0,1,1)'-style label built from the membership values
group_df['tuple_label'] = group_df[idx].apply(lambda x: f'({",".join([str(i) for i in x])})', 1)
# LaTeX beta label numbered by the row position
group_df['beta_label'] = group_df.index.map(lambda x: '$\\beta_{%s}$' % x)
# Names of the predictors present in the group joined by ':'; the all-zero row becomes 'intercept'
group_df['str_label'] = group_df[idx].apply(lambda x: ':'.join(np.compress(x, pred_array)), 1)
group_df['str_label'] = group_df['str_label'].replace('', 'intercept')

In [None]:
# Get the decay rates for each group of genes.
# deg_rates[k] holds the target values of the genes whose predictor values match row k of group_arr.
group_arr = group_df.iloc[:, 0:len(predictors)].values
deg_rates = []
for row in group_arr:
    # Build a query such as 'TF == 1&me3 == 0&CTS == 1' selecting exactly this combination
    query_str = '&'.join([f'{pred} == {v}' for (pred,v) in zip(predictors, row)])
    deg_rates.append(df.query(query_str)[target_column].values)

In [None]:
# Find all pairs of groups that differ in exactly one factor, store their indices.
# passed_pairs maps the position of the differing factor -> list of [index_without, index_with]
# ordered_pairs is the flat list of all such index pairs, in discovery order.
passed_pairs = defaultdict(list)
ordered_pairs = []
for i in itertools.combinations(enumerate(group_arr), 2):
    indices = [j[0] for j in i]
    comb = [j[1] for j in i]
    a = np.array(comb)
    # Named col_sums (not `sum`) so the builtin sum() stays usable in later cells.
    col_sums = a.sum(axis=0)
    # A column sums to 1 only where the two 0/1 rows differ,
    # so exactly one such column means the groups differ by one factor.
    differing = np.where(col_sums == 1)[0]
    if len(differing) == 1:
        ordered_pairs.append(indices)
        diff_i = differing[0]
        passed_pairs[diff_i].append(indices)
        # Double check that directionality is from F -> T, so that we use 0-1 to get the effect of adding one True
        assert comb[1].sum() > comb[0].sum(), 'pair is not ordered from factor absent -> factor present'

In [None]:
# Plot the decay rates of each group as boxplots, with connectors between
# groups that differ by a single factor.
# PrettyBox and Connector are project helpers from plotting_fxns; their exact
# behaviour is not visible here.
fig = plt.figure(figsize=(dfig*2,dfig*1.5), constrained_layout=True)

# The boxplot occupies the lower three of four rows
gs = fig.add_gridspec(ncols=1, nrows=4)
ax = fig.add_subplot(gs[1:4,:])
# NOTE(review): no ax= is passed, so PrettyBox presumably draws on the current axes -- confirm
ax = PrettyBox(data=deg_rates, fliersize=1, color=color_dict['grey'])
# NOTE(review): units are written '1 / min' here but '1 / TPM' on the histograms -- confirm
ax.set_ylabel('decay rate (log'r'$_{10}$'' 1 / min)')
# Tick labels show the group tuple and, in brackets, the number of genes in the group
ngenes = [len(i) for i in deg_rates]
labels = [f'{l}\n[{n}]' for (l,n) in zip(group_df['tuple_label'], ngenes)]
_ = ax.set_xticklabels(labels)
c = Connector(ax)
for i in ordered_pairs:
    c.add_connector(ax, i)
pred_str = f'({",".join(predictors)})'
# The two annotations use hard-coded coordinates, so they are tuned to the current data range
ax.text(-0.5, 1, f'{pred_str} 1 = present, 0 = not present')
ax.text(-0.5, -3.78, '[num genes]', ha='right', va='top')
plt.savefig('%s.%s' % (os.path.join(outdir, 'decay_clusters_boxplot'), out_fmt), dpi = out_dpi)

In [None]:
# Locate the group that has TF and me3 together (and nothing else), so that an
# asterisk can be put on the plot for comparisons involving it.
TF_index = predictors.index('TF')
me3_index = predictors.index('me3')
CTS_index = predictors.index('CTS')
# Reset on every run so a stale index from an earlier execution is never reused;
# it stays None when no group has exactly TF and me3 set.
double_comp_index = None
for i, group_row in enumerate(group_arr):
    # Both TF and me3 present, and they are the only two factors present
    if group_row[TF_index] == 1 and group_row[me3_index] == 1 and np.sum(group_row) == 2:
        double_comp_index = i

In [None]:
# Get the ratio of mean decay rates between groups that differ by one factor.
# Decay rates are log10-transformed, so the ratio is a difference of means.
# ratios / n get one row per differing factor and one entry per group pair.
ratios = []
n = []
# Keys are the positions of the differing factor: 0, 1, 2
for pos in sorted(list(passed_pairs.keys())):
    pos_ratios = []
    pos_genes = []
    for pair in passed_pairs[pos]:
        i, j = pair
        # Effect of True compared to False is index 1 - index 0
        ratio = deg_rates[j].mean() - deg_rates[i].mean()
        pos_ratios.append(ratio)
        # Annotation text: genes in the +factor group / genes in the -factor group
        pos_genes.append(f'{ngenes[j]} / {ngenes[i]}')
        # Star comparisons that involve the TF+me3 group, except where CTS is the varying factor
        if (double_comp_index in [i, j]) and (pos != CTS_index):
            pos_genes[-1] = '*' + pos_genes[-1]
    ratios.append(pos_ratios)
    n.append(pos_genes)
ngenes_a = np.array(n)
ratio_a = np.array(ratios)
# Convert to base 2 to conform to traditional bio expression change presentation
# (dividing a log10 value by log10(2) gives the log2 value)
ratio_a_log2 = ratio_a/math.log(2, 10)

In [None]:
# Plot the ratio of decay rate means between groups, one single-row heatmap per varying factor
# Get the range of the data and make symmetrical so that red=+ve and blue=-ve
max_val = abs(max(ratio_a_log2.min(), ratio_a_log2.max(), key=abs))
# Round the limit up to the next multiple of 0.1
max_pos = (max_val//0.1)*0.1 + 0.1
max_neg = -max_pos
nrows = ratio_a.shape[0]
fig = plt.figure(figsize=(dfig*2, dfig*1.5))
gs = fig.add_gridspec(ncols=20, nrows=nrows, hspace=0.5)
# The colorbar takes the second-to-last grid column; heatmaps take all columns before it
# cbar_ax = fig.add_subplot(gs[:,-1])
cbar_ax = fig.add_subplot(gs[:,-2])
for i in range(nrows):
    # ax = fig.add_subplot(gs[i,:-1])
    ax = fig.add_subplot(gs[i,:-2])
    # Only the first row draws the shared colorbar
    if i == 0:
        # ax = sns.heatmap(ratio_a[i].reshape(1,-1), vmin=-0.5, vmax=0.5, cmap='seismic', cbar_ax=cbar_ax, cbar_kws={'label':'effect of adding factor (log'r'$_{10}$'')'})
        # ax = sns.heatmap(ratio_a[i].reshape(1,-1), vmin=max_neg, vmax=max_pos, cmap='seismic', cbar_ax=cbar_ax, cbar_kws={'label':'+factor / -factor (log'r'$_{10}$'' mean / mean)'},
        ax = sns.heatmap(ratio_a_log2[i].reshape(1,-1), vmin=max_neg, vmax=max_pos, cmap='seismic', cbar_ax=cbar_ax, cbar_kws={'label':'ratio of group means (log'r'$_{2}$'' mean / mean)'},
                         annot=ngenes_a[i].reshape(1,-1), fmt='')
    else:
        ax = sns.heatmap(ratio_a_log2[i].reshape(1,-1), vmin=max_neg, vmax=max_pos, cmap='seismic', cbar=False, annot=ngenes_a[i].reshape(1,-1), fmt='')
    ax.set_yticklabels([predictors[i]])
    # Swap the label order so that it reads +factor / -factor, matching the plotted ratio
    # NOTE(review): passed_pairs is indexed by the row number i, which assumes its keys are 0..nrows-1
    label_a = np.array(group_df['tuple_label'])[passed_pairs[i]]
    col1 = label_a[:, 0].reshape(-1, 1)
    col2 = label_a[:, 1].reshape(-1, 1)
    label_a2 = np.hstack([col2, col1])
    xlabels = np.apply_along_axis(lambda x: ' / '.join(x), 1, label_a2)
    ax.set_xticklabels(xlabels, fontsize=6)
fig.text(0.007, 0.5, 'feature varying between groups:', rotation=90, va='center', ha='left')
fig.text(0.5, 0.9, 'n genes / n genes', ha='center', va='bottom')
# fig.text(0.5, 0.01, 'group ID (0 = false, 1 = true for: TF, me3, CTS)', ha='center', va='bottom')
plt.savefig('%s.%s' % (os.path.join(outdir, 'TrueFalse_comp'), out_fmt), dpi = out_dpi)

#### Part II: Bootstrapping to get more accurate coefficients with the given group sizes
1) Find size of smallest group (N)
2) Choose random seed
3) Sample N genes from each group, with replacement
4) Run 10,000 times (bootstraps)

In [None]:
def run_OLS(df, target_column, predictors, interactions=False):
    '''
    Fit an ordinary least squares model of target_column on the predictors.

    df = dataframe holding the target column and the predictor columns
    target_column = name of the response column
    predictors = list of predictor column names
    interactions = if True, also fit every interaction term between the
        predictors (built with PolynomialFeatures, interaction_only=True);
        if False, fit only an intercept plus one term per predictor.
    returns the fitted statsmodels results object.

    The design-matrix columns are labelled with LaTeX beta strings
    ('$\\beta_{0}$', '$\\beta_{1}$', ...) rather than the predictor names, so
    res.params and res.pvalues are indexed by those labels. With interactions
    the labels follow the PolynomialFeatures column order; without
    interactions the first len(predictors)+1 labels are used.
    '''
    X = df[predictors].values
    y = df[target_column].values
    n_comb = 2**len(predictors)
    # Betas should map to the same combinations in group_df since these were extracted with PolynomialFeatures()
    betas = ['$\\beta_{%s}$' % i for i in range(n_comb)]

    if interactions:
        poly = PolynomialFeatures(interaction_only=True, degree=len(predictors))
        # When using PolynomialFeatures, it already adds a constant.
        design = pd.DataFrame(poly.fit_transform(X), columns=betas)
    else:
        # has_constant='add' always prepends the intercept column. The default
        # ('skip') returns X unchanged when a predictor column happens to be a
        # non-zero constant, which would leave too few columns for the labels.
        with_const = sm.add_constant(X.astype(int), has_constant='add')
        design = pd.DataFrame(with_const, columns=betas[0:len(predictors)+1])

    # The summary table is not built here: its value was discarded, and it is
    # costly when this function runs twice per bootstrap iteration.
    return sm.OLS(y, design).fit()

In [None]:
# Find the linear regression coefficients and pvalues via bootstrapping
def run_bootstrap(rates, group_arr, samp_n=30, seed_num=0, shuffle=False):
    '''
    Select a random sample, with replacement, from each of the groups.

    rates = a list of arrays, ordered by group
    group_arr = an array of shape (num_groups, num_predictors) with 0/1 to show membership
    samp_n = number of samples to take from each group
    seed_num = seed for the random number generator used for this call
    shuffle = if True, shuffle the rows of the result before returning it
    returns group_samp_arr = array of shape (samp_n*len(rates), num_predictors + 1);
        column 0 is the sampled rate, the remaining columns are the group's
        membership values.
    raises ValueError if a group has no rates to sample from.

    A private RandomState seeded with seed_num is used instead of reseeding
    the global NumPy generator. The values drawn are the same as with
    np.random.seed(seed_num), but the caller's global random state is left
    untouched.
    '''
    rng = np.random.RandomState(seed_num)
    predictors_n = group_arr.shape[1]
    group_rates = []
    for j, group_rate in enumerate(rates):
        if len(group_rate) == 0:
            raise ValueError(f'group {j} has no rates to sample from')
        # Draw samp_n positions with replacement, then look up the rates
        picks = rng.randint(0, len(group_rate), size=samp_n)
        samp = np.take(group_rate, picks).reshape(-1, 1)
        # Add in predictor columns (the same membership row repeated for every sample)
        arr = np.broadcast_to(group_arr[j], (samp_n, predictors_n))
        group_rates.append(np.hstack([samp, arr]))
    group_samp_arr = np.vstack(group_rates)
    if shuffle:
        # Shuffles whole rows in place, keeping each rate with its predictors
        rng.shuffle(group_samp_arr)
    return group_samp_arr

# Every group is sampled down to the size of the smallest group
samp_n = min(ngenes)
bootstrap_n = 100 #CHANGE TO 10000 FOR THE FINAL VERSION
# Per-iteration coefficients and p-values for the model without ('minus_int')
# and with ('plus_int') interaction terms
res_d = {k:{'coeff':[], 'pvalues':[]} for k in ['minus_int', 'plus_int']}
# NOTE(review): coefficients and pvalues are not used anywhere in the visible cells
coefficients = []
pvalues = []
for i in range(bootstrap_n):
    # The seed is 104729*(i+1) + 997*i, so it starts at 104729 and differs for every iteration.
    # The same seed is used for all groups within one bootstrap iteration.
    seed_num = 104729*(i+1) + 997*i
    group_samp_arr = run_bootstrap(deg_rates, group_arr, samp_n=samp_n, seed_num=seed_num, shuffle=True)
    # (The sampling used to be written inline here; it now lives in run_bootstrap.)

    # Fit both models on the same bootstrap sample
    bdf = pd.DataFrame(group_samp_arr, columns=[target_column] + predictors)
    res = run_OLS(bdf, target_column, predictors, interactions=False)
    res_int = run_OLS(bdf, target_column, predictors, interactions=True)
    res_d['minus_int']['coeff'].append(res.params)
    res_d['minus_int']['pvalues'].append(res.pvalues)
    res_d['plus_int']['coeff'].append(res_int.params)
    res_d['plus_int']['pvalues'].append(res_int.pvalues)

# Collapse each list of per-iteration Series into a dataframe: one row per bootstrap, one column per beta
res_d['minus_int']['coeff'] = pd.concat(res_d['minus_int']['coeff'], axis=1).transpose()
res_d['minus_int']['pvalues'] = pd.concat(res_d['minus_int']['pvalues'], axis=1).transpose()
res_d['plus_int']['coeff'] = pd.concat(res_d['plus_int']['coeff'], axis=1).transpose()
res_d['plus_int']['pvalues'] = pd.concat(res_d['plus_int']['pvalues'], axis=1).transpose()

In [None]:
# Plot the coefficients obtained from bootstrapping: the model without interactions
# in the left third of the figure, the model with interactions in the remaining two thirds.
# Constrained_layout doesnt work well here. 
fig = plt.figure(figsize=(dfig*2, dfig))
gs = fig.add_gridspec(ncols=3, nrows=1, wspace=0.5)
# gs.get_subplot_params().bottom
# NOTE(review): PrettyBox is called without ax=, so it presumably draws on the axes just created -- confirm
ax1 = fig.add_subplot(gs[0])
ax1 = PrettyBox(data=res_d['minus_int']['coeff'], fliersize=1, color=color_dict['grey'])
ax2 = fig.add_subplot(gs[1:])
ax2 = PrettyBox(data=res_d['plus_int']['coeff'], fliersize=1, color=color_dict['grey'])
ax1.set_ylabel(f'bootstrap coefficients (N = {bootstrap_n})')
# ax.sharey() is not symmetrical, have to call on both otherwise it only takes range of one into account
ax1.sharey(ax2)
ax2.sharey(ax1)
# Panel captions are placed in axes coordinates (transform=transAxes)
ax1.text(1, 0.25, 'no interactions', va='bottom', ha='right', transform=ax1.transAxes)
ax2.text(1, 0.25, '+ interactions', va='bottom', ha='right', transform=ax2.transAxes)
# Dashed reference line at a coefficient of zero
ax1.axhline(y=0, linestyle='--', color=color_dict['grey'], zorder=0)
ax2.axhline(y=0, linestyle='--', color=color_dict['grey'], zorder=0)
plt.subplots_adjust(bottom=0.14, left=0.14, right=0.95)
plt.savefig('%s.%s' % (os.path.join(outdir, 'bootstrap_coeffs'), out_fmt), dpi = out_dpi)

In [None]:
# Plot the pvalues obtained from bootstrapping
# Get best min/max values for the split axes
# add extra space to min/max
def y_limits(df):
    '''
    Work out padded y-axis limits for a split-axis plot.
    The first column of df gets its own limits; all remaining columns share a
    second pair. Each limit is pushed outwards by 2.5% of the full data range
    of df, and any limit that ends up below 0 is set to 0.
    returns beta_0_min, beta_other_min, beta_0_max, beta_other_max
    '''
    first_col = df[df.columns[0]]
    other_cols = df[df.columns[1:]]
    # Padding is based on the range over the whole dataframe
    pad = (df.max().max() - df.min().min())*0.025
    padded = (
        first_col.min() - pad,
        other_cols.min().min() - pad,
        first_col.max() + pad,
        other_cols.max().max() + pad,
    )
    # set anything less than 0 to 0
    beta_0_min, beta_other_min, beta_0_max, beta_other_max = [0 if v < 0 else v for v in padded]
    return beta_0_min, beta_other_min, beta_0_max, beta_other_max

def diagonal_cuts(top_ax, bottom_ax, d=0.015):
    '''
    Mark a broken y-axis with two short diagonal strokes.
    One stroke is drawn around the point (0, 0) of top_ax and one around the
    point (0, 1) of bottom_ax, each in that axes' own transAxes coordinates.
    d = half-length of each stroke along both directions.
    https://stackoverflow.com/questions/63726234/how-to-draw-a-broken-y-axis-catplot-graphes-with-seaborn
    '''
    x_span = (-d, +d)
    # Black, and not clipped so the stroke can extend outside the axes
    stroke_style = {'color': 'k', 'clip_on': False}
    top_ax.plot(x_span, (-d, +d), transform=top_ax.transAxes, **stroke_style)
    bottom_ax.plot(x_span, (1 - d, 1 + d), transform=bottom_ax.transAxes, **stroke_style)

# Split the y-axis, since the intercept p-value is so much larger than the others:
# the top row of axes is scaled for the first column, the bottom row for the remaining columns.
# Constrained_layout doesnt work well here. 
fig = plt.figure(figsize=(dfig*2, dfig))
gs = fig.add_gridspec(ncols=3, nrows=2, wspace=0.5)
# gs.get_subplot_params().bottom
# Left third: model without interactions; right two thirds: model with interactions
ax1_top = fig.add_subplot(gs[0,0])
ax1_bottom = fig.add_subplot(gs[1,0])
ax2_top = fig.add_subplot(gs[0,1:])
ax2_bottom = fig.add_subplot(gs[1,1:])
# Get best min/maxes for the axes, working on -log10(p-value)
minus_df = -res_d['minus_int']['pvalues'].apply(np.log10)
plus_df = -res_d['plus_int']['pvalues'].apply(np.log10)
min0a, min1a, max0a, max1a = y_limits(minus_df)
min0b, min1b, max0b, max1b = y_limits(plus_df)
# Use the widest limits of the two models so the left and right panels share a scale
ymin0 = min(min0a, min0b)
ymax0 = max(max0a, max0b)
ymin1 = min(min1a, min1b)
ymax1 = max(max1a, max1b)
ax1_top.set_ylim(ymin0, ymax0)
ax2_top.set_ylim(ymin0, ymax0)
ax1_bottom.set_ylim(ymin1, ymax1)
ax2_bottom.set_ylim(ymin1, ymax1)

# The same data is drawn on the top and the bottom axes; the y-limits decide which part is visible
# NOTE(review): this assumes PrettyBox does not reset the y-limits set above -- confirm
ax1_top = PrettyBox(data=minus_df, fliersize=1, color=color_dict['grey'], ax=ax1_top)
ax1_bottom = PrettyBox(data=minus_df, fliersize=1, color=color_dict['grey'], ax=ax1_bottom)
sns.despine(ax=ax1_top, bottom=True)
ax1_top.set_xticks([])

ax2_top = PrettyBox(data=plus_df, fliersize=1, color=color_dict['grey'], ax=ax2_top)
ax2_bottom = PrettyBox(data=plus_df, fliersize=1, color=color_dict['grey'], ax=ax2_bottom)
sns.despine(ax=ax2_top, bottom=True)
ax2_top.set_xticks([])
# the supylabel is placed too close the axis
# fig.supylabel(f'Bootstrap p-values (-log'r'$_{10}$'')', fontsize=7)
diagonal_cuts(ax1_top, ax1_bottom)
diagonal_cuts(ax2_top, ax2_bottom)

# Set a temporary y-label only to read back where matplotlib positions it, then remove it
# and draw the real label with ax.text() so that it can span both the top and bottom axes.
ax1_top.set_ylabel('pvalues from model')
t = ax1_top.yaxis.get_label().get_position()
# NOTE(review): the inverted transAxes transform treats t as display coordinates -- confirm get_position() returns those
display_to_ax = ax1_top.transAxes.inverted()
x_pos, y_pos = display_to_ax.transform(t)
ax1_top.set_ylabel('')
# set_label_coords() #For some reason doesnt work as expected, puts it further to the left than if using ax.text()
# ax1_top.yaxis.set_label_coords(x_pos, 0)
# va centers it
ax1_top.text(x_pos, -0.1, 'p-values from bootstraps (-log'r'$_{10}$)', transform=ax1_top.transAxes, rotation=90, va='center', ha='right')
# ma=center for multiline label
# Dashed line at p = 0.05 on the -log10 scale
pval_co_log10 = -math.log(0.05, 10)
ax1_bottom.axhline(y=pval_co_log10, linestyle='--', color=color_dict['grey'], zorder=0)
ax2_bottom.axhline(y=pval_co_log10, linestyle='--', color=color_dict['grey'], zorder=0)
plt.subplots_adjust(bottom=0.14, left=0.14, right=0.95)
plt.savefig('%s.%s' % (os.path.join(outdir, 'bootstrap_pvals'), out_fmt), dpi = out_dpi)

In [None]:
# As a comparison, fit the models on all of the data (no resampling)
# Get predictor values for each group and merge with the data
id_vals = []
for j in range(len(group_arr)):
    # Repeat the group's membership row once per gene in that group
    arr = np.broadcast_to(group_arr[j], (len(deg_rates[j]), len(predictors)))
    id_vals.append(arr)
id_cols = np.vstack(id_vals)
# Rates are concatenated in the same group order as the membership rows above
vals = np.hstack(deg_rates).reshape(-1,1)
big_df = pd.DataFrame(np.hstack([vals, id_cols]), columns=[target_column] + predictors)
# NOTE: res and res_int are rebound here, replacing the values left over from the bootstrap loop
res = run_OLS(big_df, target_column, predictors, interactions=False)
res_int = run_OLS(big_df, target_column, predictors, interactions=True)

In [None]:
# Plot coefficients for all data: one bar per beta, coloured by model
fig = plt.figure(figsize=(dfig, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
# Long-format table: the index holds the beta labels, 'val' the coefficient, 'model' the model name
res_df = pd.DataFrame(res.params, columns=['val'])
res_df['model'] = 'no interactions'
res_df2 = pd.DataFrame(res_int.params, columns=['val'])
res_df2['model'] = '+ interactions'
# reset_index() moves the beta labels into a column named 'index' that is used for the x-axis
ax = sns.barplot(data=pd.concat([res_df, res_df2]).reset_index(), x='index', y='val', hue='model', ax=ax)
ax.set_ylabel('model coefficients')
ax.set_xlabel('')
plt.savefig('%s.%s' % (os.path.join(outdir, 'alldata_coeffs'), out_fmt), dpi = out_dpi)

In [None]:
# Plot p-values for all data as -log10(p), one bar per beta, coloured by model
fig = plt.figure(figsize=(dfig, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
res_df = pd.DataFrame(res.pvalues, columns=['val'])
res_df['model'] = 'no interactions'
res_df2 = pd.DataFrame(res_int.pvalues, columns=['val'])
res_df2['model'] = '+ interactions'
res_df3 = pd.concat([res_df, res_df2])
res_df3['pval_nlog'] = -res_df3['val'].apply(np.log10)
ax = sns.barplot(data=res_df3.reset_index(), x='index', y='pval_nlog', hue='model', ax=ax)
ax.legend().remove()
ax.set_xlabel('')
ax.set_ylabel('p-values for coefficients (-log'r'$_{10}$'')')
# NOTE(review): hard-coded 'undefined' label at x=0, y=4 -- presumably the intercept p-value is 0,
# making -log10 infinite; confirm, and re-check the position if the data change
ax.text(0, 4, 'undefined', rotation=90, ha='center', va='bottom')
plt.savefig('%s.%s' % (os.path.join(outdir, 'alldata_pvalues'), out_fmt), dpi = out_dpi)

In [None]:
# Test the bootstrap function with different seeds and +/- shuffle
def prime_seed(i):
    '''Return the seed for bootstrap iteration i: 104729*(i+1) + 997*i.'''
    return 104729*(i+1) + 997*i

def zero_seed(i):
    '''Seed function that ignores i and always gives 0, so every iteration repeats the same draw.'''
    return 0

def test_bootstrap(deg_rates, group_arr, samp_n=samp_n, seed_fxn=prime_seed, bootstrap_n=100, shuffle=False, interactions=False, 
                   target_column=target_column, predictors=predictors):
    '''
    Repeat the bootstrap + OLS fit bootstrap_n times and collect the results.
    deg_rates = a list of arrays of rates, ordered by group
    group_arr = 0/1 membership array with one row per group
    samp_n = number of samples to take from each group
    seed_fxn = function mapping the iteration number to the random seed
    shuffle = passed on to run_bootstrap
    interactions = passed on to run_OLS
    returns a dict with keys 'coeff' and 'pvalues', each a dataframe with one
    row per bootstrap iteration and one column per model term.
    '''
    column_names = [target_column] + predictors
    coeff_runs = []
    pvalue_runs = []
    for iteration in range(bootstrap_n):
        sample = run_bootstrap(deg_rates, group_arr, samp_n=samp_n, seed_num=seed_fxn(iteration), shuffle=shuffle)
        fit = run_OLS(pd.DataFrame(sample, columns=column_names), target_column, predictors, interactions=interactions)
        pvalue_runs.append(fit.pvalues)
        coeff_runs.append(fit.params)

    # One Series per iteration -> one dataframe row per iteration
    return {
        'coeff': pd.concat(coeff_runs, axis=1).transpose(),
        'pvalues': pd.concat(pvalue_runs, axis=1).transpose(),
    }

# Run the same bootstraps (same seeds, default bootstrap_n, no interactions)
# without and with row shuffling, so the two sets of results can be compared
res_noshuff = test_bootstrap(deg_rates, group_arr, samp_n=samp_n, seed_fxn=prime_seed, shuffle=False, target_column=target_column, predictors=predictors)
res_shuff = test_bootstrap(deg_rates, group_arr, samp_n=samp_n, seed_fxn=prime_seed, shuffle=True, target_column=target_column, predictors=predictors)