In [None]:
import os
import re
import matplotlib
import numpy as np
import pandas as pd
import os.path as op
import seaborn as sns
import scipy.stats as stats 
import matplotlib.pyplot as plt 

In [None]:
# Figure-wide matplotlib defaults: plain (non-LaTeX) text rendering,
# Helvetica as the font family, and a 14 pt base font size.
plt.rcParams["text.usetex"] = False
plt.rcParams["font.family"] = "Helvetica"
plt.rcParams["font.size"] = 14

In [None]:
# Input directory (holds the CSV read below) and output directory for the
# figure 06 panels; the output directory is created if it does not exist yet.
data_parts = ("/path", "to", "data")
save_parts = ("paths", "to", "figure06")

paths_data = op.join(*data_parts)
paths_save = op.join(*save_parts)

os.makedirs(paths_save, exist_ok = True)

In [None]:
# A tract is flagged as identified only when it has MORE than this many
# streamlines (the comparison below is a strict ">").
streamline_thr = 10

# Load the per-tract streamline counts and keep only the columns, datasets and
# methods compared in this figure; the "Total Recognized" summary row is dropped.
df = pd.read_csv(op.join(paths_data, "n_streamlines.csv"))
df = df[["participant", "dataset", "method", "tract", "n_streamlines"]]
keep = (df["dataset"].isin(["multi-shell", "single-shell"])
        & df["method"].isin(["afq-original", "afq-fwe", "afq-msmt"])
        & (df["tract"] != "Total Recognized"))
df = df.loc[keep].copy() # explicit copy: new columns are assigned below

# Split "Left <name>" / "Right <name>" into a hemisphere label and a tract name.
# Names that do not start with Left/Right do not match either pattern, so both
# columns keep the full tract name for those rows.
# Raw strings are required: "\w" in a normal string is an invalid escape sequence.
tract_full = list(df["tract"])
df["hemisphere"] = [re.sub(r"^(Left|Right).+", r"\1", x) for x in tract_full]
df["tract"]      = [re.sub(r"^(Left|Right) (\w+)", r"\2", x) for x in tract_full]

# 1.0 when the tract was identified (above threshold), 0.0 otherwise
df["flag"] = (df["n_streamlines"] > streamline_thr).astype(float)
df.head()

In [None]:
# Contrasts to compute, as {label: [method, reference]}; each difference is
# the flag of `method` minus the flag of `reference`.
difference_dict = {
  "fwe-original":  [ "afq-fwe", "afq-original"],
  "msmt-original": ["afq-msmt", "afq-original"],
}

id_cols = ["participant", "dataset", "tract", "hemisphere"]
difference_cols = list(difference_dict.keys())

# Wide table: one row per participant/dataset/tract/hemisphere and one flag
# column per method.
df_missing = df.pivot(index = id_cols, columns = "method", values = "flag")
df_missing = df_missing.reset_index()

# Add one column per contrast (method flag - reference flag).
for key, (method_name, reference_name) in difference_dict.items():
  df_missing[key] = df_missing[method_name] - df_missing[reference_name]

# Back to long format: one row per id combination and contrast.
df_missing = df_missing[id_cols + difference_cols].melt(
  id_vars = id_cols, var_name = "method", value_name = "difference")
df_missing = df_missing.groupby(id_cols + ["method"])["difference"].sum().reset_index()

# Average the difference over hemispheres within each participant/dataset/tract.
hemisphere_free_cols = ["participant", "dataset", "tract", "method"]
df_missing = df_missing.groupby(hemisphere_free_cols)["difference"].mean().reset_index()

# Turn contrast labels such as "fwe-original" into display labels such as "FWE".
df_missing["method"] = df_missing["method"].str.replace("-original", "").str.upper()
df_missing.head()

In [None]:
# Sample the 20 colours of the "tab20" colormap and pick one per method.
# color_dict is keyed by the upper-cased method labels used in the plots.
cmap = matplotlib.colormaps["tab20"]
cmap = cmap(np.linspace(0, 1, num = 20))

color_dict = {
  "FWE":  cmap[6], # red
  "MSMT": cmap[0]  # blue
}

# One horizontal bar chart per dataset: per-tract difference (summed over
# participants) between each method and the original pipeline.
for dataset, df_group in df_missing.groupby("dataset"):
  # sum the hemisphere-averaged differences over participants
  df_plot = df_group.groupby(["tract", "method"])["difference"].sum().reset_index()

  # order tracts by the FWE difference, largest first
  trk_order = (df_plot[df_plot["method"] == "FWE"]
                .sort_values("difference", ascending = False)["tract"].values)

  fig, ax = plt.subplots(1, 1, figsize = (8, 10), tight_layout = True)
  ax.axvline(x = 0, color = "black", linestyle = "--")
  # NOTE(review): df_plot holds a single value per (tract, method), so
  # errorbar = "se" presumably draws no error bars here -- confirm intent.
  sns.barplot(data = df_plot, x = "difference", y = "tract", hue = "method",
              palette = color_dict, order = trk_order, errorbar = "se", ax = ax)
  ax.set_title(dataset)
  ax.set_xlabel("Average Difference in Number of Participants\nwith Segmented Tracts from Original")
  ax.set_ylabel("")
  ax.legend(title = "Method", loc = "lower right")
  ax.set_xlim(-12, 30)
  ax.margins(y = 0.01)

  # save before showing so the written file never depends on how the backend
  # treats the figure once it has been displayed
  save_name = f"figure06_{dataset}_identified.svg"
  fig.savefig(op.join(paths_save, save_name))

  plt.show()
  plt.close(fig) # release the figure; one is created per loop iteration

In [None]:
# Number of identified tracts (sum of flags) per participant, dataset and method.
df_average = df.groupby(["participant", "dataset", "method"])["flag"].sum().reset_index()

# Contrasts to compute, as {label: [method, reference]}.
difference_dict = {
  "fwe-original":  [ "afq-fwe", "afq-original"],
  "msmt-original": ["afq-msmt", "afq-original"],
}

df_yield = [] # one record per participant/dataset, turned into a frame below
for (participant, dataset), df_group in df_average.groupby(["participant", "dataset"]):
  n_tracts = df_group.set_index("method")["flag"] # tract count keyed by method
  df_curr = { "participant": participant, "dataset": dataset }

  for key, (method_name, reference_name) in difference_dict.items():
    # skip the contrast unless both methods are present for this participant/dataset
    if method_name not in n_tracts.index or reference_name not in n_tracts.index:
      continue

    source = n_tracts[method_name]
    target = n_tracts[reference_name]

    # symmetric percentage difference: difference relative to the mean of both counts
    df_curr[key] = (source - target) / ((source + target) / 2) * 100

  df_yield.append(df_curr)

df_yield = pd.DataFrame(df_yield)
df_yield.head()

In [None]:
# Keyword arguments for the one-sample t-test against zero; NaNs are dropped.
stats_kwargs = {
  "popmean": 0,
  "nan_policy": "omit",
}

alpha = 0.05 # significance level, pre-Bonferroni
alpha_corrected = alpha / 2 # Bonferroni correction for the two methods

y_sig     = 0.3  # pad between sem and %diff sig marker
y_comp    = 0.3  # pad between %diff sig marker and comparison bar
y_sigcomp = 0.15 # pad between comparison bar and comparison sig marker


def _significance_marker(pvalue):
  """Map a p-value to the marker drawn on the figure.

  Returns "*" when pvalue < alpha_corrected (Bonferroni significant), "+" when
  pvalue < alpha (significant before correction) and "" otherwise (a NaN
  p-value also yields "").
  """
  if pvalue < alpha_corrected:
    return "*"
  if pvalue < alpha:
    return "+"
  return ""


# One bar chart per dataset: mean percent difference in tract yield of each
# method relative to the original pipeline, annotated with significance markers.
for dataset, df_group in df_yield.groupby("dataset"):
  df_plot = (df_group[["participant", "dataset", "fwe-original", "msmt-original"]]
              .melt(id_vars = ["participant", "dataset"],
                    var_name = "method", value_name = "diff"))
  df_plot["method"] = df_plot["method"].str.replace("-original", "")
  df_plot["method"] = df_plot["method"].str.upper()

  # x-axis order of the bars; also used to position the text markers
  method_order = list(np.unique(df_plot["method"]))

  fig, ax = plt.subplots(1, 1, figsize = (3, 4), tight_layout = True)
  ax.axhline(y = 0, color = "black", linestyle = "--")
  # draw on the explicit axes and in the same order used for the annotations
  sns.barplot(data = df_plot, x = "method", y = "diff", order = method_order,
              errorbar = "se", palette = color_dict, ax = ax)
  ax.set_xlabel("")
  ax.set_ylabel("Percent Difference in\nTract Yield from Original")
  ax.set_ylim(-1., 2.0)
  ax.set_yticks(np.arange(-1.0, 2.1, 0.5))
  ax.set_title(dataset)

  print(f"# {dataset} ----------------------------------------------------")
  for method, df_stats in df_plot.groupby("method"): # for each method
    values = df_stats["diff"].values
    results = stats.ttest_1samp(values, **stats_kwargs) # difference from zero
    sig_str = _significance_marker(results.pvalue)
    if not sig_str: # not significant, nothing to draw
      continue

    y_avg = np.nanmean(values) # bar height
    y_sem = stats.sem(values, nan_policy = "omit") # error bar length
    direction = 1 if y_avg > 0 else -1 # place marker beyond the bar end
    y_height = y_avg + direction * (y_sem + y_sig)

    x_height = method_order.index(method) # x-axis position

    ax.text(x = x_height, y = y_height, s = sig_str,
            color = color_dict[method], ha = "center", va = "center")

  # paired comparison between the two methods
  x_diff = df_plot[df_plot["method"] == method_order[0]]["diff"].values # fwe
  y_diff = df_plot[df_plot["method"] == method_order[1]]["diff"].values # msmt

  results = stats.wilcoxon(y_diff, x_diff, nan_policy = "omit")
  sig_str = _significance_marker(results.pvalue)

  if sig_str:
    y_avg = [np.nanmean(x_diff), np.nanmean(y_diff)] # bar heights
    y_sem = [stats.sem(x_diff, nan_policy = "omit"),
             stats.sem(y_diff, nan_policy = "omit")]
    # far end of each error bar (away from zero)
    y_ext = [avg + sem if avg > 0 else avg - sem for avg, sem in zip(y_avg, y_sem)]
    # side of the axis is decided by the first bar, as before
    direction = 1 if y_ext[0] > 0 else -1
    y_ext = np.max(y_ext) if direction > 0 else np.min(y_ext)

    # Every pad is applied in the same direction. Previously y_comp and
    # y_sigcomp were always added, so for negative bars they cancelled the
    # -y_sig offset and the comparison bar sat on top of the error bar.
    y_height = y_ext + direction * (y_sig + y_comp) # height of comparison bar

    ax.plot([0, 1], [y_height, y_height], "black")
    ax.text(x = 0.5, y = y_height + direction * y_sigcomp, s = sig_str,
            color = "black", ha = "center", va = "center")

  # save before showing so the written file never depends on how the backend
  # treats the figure once it has been displayed
  save_name = f"figure06_{dataset}_yield.svg"
  fig.savefig(op.join(paths_save, save_name))

  plt.show()
  plt.close(fig) # release the figure; one is created per loop iteration