In [None]:
from argparse import ArgumentParser
import os.path as op
import matplotlib.pyplot as plt
import socket
import pandas as pd
import numpy as np
import seaborn as sns
from time import sleep

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Generate base names
class DataFinder:
    """Build file paths for one subject/run under the project data directory.

    The object only assembles path strings with ``os.path.join``; it never
    touches the file system, so a returned path may not exist.

    Attributes:
        basedir: Root directory the project tree is joined onto.
        projectdir: ``basedir/NIMH_SFIM/handwerkerd/ComplexMultiEcho1/Data``.
        subject: Integer subject number (formatted zero-padded to 2 digits).
        run: Integer run number (formatted zero-padded to 2 digits).
    """

    def __init__(self, basedir, subject, run):
        """Store the root directory and the subject/run to build paths for."""
        self.basedir = basedir
        self.projectdir = op.join(
            basedir,
            "NIMH_SFIM",
            "handwerkerd",
            "ComplexMultiEcho1",
            "Data",
        )
        self.subject = subject
        self.run = run

    def set_subject(self, subject: int):
        """Switch to another subject; later path calls use the new value."""
        self.subject = subject

    def set_run(self, run: int):
        """Switch to another run; later path calls use the new value."""
        self.run = run

    def subid(self):
        """Return the subject label, e.g. ``sub-07`` for subject 7."""
        return f"sub-{self.subject:02}"

    def runid(self):
        """Return the run label, e.g. ``run03`` for run 3."""
        return f"run{self.run:02}"

    def regressor_dir(self):
        """Return the subject's ``Regressors/RejectedComps_c75`` directory."""
        return op.join(
            self.projectdir,
            self.subid(),
            "Regressors",
            "RejectedComps_c75",
        )

    def mixing_dir(self):
        """Return the ``tedana_c75_rNN`` directory for the current run."""
        sd = self.subid()
        return op.join(
            self.projectdir,
            sd,
            "afniproc_orig",
            "WNW",
            f"{sd}.results",
            f"tedana_c75_r{self.run:02}"
        )

    def regressor_prefix(self):
        """Return the common path prefix shared by all regressor output files.

        The individual file getters append their own suffix to this prefix.
        """
        return op.join(
            self.regressor_dir(),
            f"{self.subid()}_r{self.run:02}_CombinedRejected_c75_"
        )

    def combined_metrics(self):
        """Return the path of the ``Combined_Metrics.csv`` file."""
        return self.regressor_prefix() + "Combined_Metrics.csv"

    def combined_betas(self):
        """Return the path of the ``betas.csv`` file."""
        return self.regressor_prefix() + "betas.csv"

    def combined_r2(self):
        """Return the path of the ``R2vals.csv`` file."""
        return self.regressor_prefix() + "R2vals.csv"

    def combined_f(self):
        """Return the path of the ``Fvals.csv`` file."""
        return self.regressor_prefix() + "Fvals.csv"

    def combined_p(self):
        """Return the path of the ``pvals.csv`` file."""
        return self.regressor_prefix() + "pvals.csv"

    def full_model(self):
        """Return the path of the ``FullRegressorModel.csv`` file."""
        return self.regressor_prefix() + "FullRegressorModel.csv"

    def mixing_matrix(self):
        """Return the path of ``ica_mixing.tsv`` inside the run's mixing dir."""
        return op.join(
            self.mixing_dir(),
            "ica_mixing.tsv",
        )

In [None]:
# Choose the data root from the host name: "/data/" on hosts whose name
# contains "biowulf", "/Volumes/" everywhere else.
if "biowulf" in socket.gethostname():
    basedir = "/data/"
else:
    basedir = "/Volumes/"

# Subject/run to load and the column of the mixing matrix to plot.
subject = 7
run = 3
component = 1

namer = DataFinder(basedir, subject, run)
Y = np.asarray(pd.read_csv(namer.mixing_matrix(), sep='\t'))
X_full = np.asarray(pd.read_csv(namer.full_model()))

# Drop the first two columns of the betas table without mutating the frame
# that was read, so re-running part of this cell cannot drop columns twice.
# NOTE(review): the first two columns of both the betas table and the full
# model are skipped — presumably non-regressor columns; confirm against the
# CSV layout.
betas_raw = pd.read_csv(namer.combined_betas())
betas = betas_raw.drop(columns=list(betas_raw.columns[:2]))

ica = Y
# Convert to a plain ndarray before multiplying so the product does not
# depend on how pandas dispatches np.matmul for DataFrame operands.
fit = np.asarray(np.matmul(X_full[:, 2:], betas.to_numpy().T))

c = component

ica_ts = ica[:, c]
fit_ts = fit[:, c]

fig, ax = plt.subplots()
ax.plot(ica_ts, color='black', label='ICA mixing column')
ax.plot(fit_ts, color='red', label='Full regressor model fit')
ax.set_title(f"{namer.subid()} {namer.runid()} component {c}")
ax.legend();

In [None]:
def scatter_cme(ax, fname: str, to_plot) -> None:
    """Scatter kappa against rho for the components listed in a metrics CSV.

    Each component is drawn with a marker area of ``sqrt(variance explained)
    * 20`` and coloured by its rejection category. Categories whose flag in
    ``to_plot`` is falsy are drawn with the colour ``"none"``.

    Args:
        ax: Object with a matplotlib-style ``scatter(x, y, s=..., c=...)``
            method; the points are drawn on it.
        fname: Path of a CSV with the columns ``kappa``, ``rho``,
            ``variance explained``, ``Tedana Rejected`` and
            ``Regressors Rejected``.
        to_plot: Indexable of four truthy/falsy flags, in the order
            (accepted by both, rejected by both, rejected by tedana only,
            rejected by regressors only).

    Returns:
        None. The only effect is the ``ax.scatter`` call.
    """
    df = pd.read_csv(fname)

    # Compare against True/False explicitly (rather than negating) so rows
    # holding neither value fall into no category, as before.
    ted_rejected = df["Tedana Rejected"].eq(True)
    ted_accepted = df["Tedana Rejected"].eq(False)
    reg_rejected = df["Regressors Rejected"].eq(True)
    reg_accepted = df["Regressors Rejected"].eq(False)

    # (mask, colour) pairs, positionally matched to the flags in to_plot.
    categories = (
        (ted_accepted & reg_accepted, "green"),
        (ted_rejected & reg_rejected, "red"),
        (ted_rejected & reg_accepted, "orange"),
        (ted_accepted & reg_rejected, "brown"),
    )

    colors = pd.Series(data=["none" for _ in df["kappa"]])
    for flag_idx, (mask, color) in enumerate(categories):
        if to_plot[flag_idx]:
            colors.loc[mask] = color

    marker_size = np.sqrt(df["variance explained"]) * 20
    ax.scatter(df["kappa"], df["rho"], s=marker_size, c=colors)

In [None]:
fig = plt.figure()

# Full-figure axes with spines and ticks hidden; it exists only to carry the
# common x/y labels set at the end of the cell.
ax = fig.add_subplot(1, 1, 1)
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)

# One panel per rejection category, in the flag order scatter_cme expects.
subplots = [fig.add_subplot(2, 2, i + 1) for i in range(4)]
titles = ("Accepted All", "Rejected Both", "Non-Bold Only", "Motion/Phys Only")
title_size = 24
label_size = 18
tick_size = 16

# Titles and tick sizes do not depend on the data, so set them once per
# panel instead of once per subject/run.
for panel, title in zip(subplots, titles):
    panel.set_title(title, y=0.75, size=title_size)
    panel.tick_params(labelsize=tick_size)

# Overlay every subject (1-13) and run (1-3) on the four panels.
for s in range(1, 14):
    for r in range(1, 4):
        namer = DataFinder(basedir, s, r)
        metrics_file = namer.combined_metrics()
        for p, panel in enumerate(subplots):
            # Fresh one-hot flag list: only category p is coloured here.
            top = [q == p for q in range(4)]
            scatter_cme(panel, metrics_file, top)

# Set common labels
ax.set_xlabel('kappa', size=label_size)
ax.set_ylabel('rho', size=label_size);

In [None]:
# Make counts of significant fits per run
# For every subject (1-13) and run (1-3): the percentage of components whose
# 'Full Model' p-value is below 0.05 / (number of components), and, for each
# partial model, the percentage that is below that threshold for both the
# partial model and the full model.
model_columns = ['Full Model', 'Motion Model', 'Phys_Freq Model',
                 'Phys_Variability Model', 'WM & CSF Model']
reg_cat = ['Motion Model', 'Phys_Freq Model', 'Phys_Variability Model', 'WM & CSF Model']

# Collect one record per run and build the frame once. Writing through
# percent_signif[col].iloc[idx] is chained assignment, which does not update
# the frame when pandas Copy-on-Write is active.
records = []
for s in range(1, 14):
    for r in range(1, 4):
        namer = DataFinder(basedir, s, r)
        pvals = pd.read_csv(namer.combined_p())
        numcomp = len(pvals)
        threshold = 0.05 / numcomp
        tmp_signif = pvals['Full Model'] < threshold
        row = {'Full Model': 100 * tmp_signif.sum() / numcomp}
        for reg in reg_cat:
            both_signif = (pvals[reg] < threshold) & tmp_signif
            row[reg] = 100 * both_signif.sum() / numcomp
        records.append(row)

percent_signif = pd.DataFrame(records, columns=model_columns)
print(percent_signif)


In [None]:
# One matplotlib histogram per column of percent_signif on a 2x3 grid.
fig = plt.figure(figsize=(10, 7))
for panel_no, reg in enumerate(percent_signif.columns, start=1):
    hist_ax = plt.subplot(2, 3, panel_no)
    hist_ax.hist(percent_signif[reg])
    hist_ax.set_title(reg)
# Applies to the current axes, i.e. the last panel drawn by the loop.
plt.xlabel("% of components with signif fit to regressors")

In [None]:
# Seaborn version of the per-model histograms: one 10-bin histplot per column
# of percent_signif on a 2x3 grid.
fig = plt.figure(figsize=(10, 7))
for panel_no, reg in enumerate(percent_signif.columns, start=1):
    panel = plt.subplot(2, 3, panel_no)
    sns.histplot(data=percent_signif, x=reg, bins=10, ax=panel)

In [None]:
# Recompute the per-run table of significant-fit percentages.
# NOTE(review): this cell repeats a computation of percent_signif made
# earlier in the notebook — confirm whether both copies are needed.
model_columns = ['Full Model', 'Motion Model', 'Phys_Freq Model',
                 'Phys_Variability Model', 'WM & CSF Model']
reg_cat = ['Motion Model', 'Phys_Freq Model', 'Phys_Variability Model', 'WM & CSF Model']

# Collect one record per run and build the frame once. Writing through
# percent_signif[col].iloc[idx] is chained assignment, which does not update
# the frame when pandas Copy-on-Write is active.
records = []
for s in range(1, 14):
    for r in range(1, 4):
        namer = DataFinder(basedir, s, r)
        pvals = pd.read_csv(namer.combined_p())
        numcomp = len(pvals)
        # Significance threshold: 0.05 divided by the number of components.
        threshold = 0.05 / numcomp
        tmp_signif = pvals['Full Model'] < threshold
        row = {'Full Model': 100 * tmp_signif.sum() / numcomp}
        for reg in reg_cat:
            # Count components below threshold for both this model and the full model.
            both_signif = (pvals[reg] < threshold) & tmp_signif
            row[reg] = 100 * both_signif.sum() / numcomp
        records.append(row)

percent_signif = pd.DataFrame(records, columns=model_columns)
print(percent_signif)

In [None]:
# NOTE(review): loads seaborn's "penguins" example dataset; none of the
# visible cells above use it — presumably scratch work, confirm before keeping.
penguins = sns.load_dataset("penguins")

In [None]:
# Show the loaded penguins object as the cell output.
penguins