# Evaluate: ROC curve

In [1]:
from sklearn.metrics import roc_curve, auc
#roc_curve?
from joblib import Parallel, delayed
from bayes_window.generative_models import generate_fake_lfp
from bayes_window import workflow, models
import numpy as np
from tqdm import tqdm
import statsmodels.api as sm
from statsmodels.formula.api import ols, mixedlm
import itertools

In [2]:
def run_condition(true_slope, method='bw_student', y='Log power'):
    """Simulate one fake-LFP dataset and test it for a stimulation effect.

    Parameters
    ----------
    true_slope : float
        Forwarded to ``generate_fake_lfp`` as ``mouse_response_slope``.
    method : str
        Selects the analysis:

        * ``'bw_<dist>'`` -- BayesWindow slope fit; everything after the
          three-character prefix is passed on as ``dist_y``.
        * any name starting with ``'anova'`` -- OLS ``y ~ stim`` on per-mouse
          means, followed by a type-2 ANOVA.
        * ``'mlm'`` -- mixed linear model ``y ~ stim`` grouped by mouse.
    y : str
        Name of the response column to analyse.

    Returns
    -------
    For BayesWindow methods, the first ``'lower HDI'`` entry of
    ``data_and_posterior``. For ANOVA and the mixed model, a boolean telling
    whether the p-value for ``stim`` is below 0.05.

    Raises
    ------
    ValueError
        If ``method`` matches none of the supported analyses. Without this
        check such a call would fall through every branch and return ``None``.
    """
    use_bw = method[:2] == 'bw'
    use_anova = method[:5] == 'anova'
    use_mlm = method == 'mlm'
    # Validate before simulating so that a typo fails fast and loudly.
    if not (use_bw or use_anova or use_mlm):
        raise ValueError(
            f"Unknown method {method!r}: expected 'bw_<dist_y>', 'anova' or 'mlm'")

    df, df_monster, index_cols, _ = generate_fake_lfp(mouse_response_slope=true_slope,
                                                     n_trials=30)

    if use_bw:
        # Fit first, then read the result from the window object itself, so the
        # code does not depend on what fit_slopes() returns.
        bw = workflow.BayesWindow(df, y=y, levels=('stim', 'mouse'))
        bw.fit_slopes(model=models.model_hier_stim_one_codition,
                      dist_y=method[3:], add_data=False)
        return bw.data_and_posterior['lower HDI'].iloc[0]

    # The formula interface cannot reference a column with a space in its
    # name, so 'Log power' is analysed under the alias 'log_power'.
    column = 'log_power' if y == 'Log power' else y

    if use_anova:
        # NOTE(review): grouping by 'mouse' alone also averages 'stim' within
        # each mouse -- confirm this is the intended aggregation.
        df = df.groupby('mouse').mean().reset_index().rename({'Log power': 'log_power'}, axis=1)
        lm = ols(f'{column}~stim', data=df).fit()
        anova = sm.stats.anova_lm(lm, typ=2)
        return anova['PR(>F)']['stim'] < 0.05

    # Only 'mlm' is left at this point.
    df = df.rename({'Log power': 'log_power'}, axis=1)
    return mixedlm(f"{column} ~ stim", df, groups=df["mouse"]).fit().pvalues['stim'] < 0.05

In [None]:
# Ground truth for the sweep: 80 null simulations (slope 0) followed by 40
# simulations whose slope rises linearly from 0.03 to 18.
true_slopes = np.hstack([np.zeros(80), np.linspace(.03, 18, 40)])

# BayesWindow variants need the 'bw_' prefix: run_condition dispatches on it
# and forwards the rest of the name as dist_y. Bare distribution names match
# no branch there and would produce None scores.
methods = ['mlm', 'anova', 'bw_lognormal', 'bw_student', 'bw_normal']
responses = ['Log power', 'Power']

# One list of per-simulation scores for every (method, response) pair.
y_scores = {}
for method, y in tqdm(list(itertools.product(methods, responses))):
    y_scores[f'{method}, {y}'] = Parallel(n_jobs=6, verbose=2)(
        delayed(run_condition)(true_slope, method, y) for true_slope in true_slopes)

  0%|          | 0/10 [00:00<?, ?it/s][Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:  1.6min
[Parallel(n_jobs=6)]: Done 120 out of 120 | elapsed:  6.4min finished
 10%|█         | 1/10 [06:26<57:54, 386.10s/it][Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:  1.6min
[Parallel(n_jobs=6)]: Done 120 out of 120 | elapsed:  6.4min finished
 20%|██        | 2/10 [12:47<51:08, 383.51s/it][Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:  1.5min
[Parallel(n_jobs=6)]: Done 120 out of 120 | elapsed:  6.2min finished
 30%|███       | 3/10 [18:58<44:04, 377.79s/it][Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:  1.5min
[Parallel(n_jobs=6)]: Done 120 out of 120 | elapsed:  6.2min finish

In [None]:
import pandas as pd

aucs = {}

# Ground-truth labels: a simulation counts as positive when its true slope is above zero.
has_effect = true_slopes > 0

# One ROC table per condition, stacked into a single long frame at the end.
roc_tables = []
for condition, y_score in y_scores.items():
    # Scores are thresholded at zero before being handed to roc_curve.
    detected = np.array(y_score) > 0
    fpr, tpr, _ = roc_curve(has_effect, detected)
    roc_auc = round(auc(fpr, tpr), 5)
    roc_tables.append(
        pd.DataFrame({
            'False positive rate': fpr,
            'True positive rate': tpr,
            'Condition': condition,
            'AUC': roc_auc,
        })
    )

df = pd.concat(roc_tables)

In [None]:
import altair as alt

# Left panel: one ROC line per condition.
roc_lines = alt.Chart(df).mark_line(fill=None).encode(
    x='False positive rate',
    y='True positive rate',
    color='Condition',
)

# Right panel: AUC of each condition as a bar.
auc_bars = alt.Chart(df).mark_bar().encode(
    x='Condition',
    y='AUC',
    color='Condition',
)

# Side-by-side layout; as the last expression of the cell it is what gets displayed.
roc_lines | auc_bars

In [None]:
from sklearn.metrics import roc_curve, auc
#roc_curve?
from joblib import Parallel, delayed
from bayes_window.generative_models import generate_fake_lfp
from bayes_window import workflow, models
import numpy as np

def run_condition(true_slope, dist_y='student', y='Log power'):
    """Simulate fake LFP data and return the lower HDI bound of the fitted slope.

    This definition replaces the ``run_condition`` declared earlier in the
    notebook: it only runs the BayesWindow fit, takes the response
    distribution directly as ``dist_y``, and simulates 10 trials.

    Parameters
    ----------
    true_slope : float
        Forwarded to ``generate_fake_lfp`` as ``mouse_response_slope``.
    dist_y : str
        Passed to ``fit_slopes`` as ``dist_y``.
    y : str
        Name of the response column to model.

    Returns
    -------
    The first ``'lower HDI'`` entry of the window's ``data_and_posterior``.
    """
    # Only the first of the four returned objects is used below.
    lfp, _monster, _index_cols, _ = generate_fake_lfp(
        mouse_response_slope=true_slope,
        n_trials=10,
    )
    window = workflow.BayesWindow(lfp, y=y, levels=('stim', 'mouse'))
    window.fit_slopes(
        add_data=False,
        model=models.model_hier_stim_one_codition,
        dist_y=dist_y,
    )
    lower_bounds = window.data_and_posterior['lower HDI']
    return lower_bounds.iloc[0]

# Ground truth for the sweep: 80 null simulations (slope 0) followed by 40
# simulations whose slope rises linearly from 0.03 to 18.
null_slopes = np.zeros(80)
effect_slopes = np.linspace(0.03, 18, 40)
true_slopes = np.concatenate((null_slopes, effect_slopes))

# Displayed as the cell output to confirm the number of simulations.
true_slopes.shape

In [None]:
import itertools
from tqdm import tqdm

# Every response distribution is crossed with both response columns.
conditions = list(itertools.product(
    ['lognormal', 'student', 'normal'],
    ['Power', 'Log power'],
))

# Maps "<dist_y>, <y>" to the list of scores, one per entry of true_slopes.
y_scores = {}
for dist_y, y in tqdm(conditions):
    jobs = (delayed(run_condition)(true_slope, dist_y, y) for true_slope in true_slopes)
    y_scores[f'{dist_y}, {y}'] = Parallel(n_jobs=12, verbose=0)(jobs)


In [None]:
import pandas as pd

aucs = {}

# Ground-truth labels: a simulation counts as positive when its true slope is above zero.
truth = true_slopes > 0

# Collect one ROC table per condition, then stack them into a long frame.
per_condition = []
for condition, y_score in y_scores.items():
    # A score above zero is treated as a detection.
    calls = np.array(y_score) > 0
    fpr, tpr, _ = roc_curve(truth, calls)
    roc_auc = round(auc(fpr, tpr), 5)
    table = pd.DataFrame({
        'False positive rate': fpr,
        'True positive rate': tpr,
        'Condition': condition,
        'AUC': roc_auc,
    })
    per_condition.append(table)

df = pd.concat(per_condition)

import altair as alt

# Both panels draw from the same long-format ROC table.
base = alt.Chart(df)

# ROC line per condition on the left, AUC bar per condition on the right.
roc_panel = base.mark_line(fill=None).encode(
    x='False positive rate',
    y='True positive rate',
    color='Condition',
)
auc_panel = base.mark_bar().encode(
    x='Condition',
    y='AUC',
    color='Condition',
)

# Last expression of the cell, so the concatenated chart is rendered.
roc_panel | auc_panel