In [None]:
from IPython.display import display, Markdown

from model_drift import settings
from model_drift.helpers import column_xs, correlate_performance, mutual_info_performance, w_avg
import pandas as pd
import os
import datetime



In [None]:
# Settings that select which drift-results CSV file is loaded

window = "30D"
stride = "D"
ref_frontal_only = True
target_frontal_only = True
min_periods = 150

nonfrontal_add_date = "2015-01-01"
frontal_remove_date = "2016-06-01"

# Setting nonfrontal_add_date to None selects the job name without the
# "od-inject" part (see the job_name branch below).
# nonfrontal_add_date = None
# frontal_remove_date = None
replacement = True
sample_size = 2000
n_samples = 20

# Display settings
span = 14                   # EWM span used by smooth()
which = 'mean'              # value of the last column level that is kept
clip = 2                    # standardized drift values are clipped to [-clip, clip]
standardize_perf = False
shift_drift_to_perf = not standardize_perf

# Centre / range handed to shift_to_other() when drift is rescaled onto AUROC.
this_center = 0
this_range = clip/2
standardize_dates = (settings.PADCHEST_SPLIT_DATES[0], settings.PADCHEST_SPLIT_DATES[1])
standardize_ix = pd.date_range(*standardize_dates)

# Statistics (column-name fragments) included in the unified drift metric.
stat = ['pval', 'distance']

if nonfrontal_add_date is None:
    job_name = f"combined_s{stride}-w{window}-min{min_periods}_frontalonly-ref{ref_frontal_only}-target{target_frontal_only}_Samp-ss{sample_size}-n{n_samples}-repl{replacement}"
else:
    job_name = f"combined-od-inject-addnfrnt{nonfrontal_add_date}-rmfrnt{frontal_remove_date}_s{stride}-w{window}-min{min_periods}_frontalonly-ref{ref_frontal_only}_Samp-ss{sample_size}-n{n_samples}-repl{replacement}"

fname = str(settings.TOP_DIR.joinpath(
    "results", "drift_csvs", job_name+'.csv'))

print(fname)

# Fail early, and say which file is missing, before pandas is asked to parse it.
if not os.path.exists(fname):
    raise FileNotFoundError(f"drift results CSV not found: {fname}")

# The CSV has four header rows (a 4-level column index) and dates in the first column.
combined_df_o = pd.read_csv(fname, index_col=0, header=[0, 1, 2, 3])
combined_df_o.index = pd.to_datetime(combined_df_o.index)

# Replace every p-value column by 1 - p.
# NOTE(review): presumably so that larger values mean "more drift", like the
# distance columns - confirm.
flip = column_xs(combined_df_o, include=["pval"])
combined_df_o[flip] = 1-combined_df_o[flip]

# Working copy; combined_df_o keeps all column levels for later use.
combined_df = combined_df_o.copy()

def smooth(y: pd.DataFrame):
    """Return an exponentially weighted moving average of ``y``.

    The EWM span is the notebook-level ``span`` setting. Every position that
    is missing in ``y`` is set back to missing in the result, so gaps in the
    input stay gaps in the smoothed output.
    """
    missing = y.isna()
    smoothed = y.ewm(span=span, ignore_na=False).mean()
    # Re-blank the gaps: the EWM mean would otherwise carry values across them.
    smoothed[missing] = None
    return smoothed


smooth_name = f"ewm{span}"

# Keep only the `which` entry of the last column level and then drop that level.
# Derived from combined_df_o (not from combined_df itself) so this statement
# gives the same result when it is re-run.
combined_df = combined_df_o.swaplevel(0, -1, axis=1)[[which]].swaplevel(0, -1, axis=1).droplevel(-1, axis=1).copy()

html_dir = settings.TOP_DIR.joinpath(
    "graphs", job_name)

# exist_ok avoids the separate existence check (and the race it implies).
os.makedirs(html_dir, exist_ok=True)

stat_str = '+'.join(stat)
fn = f"{html_dir}/YEARLY-{which}_{stat_str}_stdclip{clip}_smooth-{smooth_name}_stdperf{standardize_perf}_shift{shift_drift_to_perf}.html"

print("output:", fn)

# Start the HTML report ('w' truncates any previous report); later cells append to it.
with open(fn, 'w') as f:
    print(f"""<h1>Calculation method: {which}, Stats: {stat_str}<br />
          Stride:{stride}, Window:{window}, Min sample: {min_periods}<br />
          Frontal only ref: {ref_frontal_only}, add nonfrontal: {nonfrontal_add_date}, remove frontal: {frontal_remove_date}<br />
          Sampling Sample Size: {sample_size}, num samples: {n_samples}, replacement: {replacement}<br />
          Smooth: {smooth_name}, standardize_perf: {standardize_perf}, shift_drift_to_perf: {shift_drift_to_perf}, shift center {this_center}, shift_range: {this_range}<br />
          Standardization Dates: {standardize_dates[0]} to {standardize_dates[1]}, <br />
          </h1>
          created: {datetime.datetime.now()}
          """, file=f)


In [None]:
# Year-end boundaries between the second split date and the last date in the data.
year_ends = pd.date_range(settings.PADCHEST_SPLIT_DATES[1], combined_df.index.max(), freq='Y')

# One daily range per pair of consecutive boundaries, starting the day after the first.
splits = [
    pd.date_range(start + pd.DateOffset(1), stop)
    for start, stop in zip(year_ends[:-1], year_ends[1:])
]




In [None]:
def shift_to_other(this, other, this_range=None, this_center=None):
    """Rescale ``this`` onto the mean and standard deviation of ``other``.

    ``this`` is centred on ``this_center`` and divided by ``this_range``, then
    multiplied by ``other.std()`` and shifted by ``other.mean()``.

    Args:
        this: values to rescale.
        other: values whose mean and standard deviation define the target scale.
        this_range: divisor for ``this``; ``this.std()`` when None.
        this_center: centre subtracted from ``this``; ``this.mean()`` when None.

    Returns:
        The rescaled values, ``(this - center) / range * other.std() + other.mean()``.
    """
    target_mean = other.mean()
    target_spread = other.std()

    spread = this.std() if this_range is None else this_range
    center = this.mean() if this_center is None else this_center

    return (this - center) / spread * target_spread + target_mean


In [None]:


# The first column matched by "auroc" is used as the performance series.
auroc_columns = column_xs(combined_df, "auroc")
perf_col = auroc_columns[0]
perf_df = combined_df[perf_col]

perf_df


In [None]:
# ISO week number for every day from the second split date to the end of the data.
# Built from its own date range so the cell runs on a fresh kernel instead of
# relying on `pdf` from a later cell; `isocalendar().week` replaces the
# `weekofyear` attribute that newer pandas no longer provides.
test_period_index = pd.date_range(settings.PADCHEST_SPLIT_DATES[1], combined_df.index.max())
test_period_index.isocalendar().week

In [None]:
# NOTE(review): `x` is not assigned in any cell above this one in the visible
# notebook, so on a fresh kernel this cell only works after a later cell ran.
nx = x.copy()

# NOTE(review): looks like an attempt to view the daily index as weekly;
# assigning a frequency that does not match the index values may raise - confirm
# whether this scratch cell is still needed.
nx.freq = "W"

nx

In [None]:
import plotly.express as px
# Daily index from the second split date to the last date in the data.
x = pd.date_range(settings.PADCHEST_SPLIT_DATES[1], combined_df.index.max())
# x = pd.date_range(combined_df.index.min(), combined_df.index.max())

pdf = perf_df.reindex(x).rename('auroc')

# Pivot: one row per value of `var_name` (an attribute of the datetime index),
# one column per calendar year, cells hold the mean AUROC.
var_name = "dayofyear"
index = getattr(pdf.index, var_name)
pdfpivot = pd.pivot_table(pdf.to_frame(), index=index, columns=pdf.index.year,
                          values='auroc', aggfunc='mean')

# Absolute deviation of each year's values from that year's own mean.
pdfpivot_diffs = (pdfpivot - pdfpivot.mean()).abs()

# 4-row rolling mean, reshaped to long form for plotting (one line per year).
df = pdfpivot_diffs.rolling(4).mean().melt(ignore_index=False, var_name="year", value_name="auroc")
df = df.reset_index().rename(columns={'index': var_name})
fig = px.line(df, x=var_name, y="auroc", color="year")
fig.show()


In [None]:
# Pairwise correlation between the per-year columns of pdfpivot_diffs.
pdfpivot_diffs.corr()


In [None]:
# Everything except the performance ("auroc") and "count" columns.
excluded = ['auroc', 'count']
other_cols = column_xs(combined_df, exclude=excluded)
other_df = combined_df[other_cols]

In [None]:
# Second results CSV, read with the same 4-row header layout as the main file.
extra_valids_path = settings.TOP_DIR.joinpath(
    "results", "drift_csvs", "valwithonlyfrontals-sD-w30D-min150_frontalonly-refTrue_Samp-ss2000-n20-replTrue.csv")
extra_valids = pd.read_csv(str(extra_valids_path), index_col=0, header=[0, 1, 2, 3])
extra_valids.index = pd.to_datetime(extra_valids.index)

# Keep only the `which` entry of the last column level, then drop that level.
which_first = extra_valids.swaplevel(0, -1, axis=1)[[which]]
extra_valids = which_first.swaplevel(0, -1, axis=1).droplevel(-1, axis=1)

# Performance column of the extra file (same column key as perf_df).
extra_perf = extra_valids[perf_col]

extra_perf


In [None]:
# Columns of the selected statistics.
cxs = column_xs(other_df, include=stat)

# Stack both sources (each without columns that contain missing values),
# order by date and keep only the standardization dates.
frames = [other_df[cxs].dropna(axis=1), extra_valids[cxs].dropna(axis=1)]
stats = pd.concat(frames, axis=0).sort_index().loc[standardize_ix]

print(len(stats))

# Per-column mean and standard deviation over those rows.
stats = stats.agg(["mean", "std"])

stats.T

In [None]:

otherstd = other_df[cxs].copy()

# A zero standard deviation cannot be divided by; use 1 for those columns.
std0 = stats.loc['std'] == 0
stats.loc["std", std0] = 1

# Standardize every column with the mean / std computed in `stats`.
otherstd = (otherstd - stats.loc['mean']) / stats.loc["std"]

# Columns that contain at least one missing value after standardization.
bad_cols = otherstd.columns[otherstd.isnull().max(axis=0)].tolist()
print(bad_cols)

# Group the columns by the prefix found in their first level.
vae_cols = [col for col in otherstd if "mu." in col[0]]
score_cols = [col for col in otherstd if "activation." in col[0]]
metadata_cols = sorted(set(otherstd) - set(vae_cols) - set(score_cols))

# Limit the standardized values to [-clip, clip].
otherstd = otherstd.clip(-clip, clip)

otherstd


In [None]:
# Daily index spanning the whole data range.
x = pd.date_range(combined_df.index.min(), combined_df.index.max())

if not standardize_perf:
    # Raw AUROC on the daily index.
    yp = perf_df.reindex(x)
else:
    # Standardize AUROC with the mean / std of the standardization dates plus extra_perf.
    reference_perf = pd.concat([perf_df.loc[standardize_ix], extra_perf])
    perf_stats = reference_perf.agg(["mean", "std"])
    perf_stats_std = (perf_df - perf_stats.loc['mean']) / perf_stats.loc["std"]
    yp = perf_stats_std.reindex(x)


In [None]:
# Relationship of every standardized drift column with AUROC.
all_corr_df = correlate_performance(yp.rename('auroc'), otherstd)
all_ig_df = mutual_info_performance(yp.rename('auroc'), otherstd, bins=25)

# One row per drift column; each column of m_ is a candidate weighting scheme.
abs_corr = all_corr_df.abs().rename('abs(corr)')
m_ = all_ig_df.to_frame().join(abs_corr)
row_mean = m_.mean(axis=1).rename('mean[abs(corr),info_gain]')
m_ = m_.join(row_mean).assign(no_weights=1).fillna(0)

m_.sort_values(by='mean[abs(corr),info_gain]', ascending=False).head(20)


In [None]:
# From the unreduced frame: drop the two outer levels of the 'count' columns
# and pick the 'obs' entry.
count_columns = combined_df_o['count'].droplevel([0, 1], axis=1)
true_counts = count_columns['obs']

true_counts

In [None]:
# 'count' columns of the reduced frame on the daily index `x`, shown for inspection.
# NOTE(review): count_df is not referenced again in the visible cells; the
# plotting cell reads combined_df['count'] directly.
count_df = combined_df['count'].reindex(x)
count_df

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px


ignore_bad_cols = True

# Weight table for the weighted averages; rows listed in bad_cols get zero weight.
m = m_.copy()
if ignore_bad_cols:
    m.loc[bad_cols] = 0

# Put every series on the same daily index.
yp = yp.reindex(x)
otherstd = otherstd.reindex(x)
counts = combined_df['count'].iloc[:, 0].reindex(x)
counts2 = true_counts.reindex(x)

# Smoothed AUROC is needed for the trace and for every correlation below.
yp_smooth = smooth(yp)

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05, row_heights=[.8, .2])

# Bottom panel: sample counts as bars plus a line that shares the legend entry.
for series, label, color in ((counts, "Num Samples (used)", "green"),
                             (counts2, "Num Samples (obs)", "orange")):
    fig.add_trace(go.Bar(x=x, y=series, showlegend=True, legendgroup=label,
                         marker={"color": color}, name=label), row=2, col=1)
    # go.Scatter replaces the deprecated go.Line alias.
    fig.add_trace(go.Scatter(x=x, y=series, showlegend=False, legendgroup=label,
                             marker={"color": color, "line": {"width": .01}}, name=label),
                  row=2, col=1)

# Top panel: smoothed AUROC.
fig.add_trace(go.Scatter(x=x, y=yp_smooth, showlegend=True,
                         name="AUROC", hovertemplate="%{y: .5f}", connectgaps=False,
                         line={"color": "blue"}), row=1, col=1)

# Date windows over which correlations are reported.
val_start, test_start = settings.PADCHEST_SPLIT_DATES[0], settings.PADCHEST_SPLIT_DATES[1]
periods = {
    "all": slice(None),
    "validation": slice(val_start, test_start),
    "test": slice(test_start, None),
}

corrs = []

# One unified drift line per weighting scheme (column of m).
for name in m:
    weights = m[name].sort_values(ascending=False)
    # Negated weighted average of the standardized drift columns.
    yo = -w_avg(otherstd.loc[x], weights=weights.to_dict())
    yo_smooth = smooth(yo)

    # Every entry is a scalar correlation; a stray trailing comma used to turn
    # the raw validation/test values into 1-tuples.
    for when, period in periods.items():
        corrs.append({
            "name": 'All Unified', "stat": str(stat), "wieghts": name,
            "corr (raw)": yp.loc[period].corr(yo.loc[period]),
            "corr (smoothed)": yp_smooth.loc[period].corr(yo_smooth.loc[period]),
            "when": when,
        })

    # Optionally rescale the drift line onto the AUROC axis; the hover text
    # still shows the unscaled value through customdata.
    if shift_drift_to_perf:
        y = shift_to_other(yo, yp, this_range, this_center)
    else:
        y = yo
    fig.add_trace(go.Scatter(x=x, y=smooth(y),
                             customdata=yo_smooth,
                             showlegend=True, legendgroup=name,
                             name=name, hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                             line={"width": 1}, connectgaps=False), row=1, col=1)

# Correlation of AUROC with the sample counts themselves.
for label, series in (("num sampled (used)", counts), ("num sampled (obs)", counts2)):
    c, cm = yp.corr(series), yp_smooth.corr(smooth(series))
    corrs.append({"name": label, "stat": str(stat), "wieghts": "NA",
                  "corr (raw)": c, "corr (smoothed)": cm, "when": "all"})


def add_date_line(date, name, y=1.08):
    """Mark ``date`` on the current notebook-level ``fig``.

    Draws a dotted black vertical line at ``date`` and places the text ``name``
    at the same x position.

    Args:
        date: x position of the line and of the label.
        name: label text.
        y: vertical label position in paper coordinates.
    """
    dotted_black = dict(color='black', dash='dot')
    # y0=0 .. y1=1 in paper coordinates: the line covers the full plot height.
    fig.add_shape(type='line', xref='x', yref='paper',
                  x0=date, x1=date, y0=0, y1=1,
                  line=dotted_black)
    fig.add_annotation(x=date, y=y, xref="x", yref="paper",
                       text=name, textangle=0, showarrow=False)

# Mark the split dates.
add_date_line(settings.PADCHEST_SPLIT_DATES[0], f"Val Start<br />({settings.PADCHEST_SPLIT_DATES[0]})")
add_date_line(settings.PADCHEST_SPLIT_DATES[1], f"Test Start<br />({settings.PADCHEST_SPLIT_DATES[1].strip()})")

# Mark the injection dates when they are configured.
if nonfrontal_add_date is not None:
    add_date_line(nonfrontal_add_date, f"Lat. Added<br />({nonfrontal_add_date})")

if frontal_remove_date is not None:
    add_date_line(frontal_remove_date, f"Front Removed<br />({frontal_remove_date})")

# Layout; the x axis initially starts at the second split date.
fig.update_layout(
    title=f"(-1)*w_avg({stat}) and AUROC",
    hovermode="x unified",
    height=600,
    barmode='overlay',
)
fig.update_xaxes(range=[settings.PADCHEST_SPLIT_DATES[1], y.index.max()])

corr_df = pd.DataFrame(corrs).sort_values('when')
display(corr_df)
fig.show()

# Append this section (heading, correlation table, figure) to the HTML report.
fig_html = fig.to_html()
print(fn)
with open(fn, 'a') as report:
    report.write("<h2>Full Unified</h2>\n")
    corr_df.to_html(report)
    report.write(fig_html + "\n")


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.01)

yp_smooth = smooth(yp)
# go.Scatter replaces the deprecated go.Line alias.
fig.add_trace(go.Scatter(x=x, y=yp_smooth, showlegend=True,
                         name="AUROC", hovertemplate="%{y: .5f}", connectgaps=False), row=1, col=1)

# Date windows over which correlations are reported.
val_start, test_start = settings.PADCHEST_SPLIT_DATES[0], settings.PADCHEST_SPLIT_DATES[1]
periods = {
    "all": slice(None),
    "validation": slice(val_start, test_start),
    "test": slice(test_start, None),
}

# Column groups that each get their own unified drift line.
column_groups = {
    "metadata": metadata_cols,
    "vae": vae_cols,
    "score": score_cols,
    "vae+score": vae_cols+score_cols,
    "metadate+vae+score": vae_cols+score_cols+metadata_cols,
}

# Weighting scheme (column of m) used for every group.
name = "abs(corr)"
weights = m[name].sort_values(ascending=False)

corrs = []
for name_, cols in column_groups.items():
    otherstd_ = otherstd[cols]
    # Negated weighted average of this group's standardized drift columns.
    yo = -w_avg(otherstd_.loc[x], weights=weights.to_dict())
    yo_smooth = smooth(yo)

    # All correlations use the unshifted `yo` and are scalars. The raw test
    # correlation used to read a stale, shifted `y` from an earlier iteration
    # or cell, and a trailing comma wrapped the raw values in 1-tuples.
    for when, period in periods.items():
        corrs.append({
            "name": name_, "stat": str(stat), "wieghts": name,
            "corr (raw)": yp.loc[period].corr(yo.loc[period]),
            "corr (smoothed)": yp_smooth.loc[period].corr(yo_smooth.loc[period]),
            "when": when,
        })

    # Optionally rescale onto the AUROC axis; hover shows the unscaled value.
    if shift_drift_to_perf:
        y = shift_to_other(yo, yp, this_range, this_center)
    else:
        y = yo
    fig.add_trace(go.Scatter(x=x, y=smooth(y),
                             customdata=yo_smooth,
                             showlegend=True, legendgroup=name_,
                             name=name_, hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                             line={"width": 1}, connectgaps=False), row=1, col=1)

add_date_line(settings.PADCHEST_SPLIT_DATES[0], f"Val Start<br />({settings.PADCHEST_SPLIT_DATES[0]})")
add_date_line(settings.PADCHEST_SPLIT_DATES[1], f"Test Start<br />({settings.PADCHEST_SPLIT_DATES[1].strip()})")

if nonfrontal_add_date is not None:
    add_date_line(nonfrontal_add_date, f"Lat. Added<br />({nonfrontal_add_date})")

if frontal_remove_date is not None:
    add_date_line(frontal_remove_date, f"Front Removed<br />({frontal_remove_date})")


fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC")
fig.update_layout(hovermode="x unified")
fig.update_layout(height=600)
fig.update_xaxes(range=[settings.PADCHEST_SPLIT_DATES[1], y.index.max()])
corr_df = pd.DataFrame(corrs).sort_values('when')
display(corr_df)
fig.show()

fig_html = fig.to_html()

# Append this section to the HTML report.
with open(fn, 'a') as f:
    print("<h2>Level 1 Unified</h2>", file=f)
    corr_df.to_html(f)
    print(fig_html, file=f)


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px


col_name = "metadata"
cols = metadata_cols
otherstd_ = otherstd[cols]
# One subplot row per statistic: chi2 columns first, ks columns second.
cols_ = [column_xs(otherstd_, include='chi2'), column_xs(otherstd_, include='ks')]
n_rows = len(cols_)

fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=True, vertical_spacing=0.01)

# Smoothed AUROC in every row; only the first copy gets a legend entry.
# go.Scatter replaces the deprecated go.Line alias.
yp_smooth = smooth(yp)
for row in range(1, n_rows + 1):
    fig.add_trace(go.Scatter(x=x, y=yp_smooth, showlegend=(row == 1),
                             name="AUROC", hovertemplate="%{y: .5f}", line=dict(color='blue'),
                             connectgaps=False), row=row, col=1)

name = "abs(corr)"
name_ = "metadata combined"
weights = m[name].sort_values(ascending=False)
# weights = weights.iloc[:5]
# Negated weighted average over all metadata columns.
yo = -w_avg(otherstd_, weights=weights.to_dict())

# Report the correlation under its header, in the format used by the test-period cell.
c, cm = yp.corr(yo), yp_smooth.corr(smooth(yo))
print("correlation with auroc")
print(f"   {name_}: (-1)*w_avg({stat}, w={name}): {c:.4f}, {cm:.4f}", )

# Rescale with shift_to_other, like every per-column trace below, so the
# combined line is comparable with them on the same axis.
if shift_drift_to_perf:
    y = shift_to_other(yo, yp, this_range, this_center)
else:
    y = yo

# Combined line in every row; only the first copy gets a legend entry.
for row in range(1, n_rows + 1):
    fig.add_trace(go.Scatter(x=x, y=smooth(y),
                             customdata=smooth(yo),
                             showlegend=(row == 1), legendgroup=name_,
                             name=name_, hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                             line={"color": 'red', "width": 1}, connectgaps=False), row=row, col=1)


# One thin line per individual metadata column, in the row of its statistic.
line = {"width": 1}
for row, cols in enumerate(cols_, 1):
    for c in cols:
        yo = -otherstd[c]
        if shift_drift_to_perf:
            y = shift_to_other(yo, yp, this_range, this_center)
        else:
            y = yo
        fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True,
                                 customdata=smooth(yo),
                                 legendgroup=str(c[0]),
                                 name=str(c), hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                                 line=line, connectgaps=False), row=row, col=1)

add_date_line(settings.PADCHEST_SPLIT_DATES[0], f"Val Start<br />({settings.PADCHEST_SPLIT_DATES[0]})", y=1.08)
add_date_line(settings.PADCHEST_SPLIT_DATES[1], f"Test Start<br />({settings.PADCHEST_SPLIT_DATES[1].strip()})", y=1.08)

if nonfrontal_add_date is not None:
    add_date_line(nonfrontal_add_date, f"Lat. Added<br />({nonfrontal_add_date})",  y=1.08)

if frontal_remove_date is not None:
    add_date_line(frontal_remove_date, f"Front Removed<br />({frontal_remove_date})",  y=1.08)

fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC")
fig.update_layout(hovermode="x unified")
fig.update_layout(height=1200)
fig.update_xaxes(range=[settings.PADCHEST_SPLIT_DATES[1], y.index.max()])
fig.show()

fig_html = fig.to_html()

# Append this section to the HTML report.
with open(fn, 'a') as f:
    print("<h2>Metadata</h2>", file=f)
    print(fig_html, file=f)


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

col_name = "vae"
cols = vae_cols

# 'distance' columns of the vae group from the second split date onwards.
o = other_df[cols].loc[settings.PADCHEST_SPLIT_DATES[1]:]
o = o.swaplevel(0, 2, axis=1)[['distance']].swaplevel(0, 2, axis=1)

# The ten columns with the largest maximum value in that period.
column_peaks = o.max(axis=0).sort_values(ascending=False)
cols = column_peaks.head(10).index.tolist()

otherstd_ = otherstd[cols]


cols_ = [column_xs(otherstd_, include='ks')]

fig = make_subplots(rows=len(cols_), cols=1, shared_xaxes=True, vertical_spacing=0.01)
fig.add_trace(
    go.Line(x=x, y=smooth(yp), showlegend=True,
            name="AUROC", hovertemplate="%{y: .5f}", line=dict(color='blue'), connectgaps=False),
    row=1, col=1)


u = yp.mean()
r = yp.max()-yp.min()

single_disp = dict(line=dict(dash="dot", width=1))
colors = px.colors.qualitative.Set1

# Combined line: negated weighted average of the selected columns.
name = "abs(corr)"
name_ = "combined"
weights = m[name].sort_values(ascending=False)
yo = -w_avg(otherstd_, weights=weights.to_dict())
c, cm = yp.corr(yo), smooth(yp).corr(smooth(yo))
# Optionally rescale onto the AUROC axis; hover shows the unscaled value.
y = shift_to_other(yo, yp, this_range, this_center) if shift_drift_to_perf else yo
fig.add_trace(
    go.Line(x=x, y=smooth(y),
            customdata=smooth(yo),
            showlegend=True, legendgroup=name_,
            name=name_, hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
            line={"color": 'red', "width": 1},  connectgaps=False),
    row=1, col=1)

# One thin line per individual column.
line = {"width": 1}
for row, cols in enumerate(cols_, 1):
    for c in cols:
        yo = -otherstd[c]
        y = shift_to_other(yo, yp, this_range, this_center) if shift_drift_to_perf else yo
        fig.add_trace(
            go.Line(x=x, y=smooth(y), showlegend=True,
                    customdata=smooth(yo),
                    legendgroup=str(c[0]),
                    name=str(c), hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                    line=line,  connectgaps=False),
            row=row, col=1)

add_date_line(settings.PADCHEST_SPLIT_DATES[0], f"Val Start<br />({settings.PADCHEST_SPLIT_DATES[0]})", y=1.03)
add_date_line(settings.PADCHEST_SPLIT_DATES[1], f"Test Start<br />({settings.PADCHEST_SPLIT_DATES[1].strip()})", y=1.03)

if nonfrontal_add_date is not None:
    add_date_line(nonfrontal_add_date, f"Lat. Added<br />({nonfrontal_add_date})",  y=1.03)

if frontal_remove_date is not None:
    add_date_line(frontal_remove_date, f"Front Removed<br />({frontal_remove_date})",  y=1.03)

fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC",
    hovermode="x unified",
    height=600,
)
fig.update_xaxes(range=[settings.PADCHEST_SPLIT_DATES[1], y.index.max()])
fig.show()

fig_html = fig.to_html()

# Append this section to the HTML report.
with open(fn, 'a') as report:
    report.write(f"<h2>{col_name}</h2>" + "\n")
    report.write(fig_html + "\n")


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

html = ["<h3>metadata correlation during validation</h3>"]

val_start, test_start = settings.PADCHEST_SPLIT_DATES[0], settings.PADCHEST_SPLIT_DATES[1]

# AUROC restricted to the validation period, raw and smoothed.
yp_smooth = smooth(yp)
yp_val = yp.loc[val_start:test_start]
yp_val_smooth = yp_smooth.loc[val_start:test_start]

# Correlation of every (negated) metadata drift column with AUROC in that period.
corrs = []
for col in metadata_cols:
    y = -otherstd[col].loc[val_start:test_start]
    corrs.append({"name": col,
                  "corr (raw)": yp_val.corr(y),
                  "corr (smoothed)": yp_val_smooth.corr(smooth(y))})

df = pd.DataFrame(corrs).sort_values("corr (raw)", ascending=False)


# Plot the ten columns with the highest raw correlation.
col_name = "metadata"
cols = df.head(10)["name"].tolist()
otherstd_ = otherstd[cols]
cols_ = [cols]

fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.01)
# go.Scatter replaces the deprecated go.Line alias.
fig.add_trace(go.Scatter(x=x, y=yp_smooth, showlegend=True,
                         name="AUROC", hovertemplate="%{y: .5f}", line=dict(color='blue'),
                         connectgaps=False), row=1, col=1)

# Label of the combined line. Defined here so the cell does not depend on a
# value left behind by an earlier cell.
name_ = "combined"
name = "info_gain"
weights = m[name].sort_values(ascending=False)
# weights = weights.iloc[:5]
# Negated weighted average of the selected columns.
yo = -w_avg(otherstd_, weights=weights.to_dict())

if shift_drift_to_perf:
    y = shift_to_other(yo, yp, this_range, this_center)
else:
    y = yo
fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True, legendgroup=name_,
                         name=name_, hovertemplate="%{y: .5f}",
                         line={"color": 'red', "width": 1}, connectgaps=False), row=1, col=1)


# One thin line per selected column.
line = {"width": 1}
for row, cols in enumerate(cols_, 1):
    for c in cols:
        yo = -otherstd[c]
        if shift_drift_to_perf:
            y = shift_to_other(yo, yp, this_range, this_center)
        else:
            y = yo
        fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True,
                                 customdata=smooth(yo),
                                 legendgroup=str(c[0]),
                                 name=str(c), hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                                 line=line, connectgaps=False), row=row, col=1)

add_date_line(settings.PADCHEST_SPLIT_DATES[0], f"Val Start<br />({settings.PADCHEST_SPLIT_DATES[0]})")
add_date_line(settings.PADCHEST_SPLIT_DATES[1], f"Test Start<br />({settings.PADCHEST_SPLIT_DATES[1].strip()})")

if nonfrontal_add_date is not None:
    add_date_line(nonfrontal_add_date, f"Lat. Added<br />({nonfrontal_add_date})")

if frontal_remove_date is not None:
    add_date_line(frontal_remove_date, f"Front Removed<br />({frontal_remove_date})")

fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC.")
fig.update_layout(hovermode="x unified")
fig.update_layout(height=600*len(cols_))
# Initial view: the validation period only.
fig.update_xaxes(range=[settings.PADCHEST_SPLIT_DATES[0], settings.PADCHEST_SPLIT_DATES[1]])
fig.show()

html.append(df.to_html())
html.append(fig.to_html())

# Append this section to the HTML report.
with open(fn, 'a') as f:
    print("<br />".join(html), file=f)


In [None]:
html = ["<h3>metadata correlation during test</h3>"]

test_start = settings.PADCHEST_SPLIT_DATES[1]

# AUROC restricted to the test period, raw and smoothed.
yp_smooth = smooth(yp)
yp_test = yp.loc[test_start:]
yp_test_smooth = yp_smooth.loc[test_start:]

# Correlation of every (negated) metadata drift column with AUROC in that period.
corrs = []
for col in metadata_cols:
    y = -otherstd[col].loc[test_start:]
    corrs.append({"name": col,
                  "corr (raw)": yp_test.corr(y),
                  "corr (smoothed)": yp_test_smooth.corr(smooth(y))})

df = pd.DataFrame(corrs).sort_values("corr (raw)", ascending=False)

# Plot the ten columns with the highest raw correlation.
col_name = "metadata"
cols = df.head(10)["name"].tolist()
otherstd_ = otherstd[cols]
cols_ = [cols]

fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.01)
# go.Scatter replaces the deprecated go.Line alias.
fig.add_trace(go.Scatter(x=x, y=yp_smooth, showlegend=True,
                         name="AUROC", hovertemplate="%{y: .5f}", line=dict(color='blue'),
                         connectgaps=False), row=1, col=1)

# Label of the combined line. Defined here so the cell does not depend on a
# value left behind by an earlier cell.
name_ = "combined"
name = "info_gain"
weights = m[name].sort_values(ascending=False)
# weights = weights.iloc[:5]
# Negated weighted average of the selected columns.
yo = -w_avg(otherstd_, weights=weights.to_dict())

c, cm = yp.corr(yo), yp_smooth.corr(smooth(yo))
print(f"   {name_}: (-1)*w_avg({stat}, w={name}): {c:.4f}, {cm:.4f}", )

# Rescale like the per-column traces below (and like the validation cell), and
# only when shift_drift_to_perf is set.
if shift_drift_to_perf:
    y = shift_to_other(yo, yp, this_range, this_center)
else:
    y = yo
fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True, legendgroup=name_,
                         name=name_, hovertemplate="%{y: .5f}",
                         line={"color": 'red', "width": 1}, connectgaps=False), row=1, col=1)


# One thin line per selected column.
line = {"width": 1}
for row, cols in enumerate(cols_, 1):
    for c in cols:
        yo = -otherstd[c]
        if shift_drift_to_perf:
            y = shift_to_other(yo, yp, this_range, this_center)
        else:
            y = yo
        fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True,
                                 customdata=smooth(yo),
                                 legendgroup=str(c[0]),
                                 name=str(c), hovertemplate="%{customdata:.3f} (on graph: %{y:.3f})",
                                 line=line, connectgaps=False), row=row, col=1)

add_date_line(settings.PADCHEST_SPLIT_DATES[0], f"Val Start<br />({settings.PADCHEST_SPLIT_DATES[0]})")
add_date_line(settings.PADCHEST_SPLIT_DATES[1], f"Test Start<br />({settings.PADCHEST_SPLIT_DATES[1].strip()})")

if nonfrontal_add_date is not None:
    add_date_line(nonfrontal_add_date, f"Lat. Added<br />({nonfrontal_add_date})")

if frontal_remove_date is not None:
    add_date_line(frontal_remove_date, f"Front Removed<br />({frontal_remove_date})")

fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC")
fig.update_layout(hovermode="x unified")
fig.update_layout(height=600*len(cols_))
# Initial view: from the second split date to the end of the data.
fig.update_xaxes(range=[settings.PADCHEST_SPLIT_DATES[1], yp.index.max()])
fig.show()

html.append(df.to_html())
html.append(fig.to_html())

# Append this section to the HTML report.
with open(fn, 'a') as f:
    print("<br />".join(html), file=f)


In [None]:
# Weight-table rows of the metadata columns, largest abs(corr) first.
m.loc[metadata_cols].sort_values('abs(corr)', ascending=False)
