In [None]:
from model_drift import settings
from model_drift.helpers import column_xs, correlate_performance, mutual_info_performance, w_avg
import pandas as pd



In [None]:
# Row-wise sum across all columns of `other_df`; with skipna=False a row evaluates
# to NaN as soon as any of its columns is missing.
# NOTE(review): `other_df` is only assigned in a later cell of this notebook, so this
# cell raises NameError on a fresh kernel (Restart & Run All) -- it belongs below that cell.
other_df.sum(axis="columns", skipna=False)

In [None]:
# Run configuration. These four values select which pre-computed drift CSV is
# loaded below and are echoed in the figure titles of the plotting cells.
window = "30D"
stride = "D"
ref_frontal_only = True
target_frontal_only = True

# <TOP_DIR>/results/drift_csvs/combined_s<stride>-w<window>_frontalonly-ref<..>-target<..>.csv
fname = settings.TOP_DIR.joinpath(
    "results",
    "drift_csvs",
    f"combined_s{stride}-w{window}_frontalonly-ref{ref_frontal_only}-target{target_frontal_only}.csv",
)

# The first CSV column becomes the index; the first three rows form a
# three-level column header.
combined_df = pd.read_csv(str(fname), index_col=0, header=[0, 1, 2])

# The index is read as text; convert it to datetimes so date slicing and
# reindexing in later cells work.
combined_df.index = pd.to_datetime(combined_df.index)
combined_df.head()


In [None]:
# Calendar running from the first to the second PadChest split date.
valid_ix = pd.date_range(
    start=settings.PADCHEST_SPLIT_DATES[0],
    end=settings.PADCHEST_SPLIT_DATES[1],
)

# Performance series: the first column label that column_xs returns for "auroc".
perf_col = column_xs(combined_df, "auroc")[0]
perf_df = combined_df[perf_col]

perf_df


In [None]:
# Select every column of combined_df except those column_xs excludes for
# 'auroc' / 'count' (presumably the performance and sample-count columns --
# confirm against column_xs). The result feeds the standardisation cell.
other_cols = column_xs(combined_df, exclude=['auroc', 'count'])
other_df = combined_df[other_cols]

In [None]:
# Standardise the selected drift statistics against the window between the two
# PadChest split dates, collect the columns to be ignored later, and group the
# remaining columns by the prefix of their first header level.
stat = ["pval", "distance"]
otherstd = other_df[column_xs(other_df, include=stat)].copy()

# Columns matched by include=["chi2"] minus "distance". They can optionally be
# replaced by (1 - value); the switch is off, as it was in the original `if 0:`.
flip = column_xs(otherstd, include=["chi2"], exclude=["distance"])
flip_chi2_values = False
if flip_chi2_values:
    otherstd[flip] = 1 - otherstd[flip]

# Per-column mean / std computed only on the rows between the two split dates.
stats = otherstd.loc[settings.PADCHEST_SPLIT_DATES[0]:settings.PADCHEST_SPLIT_DATES[1]].agg(["mean", "std"])

# A zero std would turn the z-score into a division by zero; use 1 for those
# columns so they are only mean-centred.
std0 = stats.loc["std"] == 0
stats.loc["std", std0] = 1

otherstd = (otherstd - stats.loc["mean"]) / stats.loc["std"]

# Columns to ignore downstream:
#   1. any column that still contains a NaN after standardisation,
#   2. an explicit list of DICOM attributes,
#   3. everything column_xs matches for "chi2".
# The list may contain duplicates; it is only used for label-based masking.
bad_cols = otherstd.columns[otherstd.isnull().any(axis=0)].tolist()

bad_cols += column_xs(otherstd, include=["Exposure_DICOM", "XRayTubeCurrent_DICOM", "BitsStored_DICOM",
                                         "PatientSex_DICOM", "Manufacturer_DICOM", "PixelAspectRatio_DICOM",
                                         "ExposureInuAs_DICOM", "SpatialResolution_DICOM",
                                         "RelativeXRayExposure_DICOM"])

bad_cols += column_xs(otherstd, include=["chi2"])

print(bad_cols)

# Group columns by the first level of the column key: "mu." -> VAE,
# "activation." -> score; whatever is left is treated as metadata.
vae_cols = [c for c in otherstd.columns if "mu." in c[0]]
score_cols = [c for c in otherstd.columns if "activation." in c[0]]
metadata_cols = sorted(set(otherstd.columns) - set(vae_cols) - set(score_cols))

# Limit every standardised value to [-clip, clip].
clip = 1
otherstd = otherstd.clip(lower=-clip, upper=clip)

In [None]:



# Score every standardised statistic against the AUROC series with the two
# project helpers (correlation and mutual information, the latter with 25 bins).
all_corr_df = correlate_performance(perf_df.rename("auroc"), otherstd)
all_ig_df = mutual_info_performance(perf_df.rename("auroc"), otherstd, bins=25)

# Weight table, one row per statistic:
#   <name of all_ig_df>        as returned by mutual_info_performance
#                              (later cells read it as 'info_gain' -- confirm)
#   abs(corr)                  absolute correlation
#   mean[abs(corr),info_gain]  row-wise mean of the two columns above
#   no_weights                 constant 1, i.e. an unweighted baseline
# Missing scores are replaced by 0.
m_ = all_ig_df.to_frame().join(all_corr_df.abs().rename("abs(corr)"))
m_ = (
    m_.join(m_.mean(axis=1).rename("mean[abs(corr),info_gain]"))
    .assign(no_weights=1)
    .fillna(0)
)

# Show the 20 highest-scoring statistics.
m_.sort_values(by="mean[abs(corr),info_gain]", ascending=False).head(20)


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px


# Toggle: when True, the rows of the weight table for every label in `bad_cols`
# are set to 0, so those statistics get weight 0 under every weighting column of `m`.
ignore_bad_cols = True

# Work on a copy so the unmasked table `m_` stays available.
m = m_.copy()
if ignore_bad_cols:
    m.loc[bad_cols] = 0

# Span of the exponentially weighted moving average used by smooth(); it is
# also printed in the figure titles.
span = 90


def smooth(y: pd.DataFrame):
    """Exponentially smooth ``y`` while keeping its gaps.

    An exponentially weighted mean with the module-level ``span`` is computed
    (``ignore_na=False``), and every position that is NaN in ``y`` is NaN in
    the result as well, so gaps in the input remain gaps in the smoothed output.

    The cells in this notebook also pass a pandas Series; the same operations
    apply to it.
    """
    averaged = y.ewm(span=span, ignore_na=False).mean()
    return averaged.where(y.notna())

# Figure 1: smoothed AUROC overlaid with one curve per weighting column of `m`.
# Each curve is the negated weighted average (w_avg) of the standardised
# statistics, rescaled onto the AUROC axis so the shapes can be compared.

# Daily calendar spanning the performance series; reindexing introduces NaN
# rows for missing days so gaps stay visible (connectgaps=False).
x = pd.date_range(perf_df.index.min(), perf_df.index.max())
yp = perf_df.reindex(x)

otherstd = otherstd.reindex(x)


fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.01)
# go.Line is a deprecated dict alias that add_trace treated as a scatter
# trace; go.Scatter is the explicit equivalent.
fig.add_trace(go.Scatter(x=x, y=smooth(yp), showlegend=True,
                         name="AUROC", hovertemplate="%{y: .5f}", connectgaps=False), row=1, col=1)

# Target centre and range for the rescaled curves. The centre is the median
# because each curve is centred on its own median below (the two sibling
# plotting cells use the median as well).
u = yp.median()
r = yp.max() - yp.min()

single_disp = dict(line=dict(dash="dot", width=1))  # not referenced in this cell
print("correlation with auroc")

colors = px.colors.qualitative.Set1

for i, name in enumerate(m):
    weights = m[name].sort_values(ascending=False)
    y = -w_avg(otherstd.loc[x], weights=weights.to_dict())

    # Correlation with AUROC on the raw and on the smoothed series.
    c, cm = yp.corr(y), smooth(yp).corr(smooth(y))
    print(f"   (-1)*w_avg({stat}, w={name}): {c:.4f}, {cm:.4f}")

    # Rescale for display only: centre on the median, match the AUROC range.
    y = (y - y.median()) / (y.max() - y.min()) * r + u
    fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True, legendgroup=name,
                             name=name, hovertemplate="%{y: .5f}",
                             # wrap around the palette if `m` has more columns than colours
                             line={"color": colors[i % len(colors)], "width": .8},
                             connectgaps=False), row=1, col=1)

# Dotted vertical markers at the two split dates, each with a label above the plot.
val_start = settings.PADCHEST_SPLIT_DATES[0]
test_start = settings.PADCHEST_SPLIT_DATES[1]
for marker_label, marker_x, marker_text in (
    ("Val Start", val_start, val_start),
    ("Test Start", test_start, test_start.strip()),
):
    fig.add_shape(type='line', x0=marker_x, y0=0, x1=marker_x, y1=1,
                  line=dict(color='black', dash='dot'), xref='x', yref='paper')
    fig.add_annotation(textangle=0, xref="x", yref="paper", x=marker_x, y=1.08,
                       text=f"{marker_label}<br />({marker_text})", showarrow=False)

fig.update_layout(
    title=f"(-1)*w_avg({stat}) and AUROC. stride: {stride}, window:{window} frontal only, ref: {ref_frontal_only}, target: {target_frontal_only} (smoothed with ewm(span={span}))",
    hovermode="x unified",
    height=600,
)
# Initial view: from the second split date to the end of the last plotted curve.
fig.update_xaxes(range=[test_start, y.index.max()])
fig.show()


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

# Figure 2: smoothed AUROC overlaid with the negated, info_gain-weighted average
# of each column group (metadata, vae, score, vae+score), rescaled onto the
# AUROC axis. Relies on x, yp, m, smooth and span from the previous plotting cell.
fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.01)
# go.Line is a deprecated dict alias that add_trace treated as a scatter
# trace; go.Scatter is the explicit equivalent.
fig.add_trace(go.Scatter(x=x, y=smooth(yp), showlegend=True,
                         name="AUROC", hovertemplate="%{y: .5f}", connectgaps=False), row=1, col=1)

# Target centre and range for the rescaled curves.
u = yp.median()
r = yp.max() - yp.min()

single_disp = dict(line=dict(dash="dot", width=1))  # not referenced in this cell
print("correlation with auroc")

colors = px.colors.qualitative.Set1

# A single weighting scheme is used here; the weights are the same for every
# group, so they are computed once.
name = "info_gain"
weights = m[name].sort_values(ascending=False)
weight_by_column = weights.to_dict()

column_groups = [
    ("metadata", metadata_cols),
    ("vae", vae_cols),
    ("score", score_cols),
    ("vae+score", vae_cols + score_cols),
]

# Counting from 1 keeps the original colour assignment (Set1[1] .. Set1[4]).
for color_ix, (name_, cols) in enumerate(column_groups, 1):
    otherstd_ = otherstd[cols]
    y = -w_avg(otherstd_.loc[x], weights=weight_by_column)

    # Correlation with AUROC on the raw and on the smoothed series.
    c, cm = yp.corr(y), smooth(yp).corr(smooth(y))
    print(f"   {name_}: (-1)*w_avg({stat}, w={name}): {c:.4f}, {cm:.4f}")

    # Rescale for display only: centre on the median, match the AUROC range.
    y = (y - y.median()) / (y.max() - y.min()) * r + u
    fig.add_trace(go.Scatter(x=y.index, y=smooth(y), showlegend=True, legendgroup=name_,
                             name=name_, hovertemplate="%{y: .5f}",
                             line={"color": colors[color_ix], "width": 1},
                             connectgaps=False), row=1, col=1)

# Dotted vertical markers at the two split dates, each with a label above the plot.
val_start = settings.PADCHEST_SPLIT_DATES[0]
test_start = settings.PADCHEST_SPLIT_DATES[1]
for marker_label, marker_x, marker_text in (
    ("Val Start", val_start, val_start),
    ("Test Start", test_start, test_start.strip()),
):
    fig.add_shape(type='line', x0=marker_x, y0=0, x1=marker_x, y1=1,
                  line=dict(color='black', dash='dot'), xref='x', yref='paper')
    fig.add_annotation(textangle=0, xref="x", yref="paper", x=marker_x, y=1.08,
                       text=f"{marker_label}<br />({marker_text})", showarrow=False)

fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC. stride: {stride}, window:{window} frontal only, ref: {ref_frontal_only}, target: {target_frontal_only} (smoothed with ewm(span={span}))",
    hovermode="x unified",
    height=600,
)
# Initial view: from the second split date to the end of the last plotted curve.
fig.update_xaxes(range=[test_start, y.index.max()])
fig.show()


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

# Figure 3: one subplot per statistic family of the metadata columns ('chi2',
# 'ks'). Every subplot shows the smoothed AUROC, the negated info_gain-weighted
# average of all metadata columns, and each individual column of that family,
# all rescaled onto the AUROC axis.
# Relies on x, yp, m, smooth and span from the earlier plotting cell.
cols = metadata_cols
otherstd_ = otherstd[cols]
cols_ = [column_xs(otherstd_, include='chi2'), column_xs(otherstd_, include='ks')]
n_rows = len(cols_)

fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=True, vertical_spacing=0.01)

# AUROC reference curve in every subplot; only the first one gets a legend entry.
# go.Line is a deprecated dict alias that add_trace treated as a scatter
# trace; go.Scatter is the explicit equivalent.
auroc_smooth = smooth(yp)
for subplot_row in range(1, n_rows + 1):
    fig.add_trace(go.Scatter(x=x, y=auroc_smooth, showlegend=(subplot_row == 1),
                             name="AUROC", hovertemplate="%{y: .5f}", line=dict(color='blue'),
                             connectgaps=False), row=subplot_row, col=1)

# Target centre and range for the rescaled curves.
u = yp.median()
r = yp.max() - yp.min()

single_disp = dict(line=dict(dash="dot", width=1))  # not referenced in this cell
print("correlation with auroc")

colors = px.colors.qualitative.Set1  # not referenced in this cell

name = "info_gain"
name_ = "metadata combined"
weights = m[name].sort_values(ascending=False)
y = -w_avg(otherstd_, weights=weights.to_dict())

# Correlation with AUROC on the raw and on the smoothed series.
c, cm = yp.corr(y), smooth(yp).corr(smooth(y))
print(f"   {name_}: (-1)*w_avg({stat}, w={name}): {c:.4f}, {cm:.4f}")

# Rescale for display only: centre on the median, match the AUROC range.
y = (y - y.median()) / (y.max() - y.min()) * r + u
combined_smooth = smooth(y)
for subplot_row in range(1, n_rows + 1):
    fig.add_trace(go.Scatter(x=x, y=combined_smooth, showlegend=(subplot_row == 1), legendgroup=name_,
                             name=name_, hovertemplate="%{y: .5f}", line={"color": 'red', "width": 1},
                             connectgaps=False), row=subplot_row, col=1)

# Individual columns: family k goes into subplot row k. Colours come from the
# figure's default colour sequence because only the width is set.
line = {"width": 1}
for row, family_cols in enumerate(cols_, 1):
    for col_key in family_cols:
        y = otherstd[col_key]
        y = (y - y.median()) / (y.max() - y.min()) * r + u
        fig.add_trace(go.Scatter(x=x, y=smooth(y), showlegend=True, legendgroup=str(col_key),
                                 name=str(col_key), hovertemplate="%{y: .5f}", line=line,
                                 connectgaps=False), row=row, col=1)

# Dotted vertical markers at the two split dates, each with a label above the plot.
val_start = settings.PADCHEST_SPLIT_DATES[0]
test_start = settings.PADCHEST_SPLIT_DATES[1]
for marker_label, marker_x, marker_text in (
    ("Val Start", val_start, val_start),
    ("Test Start", test_start, test_start.strip()),
):
    fig.add_shape(type='line', x0=marker_x, y0=0, x1=marker_x, y1=1,
                  line=dict(color='black', dash='dot'), xref='x', yref='paper')
    fig.add_annotation(textangle=0, xref="x", yref="paper", x=marker_x, y=1.03,
                       text=f"{marker_label}<br />({marker_text})", showarrow=False)

fig.update_layout(
    title=f"(-1)*w_avg({stat},w={name}) and AUROC. stride: {stride}, window:{window} frontal only, ref: {ref_frontal_only}, target: {target_frontal_only} (smoothed with ewm(span={span}))",
    hovermode="x",
    height=1200,
)
# Initial view: from the second split date to the end of the last plotted curve.
fig.update_xaxes(range=[test_start, y.index.max()])
fig.show()


In [None]:
# Weight-table rows of the "activation." (score) columns, highest info_gain first.
m.loc[score_cols].sort_values(by='info_gain', ascending=False)