# Setup

In [None]:
import gc
import random

import astropy.units as u
import ipywidgets as widgets
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sunpy.map
import sunpy.visualization.colormaps.color_tables as ct
from astropy.visualization import AsinhStretch, ImageNormalize
from IPython.display import clear_output, display
from matplotlib.patches import Rectangle
from sunpy.coordinates import frames
from tqdm import tqdm
import json
import random
from pathlib import Path

import ipywidgets as widgets
from IPython.display import display, clear_output

from contextlib import contextmanager

In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the Library package are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from Library.Processing import *
from Library.IO import *
from Library.Model import *
from Library.Metrics import *
from Library.Config import *
from Library.CH import *
from Library.Plot import *

In [None]:
# Widen pandas' text output and lift the per-column character limit so that
# long values are shown in full instead of being wrapped or truncated.
pd.set_option(
    "display.width", 10000,
    "display.max_colwidth", None,
)

# Data Processing

In [None]:
# Load the table stored as Paths.parquet under the configured artifact root.
# Later cells read its `fits_path` and `mask_path` columns.
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")

In [None]:
# Training subset: rows whose index label lies between "20170501" and "20170801".
# NOTE(review): assumes a sorted, date-like index so that the string slice
# selects rows by label — confirm against how Paths.parquet is written.
train_df = df["20170501":"20170801"]

In [None]:
# Inference subset = df minus train_df: keep every row of df whose index
# label does not occur in the training subset.
inf_df = df[~df.index.isin(train_df.index)]

# Model

## Training

In [None]:
# train_model(train_df)

In [None]:
# Load an already-trained model instead of retraining. "A0" matches the
# architecture file Config/Model/Architecture/A0.json used in the Debug
# section; "D0" is presumably a dataset/config identifier — TODO confirm
# against load_trained_model.
model = load_trained_model("A0", "D0")

In [None]:
# Pick one record by position for a spot check; the bare `row` expression
# makes the notebook render it as the cell output.
row = df.iloc[4436]
row

In [None]:
# Run the model on this record's FITS file. Only element [1] of
# prepare_fits' return value is passed on (other cells unpack it as
# `_, img`). Presumably `p` is a per-pixel probability map — TODO confirm.
p = fits_to_pmap(model, prepare_fits(row.fits_path)[1])


In [None]:
# Range and spread of the prediction, printed as: min, max, mean, std.
print(p.min(), p.max(), p.mean(), p.std())

In [None]:
import numpy as np, pandas as pd
from Library.Config import paths
from Library.Model import load_trained_model
from Library import IO

model = load_trained_model("A0", "D0")
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
val = df.iloc[-50:]  # or use your actual val split

# One [min, max, mean, std] entry per validation image.
# The list is deliberately NOT called `stats`: the Debug section defines a
# `stats()` helper under that name, and rebinding it to a list here makes the
# later `stats(...)` calls fail with "'list' object is not callable" whenever
# this cell is re-run after the helper has been defined.
pmap_stats = []

for row in val.itertuples():
    _, img = IO.prepare_fits(row.fits_path)
    # Add leading and trailing singleton axes around the resized image before
    # inference, then strip them again from the prediction.
    x = IO.resize_for_model(img, model.architecture["img_size"])[None, ..., None]
    p = model.compiled_infer(x)[0, ..., 0]
    pmap_stats.append([p.min(), p.max(), p.mean(), p.std()])

print(np.mean(pmap_stats, axis=0))  # avg min/max/mean/std


In [None]:
from Library.IO import prepare_mask
# For the first five validation rows, compare the mean prediction on pixels
# where the resized reference mask is positive against the mean elsewhere.
target_size = model.architecture["img_size"]
for row in val.head(5).itertuples():
    _, img = IO.prepare_fits(row.fits_path)
    x = IO.resize_for_model(img, target_size)[None, ..., None]
    p = model.compiled_infer(x)[0, ..., 0]
    resized_mask = IO.resize_for_model(prepare_mask(row.mask_path), target_size)
    m = resized_mask > 0
    print("inside:", p[m].mean(), "outside:", p[~m].mean(), "std:", p.std())


# Debug

In [None]:
import json
import numpy as np
import pandas as pd
from Library import IO
from Library.Model import load_pair
from Library.Config import paths

# Read the architecture config through a context manager so the file handle
# is closed deterministically (a bare json.load(open(...)) leaves it open
# until garbage collection).
with open("Config/Model/Architecture/A0.json", encoding="utf-8") as arch_file:
    arch = json.load(arch_file)
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
row = df.iloc[0]

# Image/mask pair as produced by the training data path.
train_img, train_mask = load_pair(row.fits_path, row.mask_path, arch)

# The same FITS file pushed through the inference-side preprocessing:
# prepare -> cast to float32 -> resize -> append a trailing singleton axis.
_, infer_img = IO.prepare_fits(row.fits_path)
infer_img = IO.resize_for_model(infer_img.astype(np.float32), arch["img_size"])
infer_img = infer_img[..., None]

def stats(x):
    """Summarise an array as a dict of plain Python floats.

    Args:
        x: Array exposing ``min``/``max``/``mean``/``std`` methods and
            accepted by ``np.percentile``.

    Returns:
        dict with the keys ``min``, ``max``, ``mean``, ``std``, ``p1`` and
        ``p99`` (in that order); ``p1``/``p99`` are the 1st and 99th
        percentiles of ``x``.
    """
    summary = {
        name: float(getattr(x, name)())
        for name in ("min", "max", "mean", "std")
    }
    summary["p1"] = float(np.percentile(x, 1))
    summary["p99"] = float(np.percentile(x, 99))
    return summary

# Summary statistics of the image from the training path and from the
# inference path, for side-by-side comparison.
print("train:", stats(train_img))
print("infer:", stats(infer_img))

# Element-wise difference between the two; "maxabs" is the largest absolute
# deviation, so 0 means both paths produced identical values.
# NOTE(review): assumes train_img and infer_img have matching (or
# broadcast-compatible) shapes — confirm against load_pair.
diff = train_img - infer_img
print("diff:", stats(diff), "maxabs:", float(np.abs(diff).max()))


In [None]:
import tensorflow as tf

# Rebuild the model input with tf.image.resize (bilinear) and compare it with
# the image returned by load_pair, to see whether the two resize paths agree.
_, base_img = IO.prepare_fits(row.fits_path)
# Add a leading and a trailing singleton axis before resizing.
x = base_img[np.newaxis, ..., np.newaxis].astype(np.float32)
x = tf.image.resize(x, [arch["img_size"], arch["img_size"]], method="bilinear").numpy()
apply_img = x[0]  # drop the leading axis again

# (A stray bare `w` expression used to sit here; it raised NameError and
# aborted the cell before anything was printed.)
print("apply:", stats(apply_img))
diff2 = train_img - apply_img
print("train-apply diff:", stats(diff2), "maxabs:", float(np.abs(diff2).max()))


In [None]:
import pandas as pd, numpy as np
from Library.Config import paths
from Library.IO import prepare_mask

# Reload the path table and select the rows labelled "20170101".."20171231"
# for the mask checks below.
# NOTE(review): this rebinds the notebook-level `train_df` from the
# Data Processing section to a different date range.
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
train_df = df["20170101":"20171231"]  # adjust

def has_pos(p):
    """Return whether the mask loaded from path ``p`` sums to more than zero."""
    return prepare_mask(p).sum() > 0

# Evaluate has_pos on every mask path, then report how many rows were
# flagged out of the total.
flags = train_df["mask_path"].apply(has_pos)
print("total:", len(train_df), "non-empty:", flags.sum())


In [None]:
# Mean mask value over a random sample of rows (the positive-pixel fraction
# if masks are 0/1 — TODO confirm against prepare_mask).
# The sample size is capped at the number of available rows: Series.sample
# raises ValueError when asked for more rows than exist without replacement.
sample_size = min(200, len(train_df))
ratios = train_df["mask_path"].sample(sample_size).apply(
    lambda p: prepare_mask(p).mean()
)
print("mean mask ratio:", ratios.mean())


In [None]:
from Library import IO
import numpy as np
# Orientation check: compare the mean image value under the mask as loaded
# with the mean under the same mask flipped upside down.
row = train_df.iloc[0]

_, img = IO.prepare_fits(row.fits_path)
mask = IO.prepare_mask(row.mask_path)

inside = mask > 0
m0 = img[inside].mean()
m1 = img[np.flipud(inside)].mean()
print("inside mean (mask):", m0, "inside mean (flipped mask):", m1)


In [None]:
import json
import numpy as np
import pandas as pd
from Library.Config import paths
from Library.Model import build_unet, bce_dice_loss, dice_coef, load_pair

# Overfitting smoke test: train a fresh U-Net on a couple of samples and look
# at the spread of its predictions on those same samples.

# Load a tiny slice
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
tiny = df.iloc[4444:4446]  # pick 1–2 rows you know have CHs

# Use a context manager so the config file handle is closed deterministically
# (a bare json.load(open(...)) leaves it open until garbage collection).
with open("Config/Model/Architecture/A0.json", encoding="utf-8") as arch_file:
    arch = json.load(arch_file)
arch["batch_size"] = 4

# Build training batch
imgs, masks = [], []
for row in tiny.itertuples():
    img, mask = load_pair(row.fits_path, row.mask_path, arch)
    imgs.append(img)
    masks.append(mask)

X = np.stack(imgs, axis=0).astype(np.float32)
Y = np.stack(masks, axis=0).astype(np.float32)

model = build_unet(arch)
model.compile(
    optimizer="adam",
    loss=bce_dice_loss,
    metrics=[dice_coef, "accuracy"],
)

# Pass the batch size configured above explicitly; otherwise fit() silently
# falls back to the Keras default of 32 and the override has no effect here.
hist = model.fit(X, Y, batch_size=arch["batch_size"], epochs=50, verbose=1)
pred = model.predict(X)

print("pred stats", pred.min(), pred.max(), pred.mean(), pred.std())


In [None]:
# Sanity check on the labels: a mean/sum close to zero would mean the batch
# contains almost no positive pixels to learn from.
print("Y mean:", Y.mean(), "Y sum:", Y.sum())   # make sure labels aren't near-zero


In [None]:
# Imported here as well so this cell does not depend on an earlier debug cell
# having been executed first (the other debug cells import what they use).
import tensorflow as tf

# Second overfit attempt on the same batch: fresh model, plain binary
# cross-entropy only, Adam with a learning rate of 1e-5.
model = build_unet(arch)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="binary_crossentropy",
)
model.fit(X, Y, epochs=50, batch_size=4, verbose=1)
pred = model.predict(X)
print(pred.min(), pred.max(), pred.mean(), pred.std())


In [None]:
# Snapshot the trainable weights, take one optimisation step on the batch,
# and report the largest absolute change of any single weight value.
# A delta of 0 would mean the step did not update the weights at all.
w0 = [var.numpy().copy() for var in model.trainable_weights]
model.train_on_batch(X, Y)
w1 = [var.numpy() for var in model.trainable_weights]

deltas = [np.abs(before - after).max() for before, after in zip(w0, w1)]
max_delta = max(deltas)
print("max weight delta:", max_delta)


In [None]:
# Evaluate the loss on the batch, apply 20 further single-batch updates on
# the same data, then evaluate again; loss1 below loss0 indicates that the
# optimisation steps are actually reducing the loss.
loss0 = model.evaluate(X, Y, verbose=0)
for _ in range(20):
    model.train_on_batch(X, Y)
loss1 = model.evaluate(X, Y, verbose=0)
print("loss0", loss0, "loss1", loss1)
