# Setup

In [18]:
import gc
import random

import astropy.units as u
import ipywidgets as widgets
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sunpy.map
import sunpy.visualization.colormaps.color_tables as ct
from astropy.visualization import AsinhStretch, ImageNormalize
from IPython.display import clear_output, display
from matplotlib.patches import Rectangle
from sunpy.coordinates import frames
from tqdm import tqdm
import json
import random
from pathlib import Path

import ipywidgets as widgets
from IPython.display import display, clear_output

from contextlib import contextmanager

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
from Library.Processing import *
from Library.IO import *
from Library.Model import *
from Library.Metrics import *
from Library.Config import *
from Library.CH import *
from Library.Plot import *

In [21]:
pd.set_option("display.width", 10000)
pd.set_option("display.max_colwidth", None)

# Data Processing

In [22]:
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")

In [23]:
train_df = df["20170501":"20170801"]

In [24]:
# set-wise subtraction
inf_df = df.loc[~df.index.isin(train_df.index)]

# Model

## Training

In [25]:
# train_model(train_df)

In [26]:
model = load_trained_model("A0", "D0")

/Users/aosh/Developer/helio-n/Outputs/Models/A0D0.keras


In [27]:
row = df.iloc[4436]
row

fits_path                 /Volumes/JetDrive 330/mnt~sun/FITS/2016/05/23/AIA20160523_0552_0193.fits
mask_path    /Volumes/JetDrive 330/mnt~sun/Masks/2016/05/AIA20160523_055205_0193_CH_MASK_FINAL.png
hmi_path            /Volumes/JetDrive 330/mnt~sun/HMI/2016/hmi.M_720s_nrt.20160523_060000_TAI.fits
Name: 20160523_0552, dtype: object

In [28]:
p = fits_to_pmap(model, prepare_fits(row.fits_path)[1])


In [29]:
print(p.min(), p.max(), p.mean(), p.std())

0.05510475 0.25103846 0.07873798 0.036562134


In [30]:
import numpy as np, pandas as pd
from Library.Config import paths
from Library.Model import load_trained_model
from Library import IO

model = load_trained_model("A0", "D0")
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
val = df.iloc[-50:]  # or use your actual val split
stats = []

for row in val.itertuples():
    _, img = IO.prepare_fits(row.fits_path)
    x = IO.resize_for_model(img, model.architecture["img_size"])[None, ..., None]
    p = model.compiled_infer(x)[0, ..., 0]
    stats.append([p.min(), p.max(), p.mean(), p.std()])

print(np.mean(stats, axis=0))  # avg min/max/mean/std


/Users/aosh/Developer/helio-n/Outputs/Models/A0D0.keras
[0.0820601  0.40685004 0.12276604 0.06570034]


In [31]:
from Library.IO import prepare_mask
for row in val.iloc[:5].itertuples():
    _, img = IO.prepare_fits(row.fits_path)
    x = IO.resize_for_model(img, model.architecture["img_size"])[None, ..., None]
    p = model.compiled_infer(x)[0, ..., 0]
    m = IO.resize_for_model(prepare_mask(row.mask_path), model.architecture["img_size"]) > 0
    print("inside:", p[m].mean(), "outside:", p[~m].mean(), "std:", p.std())


inside: 0.098082684 outside: 0.124989405 std: 0.06557498
inside: 0.09710738 outside: 0.124266 std: 0.065491796
inside: 0.09416887 outside: 0.12365583 std: 0.0653875
inside: 0.09423238 outside: 0.123800956 std: 0.06529721
inside: 0.09625677 outside: 0.12368114 std: 0.065312736


# Debug

In [10]:
import json
import numpy as np
import pandas as pd
from Library import IO
from Library.Model import load_pair
from Library.Config import paths

arch = json.load(open("Config/Model/Architecture/A0.json"))
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
row = df.iloc[0]

train_img, train_mask = load_pair(row.fits_path, row.mask_path, arch)

_, infer_img = IO.prepare_fits(row.fits_path)
infer_img = IO.resize_for_model(infer_img.astype(np.float32), arch["img_size"])
infer_img = infer_img[..., None]

def stats(x):
    return {
        "min": float(x.min()),
        "max": float(x.max()),
        "mean": float(x.mean()),
        "std": float(x.std()),
        "p1": float(np.percentile(x, 1)),
        "p99": float(np.percentile(x, 99)),
    }

print("train:", stats(train_img))
print("infer:", stats(infer_img))

diff = train_img - infer_img
print("diff:", stats(diff), "maxabs:", float(np.abs(diff).max()))


train: {'min': 7.060013012960553e-05, 'max': 0.993018627166748, 'mean': 0.1610996127128601, 'std': 0.17160111665725708, 'p1': 0.00011665590136544779, 'p99': 0.7966505885124207}
infer: {'min': 7.060013012960553e-05, 'max': 0.993018627166748, 'mean': 0.1610996127128601, 'std': 0.17160111665725708, 'p1': 0.00011665590136544779, 'p99': 0.7966505885124207}
diff: {'min': 0.0, 'max': 0.0, 'mean': 0.0, 'std': 0.0, 'p1': 0.0, 'p99': 0.0} maxabs: 0.0


In [11]:
import tensorflow as tf

_, base_img = IO.prepare_fits(row.fits_path)
x = base_img[np.newaxis, ..., np.newaxis].astype(np.float32)
x = tf.image.resize(x, [arch["img_size"], arch["img_size"]], method="bilinear").numpy()
apply_img = x[0]
w
print("apply:", stats(apply_img))
diff2 = train_img - apply_img
print("train-apply diff:", stats(diff2), "maxabs:", float(np.abs(diff2).max()))


NameError: name 'w' is not defined

In [12]:
import pandas as pd, numpy as np
from Library.Config import paths
from Library.IO import prepare_mask

df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
train_df = df["20170101":"20171231"]  # adjust

def has_pos(p):
    m = prepare_mask(p)
    return m.sum() > 0

flags = train_df["mask_path"].apply(has_pos)
print("total:", len(train_df), "non-empty:", flags.sum())


total: 738 non-empty: 737


In [13]:
ratios = train_df["mask_path"].sample(200).apply(
    lambda p: prepare_mask(p).mean()
)
print("mean mask ratio:", ratios.mean())


mean mask ratio: 0.02806097


In [14]:
from Library import IO
import numpy as np
row = train_df.iloc[0]

_, img = IO.prepare_fits(row.fits_path)
mask = IO.prepare_mask(row.mask_path)

m0 = img[mask > 0].mean()
m1 = img[np.flipud(mask) > 0].mean()
print("inside mean (mask):", m0, "inside mean (flipped mask):", m1)


inside mean (mask): 0.04611385560349061 inside mean (flipped mask): 0.15386372823906944


In [15]:
import json
import numpy as np
import pandas as pd
from Library.Config import paths
from Library.Model import build_unet, bce_dice_loss, dice_coef, load_pair

# Load a tiny slice
df = pd.read_parquet(paths["artifact_root"] + "Paths.parquet")
tiny = df.iloc[4444:4446]  # pick 1–2 rows you know have CHs

arch = json.load(open("Config/Model/Architecture/A0.json"))
arch["batch_size"] = 4

# Build training batch
imgs, masks = [], []
for row in tiny.itertuples():
    img, mask = load_pair(row.fits_path, row.mask_path, arch)
    imgs.append(img)
    masks.append(mask)

X = np.stack(imgs, axis=0).astype(np.float32)
Y = np.stack(masks, axis=0).astype(np.float32)

model = build_unet(arch)
model.compile(
    optimizer="adam",
    loss=bce_dice_loss,
    metrics=[dice_coef, "accuracy"],
)

hist = model.fit(X, Y, epochs=50, verbose=1)
pred = model.predict(X)

print("pred stats", pred.min(), pred.max(), pred.mean(), pred.std())


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 15s/step - accuracy: 0.7925 - dice_coef: 0.0313 - loss: 0.7944
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.6682 - dice_coef: 0.0620 - loss: 0.7926
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 560ms/step - accuracy: 0.9327 - dice_coef: 0.0815 - loss: 0.6953
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 734ms/step - accuracy: 0.9469 - dice_coef: 0.1089 - loss: 0.6571
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 811ms/step - accuracy: 0.9727 - dice_coef: 0.1326 - loss: 0.6247
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 393ms/step - accuracy: 0.9847 - dice_coef: 0.1524 - loss: 0.5977
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.9877 - dice_coef: 0.1727 - loss: 0.5731   
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━

In [None]:
print("Y mean:", Y.mean(), "Y sum:", Y.sum())   # make sure labels aren’t near‑zero


Y mean: 0.020019531 Y sum: 656.0


In [None]:
model = build_unet(arch)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="binary_crossentropy"
)
model.fit(X, Y, epochs=50, batch_size=4, verbose=1)
pred = model.predict(X)
print(pred.min(), pred.max(), pred.mean(), pred.std())


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19s/step - loss: 0.4126
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.4104
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 724ms/step - loss: 0.4084
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 521ms/step - loss: 0.4065
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 465ms/step - loss: 0.4047
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 434ms/step - loss: 0.4030
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 563ms/step - loss: 0.4014
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step - loss: 0.3999
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 335ms/step - loss: 0.3983
Epoch 10/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step - loss: 0.3969
Epoch 11/50


In [None]:
w0 = [w.numpy().copy() for w in model.trainable_weights]
model.train_on_batch(X, Y)
w1 = [w.numpy() for w in model.trainable_weights]
max_delta = max(np.max(np.abs(a-b)) for a,b in zip(w0, w1))
print("max weight delta:", max_delta)


max weight delta: 0.00015913136


In [None]:
loss0 = model.evaluate(X, Y, verbose=0)
for _ in range(20):
    model.train_on_batch(X, Y)
loss1 = model.evaluate(X, Y, verbose=0)
print("loss0", loss0, "loss1", loss1)


loss0 0.7110035419464111 loss1 0.7006533741950989
