In [None]:
import importlib.util
import os, sys, time, pickle

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [None]:
# Locate the helper script in the current working directory. Its file name may
# contain a hyphen, so it cannot be pulled in with a plain `import` statement
# and is loaded by path instead. Both spellings promised by the error message
# below are probed (hyphenated first, then underscored).
cwd = os.getcwd()
candidates = [
    os.path.join(cwd, 'code-dataset2.py'),
    os.path.join(cwd, 'code_dataset2.py'),
]
# isfile (rather than exists) so a directory that happens to carry the same
# name is not mistaken for the helper script.
helper_path = next((p for p in candidates if os.path.isfile(p)), None)

if helper_path is None:
    raise FileNotFoundError(
        "Could not find 'code-dataset2.py' or 'code_dataset2.py' in the current working directory.\n"
        f"Tried: {candidates}\nPlease upload the helper file to the notebook folder."
    )

# Build a module object from the file and execute it under a valid identifier.
spec = importlib.util.spec_from_file_location("code_dataset2_mod", helper_path)
if spec is None or spec.loader is None:
    # spec_from_file_location returns None when no loader can handle the path.
    raise ImportError(f"Could not create an import spec for helper file: {helper_path}")
code_dataset2_mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(code_dataset2_mod)

# Extract the required functions; fail early with a clear error if any is
# missing or is not callable.
required_funcs = ['generate_and_load_dataset_from_files', 'build_custom_cnn']
missing = [f for f in required_funcs
           if not callable(getattr(code_dataset2_mod, f, None))]
if missing:
    raise AttributeError(f"Missing expected function(s) in helper module: {missing}. "
                         f"Open {helper_path} and make sure the functions are defined.")

generate_and_load_dataset_from_files = code_dataset2_mod.generate_and_load_dataset_from_files
build_custom_cnn = code_dataset2_mod.build_custom_cnn

In [None]:
# --- Run configuration: everything a reader might want to tune ---

# Folder handed to generate_and_load_dataset_from_files when loading the data.
DATASET_DIR = '/content/drive/MyDrive/Dataset_2'

# Image dimensions and initial learning rate. NOTE(review): none of these three
# is referenced by the cells visible in this notebook — confirm where (or
# whether) they are meant to be applied.
IMG_HEIGHT = IMG_WIDTH = 128
INITIAL_LR = 0.001

# Training-loop settings passed to model.fit.
EPOCHS = 50
BATCH_SIZE = 16

# The notebook aborts when fewer samples than this are loaded.
MIN_REQUIRED_SAMPLES = 70

In [None]:
# Timestamp (local time, one-second resolution) used to give the artifacts
# saved by this run distinct file names.
timestamp = time.strftime("%Y%m%d_%H%M%S")

# Folder under the working directory that receives the saved models; it is
# created on first use and left untouched when it already exists.
out_dir = os.path.join(cwd, 'pretrain_data')
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Read the dataset through the loader provided by the helper module.
print("Loading dataset from:", DATASET_DIR)
X, y = generate_and_load_dataset_from_files(DATASET_DIR)

# Guard 1: both return values must expose `.size` (i.e. look like arrays).
if not (hasattr(X, 'size') and hasattr(y, 'size')):
    raise ValueError("generate_and_load_dataset_from_files must return numpy arrays (X, y).")

# Guard 2: neither the inputs nor the targets may be empty.
if 0 in (X.size, y.size):
    raise SystemExit("No data loaded. Check DATASET_DIR and helper function.")

# Guard 3: refuse to continue with fewer samples than the configured minimum.
# The leading axis of X is taken as the sample count.
n_samples = X.shape[0]
print(f"Loaded {n_samples} samples.")
if MIN_REQUIRED_SAMPLES > n_samples:
    raise SystemExit(f"Need at least {MIN_REQUIRED_SAMPLES} images to proceed (found {n_samples}).")

In [None]:
# Hold out 20% of the samples for the final test evaluation. The fixed
# random_state makes the split identical on every Restart & Run All.
# train_test_split comes from the notebook's top import cell, so every
# dependency is visible in one place.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Per-sample shape: everything after the leading sample axis of X_train.
input_shape = X_train.shape[1:]
model = build_custom_cnn(input_shape)

model.summary()

In [None]:
# Every callback below watches the same validation metric.
monitored_metric = 'val_loss'

# Checkpoint file that is overwritten whenever the monitored metric improves.
best_during_training_path = os.path.join(out_dir, 'best_model_vndvi_best.h5')

# Stop once val_loss has not improved for 20 epochs, restoring the best weights.
early_stopping = EarlyStopping(
    monitor=monitored_metric,
    patience=20,
    restore_best_weights=True,
)

# Persist only the best model seen so far.
model_checkpoint = ModelCheckpoint(
    filepath=best_during_training_path,
    monitor=monitored_metric,
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 10 stagnant epochs, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.5,
    patience=10,
    min_lr=1e-5,
    verbose=1,
)

callbacks = [early_stopping, model_checkpoint, reduce_lr]

# Train; validation_split carves 20% off the training arrays for validation,
# which is what produces the val_loss the callbacks monitor.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Score the trained model on the held-out test split.
# NOTE(review): the two-value unpacking and the "MSE"/"MAE" labels presume the
# model was compiled with an MSE loss and exactly one metric (MAE) — confirm
# against build_custom_cnn in the helper module.
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {loss:.4f}, MAE: {mae:.4f}")

# Artifact 1: the full model in HDF5 format, named with this run's timestamp.
h5_path = os.path.join(out_dir, f'best_model_vndvi_{timestamp}.h5')
model.save(h5_path)
print("Saved full model (h5) to:", h5_path)

# Artifact 2: the architecture (JSON string) plus the weight arrays, pickled
# together in a single dictionary.
pkl_path = os.path.join(out_dir, f'best_model_vndvi_pickle_{timestamp}.pkl')
model_dict = dict(
    model_json=model.to_json(),
    weights=model.get_weights(),
)
with open(pkl_path, 'wb') as pkl_file:
    pickle.dump(model_dict, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)
print("Saved model JSON+weights (pickle) to:", pkl_path)

# Show the contents of the output folder, sorted, to confirm what was written.
print("\nFiles in", out_dir, ":")
for entry in sorted(os.listdir(out_dir)):
    print(" -", entry)