In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import rasterio
from tensorflow.keras.models import load_model, Model

In [None]:
# ==========================================
# 1. SETUP
# ==========================================
# Each location below can be overridden with an environment variable so the
# notebook can run on another machine without editing this cell. When the
# variable is unset, the original hard-coded value is used unchanged.

# Path to your Sentinel Quarterly Images (override: SENTINEL_TIF_FOLDER)
tif_folder = os.environ.get(
    "SENTINEL_TIF_FOLDER",
    "/Users/ruben/Desktop/Thesis/TrainingData/Sentinel2/sample50-quarterly-2022",
)

# Path to the model you just trained (override: CNN_MODEL_PATH)
model_path = os.environ.get("CNN_MODEL_PATH", "cnn_viirs_proxy.h5")

# Output CSV (override: FEATURES_OUTPUT_CSV)
output_csv = os.environ.get("FEATURES_OUTPUT_CSV", "dynamic_features_per_quarter.csv")

In [None]:
# ==========================================
# 2. LOAD & MODIFY MODEL
# ==========================================
print("Loading Proxy Model...")
full_model = load_model(model_path)

# Build a second model that shares the loaded network's input but stops at the
# layer named 'feature_vector', so it emits that layer's activations instead of
# the full model's final output.
# NOTE(review): earlier notes describe this output as 4096-dimensional; the
# size is not verifiable from this cell — confirm against the training notebook.
feature_layer = full_model.get_layer('feature_vector')
feature_extractor = Model(inputs=full_model.input, outputs=feature_layer.output)

print("Model ready for feature extraction.")

In [None]:
# ==========================================
# 3. IMAGE PRE-PROCESSING FUNCTION
# ==========================================
def process_sentinel_tif(path):
    """Load a GeoTIFF and convert it into a normalized 224x224 3-channel array.

    Bands 1, 2 and 3 are stacked along the last axis, contrast-stretched
    between the image-wide 2nd and 98th percentiles, clipped to [0, 1] and
    resized to 224x224.

    Args:
        path: Path of the GeoTIFF file to read.

    Returns:
        The array returned by ``tf.image.resize(...).numpy()`` (224x224 with
        3 channels), or ``None`` if reading or processing raised an exception.
        The error is printed rather than propagated, so callers can skip the
        file and continue.
    """
    try:
        with rasterio.open(path) as src:
            # Bands 1, 2, 3 are treated as R, G, B.
            # NOTE(review): assumes the export stored the bands in RGB order — confirm.
            r = src.read(1)
            g = src.read(2)
            b = src.read(3)
            img = np.dstack((r, g, b))

        # Percentile stretch (per the original note, meant to mirror the
        # normalization used at training time).
        # NOTE(review): np.percentile propagates NaN, so an image containing
        # NaN pixels still comes out all-NaN — confirm inputs are NaN-free.
        p2, p98 = np.percentile(img, (2, 98))
        if p98 == p2:
            # Nearly flat image: fall back to an 8-bit ceiling, as before.
            p98 = 255
        span = p98 - p2
        if span <= 0:
            # Only reachable after the fallback when p2 >= 255. Without this
            # guard the division is 0/0 (NaN) for p2 == 255 and the stretch is
            # inverted for p2 > 255. A unit span keeps the floor at 0.
            span = 1.0
        img = np.clip((img - p2) / span, 0, 1)

        # Resize to 224x224 (Model Input Size)
        img = tf.image.resize(img, (224, 224)).numpy()
        return img
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller skip.
        print(f"Error reading {path}: {e}")
        return None

In [None]:
# ==========================================
# 4. EXTRACTION LOOP
# ==========================================
# One flat dict per processed image: ClusterID, Quarter, then CNN_0..CNN_k.
data_records = []

print("Starting extraction (this may take a moment)...")

# Walk through all cluster folders
for root, dirs, files in os.walk(tif_folder):
    for file in files:
        # Only .tif files whose name contains "dhs_" are processed
        if not (file.endswith(".tif") and "dhs_" in file):
            continue

        # Best-effort per file: any failure is reported and the file skipped.
        try:
            # Parse filename: dhs_0001_2022_Q1.tif -> ['dhs', '0001', '2022', 'Q1']
            # splitext strips only the trailing extension, whereas
            # str.replace would remove every '.tif' occurrence in the name.
            parts = os.path.splitext(file)[0].split('_')
            cluster_id = int(parts[1])
            quarter = parts[3]  # "Q1", "Q2", etc.

            # Pre-process
            file_path = os.path.join(root, file)
            img = process_sentinel_tif(file_path)
            if img is None:
                # The reader has already printed why this file failed.
                continue

            # Add batch dimension (1, 224, 224, 3)
            img_batch = np.expand_dims(img, axis=0)

            # EXTRACT FEATURES
            # Call the model directly instead of .predict(): Keras documents
            # predict() as unsuited to loops over single inputs and recommends
            # __call__ with training=False for inference.
            features = feature_extractor(img_batch, training=False)

            # Flatten to a 1D array (one value per feature)
            feat_vec = np.asarray(features).reshape(-1)

            # Store result: identifiers first, then CNN_0, CNN_1, ...
            record = {'ClusterID': cluster_id, 'Quarter': quarter}
            record.update({f'CNN_{i}': val for i, val in enumerate(feat_vec)})
            data_records.append(record)

        except Exception as e:
            print(f"Skipping {file}: {e}")

In [None]:
# ==========================================
# 5. SAVE
# ==========================================
if not data_records:
    print("FAILED: No features extracted.")
else:
    # Build the table in one step, ordered by cluster and then by quarter
    df = pd.DataFrame(data_records).sort_values(by=['ClusterID', 'Quarter'])

    # Write to disk without the index column (the resulting CSV can be large)
    df.to_csv(output_csv, index=False)

    # Short summary of what was written
    print("-" * 30)
    print(f"SUCCESS: Extracted features for {len(df)} images.")
    print(f"Shape: {df.shape} (Rows, 4096+ Features)")
    print(f"Saved to: {output_csv}")