In [2]:
import tensorflow as tf

print("=== TensorFlow GPU Test ===")
print(f"TensorFlow version: {tf.__version__}")

# Check if GPU is available
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

# Check if built with CUDA
print(f"Built with CUDA: {tf.test.is_built_with_cuda()}")

# Check if built with cuDNN
print(f"Built with cuDNN: {tf.test.is_built_with_cudnn()}")

# List all available devices
print("\nAll available devices:")
for device in tf.config.list_physical_devices():
    print(f"  - {device}")

=== TensorFlow GPU Test ===
TensorFlow version: 2.20.0
GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Built with CUDA: True


AttributeError: module 'tensorflow._api.v2.test' has no attribute 'is_built_with_cudnn'

In [3]:
import tensorflow as tf

print("=== Detailed GPU Information ===")

# Get all GPU devices
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    print("✅ GPU(s) found!")
    for i, gpu in enumerate(gpus):
        print(f"\nGPU {i}:")
        print(f"  Name: {gpu.name}")
        print(f"  Device type: {gpu.device_type}")
        
        # Get device details (if available)
        try:
            details = tf.config.experimental.get_device_details(gpu)
            print(f"  Details: {details}")
        except:
            print("  Device details not available")
else:
    print("❌ No GPU devices found")

=== Detailed GPU Information ===
✅ GPU(s) found!

GPU 0:
  Name: /physical_device:GPU:0
  Device type: GPU
  Details: {'compute_capability': (8, 9), 'device_name': 'NVIDIA GeForce RTX 4050 Laptop GPU'}


In [4]:
import tensorflow as tf
import time

print("=== GPU Performance Test ===")

# Test on CPU
with tf.device('/CPU:0'):
    print("Testing on CPU...")
    start_time = time.time()
    a_cpu = tf.random.normal([10000, 10000])
    b_cpu = tf.random.normal([10000, 10000])
    c_cpu = tf.matmul(a_cpu, b_cpu)
    cpu_time = time.time() - start_time
    print(f"CPU time: {cpu_time:.4f} seconds")

# Test on GPU (if available)
if tf.config.list_physical_devices('GPU'):
    with tf.device('/GPU:0'):
        print("Testing on GPU...")
        start_time = time.time()
        a_gpu = tf.random.normal([10000, 10000])
        b_gpu = tf.random.normal([10000, 10000])
        c_gpu = tf.matmul(a_gpu, b_gpu)
        gpu_time = time.time() - start_time
        print(f"GPU time: {gpu_time:.4f} seconds")
        print(f"Speedup: {cpu_time/gpu_time:.2f}x faster!")
else:
    print("No GPU available for testing")

=== GPU Performance Test ===
Testing on CPU...


I0000 00:00:1757702550.877400   42075 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3855 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
2025-09-13 00:12:30.895822: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 400000000 exceeds 10% of free system memory.
2025-09-13 00:12:31.025302: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 400000000 exceeds 10% of free system memory.
2025-09-13 00:12:31.109708: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 400000000 exceeds 10% of free system memory.
2025-09-13 00:12:31.247667: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 400000000 exceeds 10% of free system memory.
2025-09-13 00:12:31.370156: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 400000000 exceeds 10% of free system me

CPU time: 3.6673 seconds
Testing on GPU...
GPU time: 0.1676 seconds
Speedup: 21.89x faster!


In [None]:
# emnist_byclass_finetune_optimized_4050.py
import os
import glob
import json
import random
import zipfile
import tarfile
import shutil
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
import tensorflow as tf
from tensorflow import keras

# -----------------------
# User config - OPTIMIZED for RTX 4050
# -----------------------
DATASET_DIR = "/home/rand-jadav/by_class/by_class"
DATASET_ARCHIVE = "/home/rand-jadav/Downloads/by.zip"
IMG_SIZE = (128, 128)
BATCH_SIZE = 64  # Increased batch size
EPOCHS = 2
VAL_SPLIT = 0.15
RANDOM_SEED = 42
SHUFFLE_BUFFER = 10000
INVERT = True
PRETRAINED_MODEL = "best_model_byclass_finetuned.keras"
BEST_MODEL_PATH = "best_model_byclass_finetuned.keras"
LABELMAP_JSON = "byclass_labelmap.json"

VALID_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tiff", ".tif"}
SUPPORTED_ARCHIVES = {".zip", ".tar", ".gz", ".bz2", ".tar.gz", ".tar.bz2"}

# =========================
# GPU Memory Optimization
# =========================
def setup_gpu_memory():
    try:
        gpus = tf.config.list_physical_devices('GPU')
        if not gpus:
            print("ℹ️ No GPU detected, using CPU")
            return
        
        # Set memory growth first
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        
        # Then set memory limit if needed
        try:
            for gpu in gpus:
                tf.config.set_logical_device_configuration(
                    gpu,
                    [tf.config.LogicalDeviceConfiguration(memory_limit=5120)]
                )
        except:
            print("⚠️  Could not set memory limit, using memory growth only")
        
        print(f"✅ GPU configured: {len(gpus)} Physical GPU, {len(tf.config.list_logical_devices('GPU'))} Logical GPU")
        
    except Exception as e:
        print(f"❌ GPU setup error: {e}")

# =========================
# Reproducibility
# =========================
def set_all_seeds(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

# =========================
# Dataset scanning
# =========================
def scan_dataset_byclass(byclass_dir: str) -> Tuple[List[str], List[int], Dict[str, int], Dict[int, str]]:
    filepaths = []
    str_labels = []
    byclass_path = Path(byclass_dir).resolve()
    
    if not byclass_path.exists():
        raise FileNotFoundError(f"Dataset directory '{byclass_dir}' not found.")
    
    # Use glob for faster directory scanning
    image_patterns = [f"**/*{ext}" for ext in VALID_EXTENSIONS]
    
    for pattern in image_patterns:
        for filepath in byclass_path.glob(pattern):
            if filepath.is_file():
                class_name = filepath.parent.name
                filepaths.append(str(filepath))
                str_labels.append(class_name)
    
    if len(filepaths) == 0:
        return [], [], {}, {}
    
    label_names = sorted(set(str_labels))
    label2idx = {lab: i for i, lab in enumerate(label_names)}
    idx2label = {i: lab for lab, i in label2idx.items()}
    int_labels = [label2idx[s] for s in str_labels]
    
    print(f"Scanned {len(filepaths)} valid images across {len(label2idx)} classes")
    return filepaths, int_labels, label2idx, idx2label

# =========================
# Preprocessing & Augmentation
# =========================
def load_and_preprocess_image(path, label):
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=1, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    if INVERT:
        img = 1.0 - img
    return img, label

def get_simple_augmentation() -> keras.Sequential:
    return keras.Sequential([
        keras.layers.RandomRotation(0.05, fill_mode="constant", seed=RANDOM_SEED),
        keras.layers.RandomZoom(0.05, fill_mode="constant", seed=RANDOM_SEED+1),
        keras.layers.RandomContrast(0.1, seed=RANDOM_SEED+2),
    ], name="simple_data_augmentation")

AUGMENTOR = get_simple_augmentation()

def build_optimized_dataset(paths: List[str], labels: np.ndarray, training: bool, batch_size: int) -> tf.data.Dataset:
    """Optimized dataset building without caching"""
    ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths), tf.constant(labels, dtype=tf.int32)))
    
    if training:
        ds = ds.shuffle(min(SHUFFLE_BUFFER, len(paths)), seed=RANDOM_SEED, reshuffle_each_iteration=True)
    
    ds = ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.map(lambda img, label: (tf.ensure_shape(img, IMG_SIZE + (1,)), label),
                num_parallel_calls=tf.data.AUTOTUNE)
    
    if training:
        ds = ds.repeat()
        
        def _apply_aug(img, label):
            img_exp = tf.expand_dims(img, 0)
            img_aug = AUGMENTOR(img_exp, training=True)
            img_aug = tf.squeeze(img_aug, 0)
            return img_aug, label
        
        ds = ds.map(_apply_aug, num_parallel_calls=tf.data.AUTOTUNE)
    
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    
    return ds

# =========================
# Model Loading
# =========================
def load_pretrained_model(model_path, num_classes):
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Pretrained model not found: {model_path}")
    
    print(f"📥 Loading pretrained model from: {model_path}")
    model = keras.models.load_model(model_path)
    
    # Check if the model output matches our number of classes
    output_shape = model.output_shape
    if output_shape and len(output_shape) > 1 and output_shape[-1] != num_classes:
        print(f"⚠️  Model output shape {output_shape[-1]} doesn't match number of classes {num_classes}")
        print("🔄 Replacing the final classification layer...")
        
        # Remove the last layer
        model = keras.Model(inputs=model.input, outputs=model.layers[-2].output)
        
        # Add new classification layer
        x = keras.layers.Dense(num_classes, activation="softmax", name="new_classification")(model.output)
        model = keras.Model(inputs=model.input, outputs=x)
    
    return model

# =========================
# Training Utils
# =========================
def get_optimizer():
    # Increased learning rate for faster convergence
    return keras.optimizers.Adam(learning_rate=0.001)

def get_advanced_callbacks():
    return [
        keras.callbacks.ModelCheckpoint(
            BEST_MODEL_PATH, 
            monitor="val_accuracy", 
            save_best_only=True, 
            mode="max",
            verbose=1
        ),
        keras.callbacks.EarlyStopping(
            monitor="val_accuracy", 
            patience=3,
            restore_best_weights=True,
            min_delta=0.002,
            verbose=1
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", 
            factor=0.5, 
            patience=2,
            min_lr=1e-6,
            verbose=1
        )
    ]

def compute_class_weights(y_train):
    class_counts = np.bincount(y_train)
    total_samples = len(y_train)
    num_classes = len(class_counts)
    
    # Handle zero counts
    class_counts = np.where(class_counts == 0, 1, class_counts)
    
    class_weights = total_samples / (num_classes * class_counts.astype(float))
    class_weights = class_weights / np.mean(class_weights)
    
    return {i: float(weight) for i, weight in enumerate(class_weights)}

# =========================
# Stratified Split
# =========================
def stratified_split(filepaths, labels, test_size=0.15, random_state=42):
    """Simple stratified split implementation without scikit-learn"""
    from collections import defaultdict
    
    # Group by class
    class_groups = defaultdict(list)
    for path, label in zip(filepaths, labels):
        class_groups[label].append(path)
    
    train_paths, val_paths = [], []
    train_labels, val_labels = [], []
    
    for label, paths in class_groups.items():
        n_val = max(1, int(len(paths) * test_size))
        random.shuffle(paths)
        
        val_paths.extend(paths[:n_val])
        train_paths.extend(paths[n_val:])
        
        val_labels.extend([label] * n_val)
        train_labels.extend([label] * (len(paths) - n_val))
    
    # Shuffle the datasets
    train_data = list(zip(train_paths, train_labels))
    val_data = list(zip(val_paths, val_labels))
    
    random.shuffle(train_data)
    random.shuffle(val_data)
    
    train_paths, train_labels = zip(*train_data) if train_data else ([], [])
    val_paths, val_labels = zip(*val_data) if val_data else ([], [])
    
    return list(train_paths), list(val_paths), np.array(train_labels), np.array(val_labels)

# =========================
# Main
# =========================
def main():
    set_all_seeds(RANDOM_SEED)
    setup_gpu_memory()
    
    # Disable mixed precision for now to avoid issues
    try:
        from tensorflow.keras import mixed_precision
        mixed_precision.set_global_policy('float32')
        print("✅ Using float32 precision for stability")
    except:
        pass

    print("🔍 Checking dataset availability...")
    if not ensure_dataset_available():
        print("❌ Dataset preparation failed. Exiting.")
        return

    print("🔍 Scanning byclass dataset...")
    filepaths, labels, label2idx, idx2label = scan_dataset_byclass(DATASET_DIR)
    
    if len(filepaths) == 0:
        print("❌ No images found after extraction. Please check your archive file.")
        return
    
    print(f"Found {len(filepaths)} images across {len(label2idx)} classes")

    # Use our custom stratified split
    train_paths, val_paths, y_train, y_val = stratified_split(filepaths, labels, test_size=VAL_SPLIT, random_state=RANDOM_SEED)

    steps_per_epoch = max(1, len(train_paths) // BATCH_SIZE)
    validation_steps = max(1, len(val_paths) // BATCH_SIZE)

    class_weights = compute_class_weights(y_train)

    print("📥 Loading or creating model...")
    try:
        model = load_pretrained_model(PRETRAINED_MODEL, len(label2idx))
    except Exception as e:
        print(f"❌ Failed to load pretrained model: {e}")
        print("🔄 Creating a new model...")
        # Create a simpler model if loading fails
        inputs = keras.layers.Input(shape=IMG_SIZE + (1,))
        x = keras.layers.Conv2D(32, 3, padding="same", activation="relu")(inputs)
        x = keras.layers.MaxPooling2D(2)(x)
        x = keras.layers.Conv2D(64, 3, padding="same", activation="relu")(x)
        x = keras.layers.MaxPooling2D(2)(x)
        x = keras.layers.Conv2D(128, 3, padding="same", activation="relu")(x)
        x = keras.layers.GlobalAveragePooling2D()(x)
        x = keras.layers.Dense(256, activation="relu")(x)
        x = keras.layers.Dropout(0.5)(x)
        outputs = keras.layers.Dense(len(label2idx), activation="softmax")(x)
        model = keras.Model(inputs, outputs)
    
    model.compile(optimizer=get_optimizer(), 
                 loss="sparse_categorical_crossentropy", 
                 metrics=["accuracy"])
    model.summary()

    print(f"\n📊 Training samples: {len(train_paths)}")
    print(f"📊 Validation samples: {len(val_paths)}")
    print(f"📊 Steps per epoch: {steps_per_epoch}")
    print(f"📊 Batch size: {BATCH_SIZE}")
    print(f"📊 Learning rate: 0.001")

    print("\n🚀 Starting training...")
    
    # Build datasets
    ds_train = build_optimized_dataset(train_paths, y_train, training=True, batch_size=BATCH_SIZE)
    ds_val = build_optimized_dataset(val_paths, y_val, training=False, batch_size=BATCH_SIZE)

    # Test one batch to ensure it works
    print("Testing data pipeline...")
    for batch in ds_train.take(1):
        print(f"Batch shape: {batch[0].shape}, Labels shape: {batch[1].shape}")
        break

    history = model.fit(
        ds_train,
        epochs=EPOCHS,
        validation_data=ds_val,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        class_weight=class_weights,
        callbacks=get_advanced_callbacks(),
        verbose=1
    )

    print(f"\n✅ Training completed! Best model saved to: {BEST_MODEL_PATH}")

if __name__ == "__main__":
    main()

❌ GPU setup error: Physical devices cannot be modified after being initialized
✅ Using float32 precision for stability
🔍 Checking dataset availability...
✅ Dataset already exists at: /home/rand-jadav/by_class/by_class
🔍 Scanning byclass dataset...
Scanned 1342017 valid images across 64 classes
Found 1342017 images across 64 classes
📥 Loading or creating model...
📥 Loading pretrained model from: best_model_byclass_finetuned.keras



📊 Training samples: 1140745
📊 Validation samples: 201272
📊 Steps per epoch: 17824
📊 Batch size: 64
📊 Learning rate: 0.001

🚀 Starting training...
Testing data pipeline...
Batch shape: (64, 128, 128, 1), Labels shape: (64,)
Epoch 1/2


2025-09-13 13:47:44.281730: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_9}}
E0000 00:00:1757751467.679902   27397 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_13_1/dropout_7_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m  238/17824[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:07:46[0m 436ms/step - accuracy: 0.3950 - loss: 0.2599

In [2]:
# emnist_byclass_finetune_optimized_4050.py
import os
import glob
import json
import random
import zipfile
import tarfile
import shutil
from pathlib import Path
from typing import List, Tuple, Dict
from collections import Counter, defaultdict

import numpy as np
import tensorflow as tf
from tensorflow import keras

# -----------------------
# User config - OPTIMIZED for RTX 4050
# -----------------------
DATASET_DIR = "/home/rand-jadav/by_class/by_class"
DATASET_ARCHIVE = "/home/rand-jadav/Downloads/by.zip"
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
EPOCHS = 2
VAL_SPLIT = 0.15
RANDOM_SEED = 42
SHUFFLE_BUFFER = 10000
INVERT = True
PRETRAINED_MODEL = "best_model_byclass_finetuned.keras"
BEST_MODEL_PATH = "best_model_byclass__2.keras"
LABELMAP_JSON = "byclass_labelmap.json"

VALID_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tiff", ".tif"}
SUPPORTED_ARCHIVES = {".zip", ".tar", ".gz", ".bz2", ".tar.gz", ".tar.bz2"}

# =========================
# Archive Extraction Functions
# =========================
def extract_archive(archive_path: str, extract_to: str) -> bool:
    """Extract supported archive formats automatically"""
    archive_path = Path(archive_path)
    extract_to = Path(extract_to)
    
    if not archive_path.exists():
        print(f"❌ Archive not found: {archive_path}")
        return False
    
    # Create extraction directory if it doesn't exist
    extract_to.mkdir(parents=True, exist_ok=True)
    
    try:
        if archive_path.suffix == ".zip":
            print(f"📦 Extracting ZIP archive: {archive_path}")
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
            print("✅ ZIP extraction completed")
            
        elif archive_path.suffix in [".tar", ".gz", ".bz2", ".tar.gz", ".tar.bz2"]:
            print(f"📦 Extracting TAR archive: {archive_path}")
            if archive_path.suffix in [".gz", ".tar.gz"]:
                mode = "r:gz"
            elif archive_path.suffix in [".bz2", ".tar.bz2"]:
                mode = "r:bz2"
            else:
                mode = "r"
            
            with tarfile.open(archive_path, mode) as tar_ref:
                tar_ref.extractall(extract_to)
            print("✅ TAR extraction completed")
            
        else:
            print(f"❌ Unsupported archive format: {archive_path.suffix}")
            return False
            
        return True
        
    except Exception as e:
        print(f"❌ Extraction failed: {e}")
        return False

def ensure_dataset_available():
    """Check if dataset exists, extract if needed"""
    dataset_path = Path(DATASET_DIR)
    archive_path = Path(DATASET_ARCHIVE)
    
    # Check if dataset already exists
    if dataset_path.exists() and any(dataset_path.iterdir()):
        print(f"✅ Dataset already exists at: {DATASET_DIR}")
        return True
    
    # Check if archive exists and extract
    if archive_path.exists():
        print(f"📦 Found archive: {archive_path}")
        print("🔄 Extracting dataset...")
        if extract_archive(archive_path, dataset_path):
            # Verify extraction
            if dataset_path.exists() and any(dataset_path.iterdir()):
                print("✅ Dataset extraction verified")
                return True
            else:
                print("❌ Extraction completed but no files found")
                return False
        else:
            return False
    else:
        print(f"❌ Neither dataset nor archive found:")
        print(f"   Dataset directory: {DATASET_DIR}")
        print(f"   Archive file: {DATASET_ARCHIVE}")
        return False

# =========================
# GPU Memory Optimization
# =========================
def setup_gpu_memory():
    try:
        gpus = tf.config.list_physical_devices('GPU')
        if not gpus:
            print("ℹ️ No GPU detected, using CPU")
            return
        
        # Set memory growth first
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        
        # Then set memory limit if needed
        try:
            for gpu in gpus:
                tf.config.set_logical_device_configuration(
                    gpu,
                    [tf.config.LogicalDeviceConfiguration(memory_limit=5120)]
                )
        except:
            print("⚠️  Could not set memory limit, using memory growth only")
        
        print(f"✅ GPU configured: {len(gpus)} Physical GPU, {len(tf.config.list_logical_devices('GPU'))} Logical GPU")
        
    except Exception as e:
        print(f"❌ GPU setup error: {e}")

# =========================
# Reproducibility
# =========================
def set_all_seeds(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

# =========================
# Dataset scanning with detailed class printing
# =========================
def scan_dataset_byclass(byclass_dir: str) -> Tuple[List[str], List[int], Dict[str, int], Dict[int, str]]:
    filepaths = []
    str_labels = []
    byclass_path = Path(byclass_dir).resolve()
    
    if not byclass_path.exists():
        raise FileNotFoundError(f"Dataset directory '{byclass_dir}' not found.")
    
    # Use glob for faster directory scanning
    image_patterns = [f"**/*{ext}" for ext in VALID_EXTENSIONS]
    
    for pattern in image_patterns:
        for filepath in byclass_path.glob(pattern):
            if filepath.is_file():
                # Get class name from parent directory
                class_name = filepath.parent.name
                filepaths.append(str(filepath))
                str_labels.append(class_name)
    
    if len(filepaths) == 0:
        return [], [], {}, {}
    
    label_names = sorted(set(str_labels))
    label2idx = {lab: i for i, lab in enumerate(label_names)}
    idx2label = {i: lab for lab, i in label2idx.items()}
    int_labels = [label2idx[s] for s in str_labels]
    
    print(f"Scanned {len(filepaths)} valid images across {len(label2idx)} classes")
    return filepaths, int_labels, label2idx, idx2label, str_labels  # Return str_labels as well

# =========================
# Print all classes with character mapping
# =========================
def print_all_classes_with_characters(label2idx, idx2label, str_labels):
    """Print all classes with their corresponding characters"""
    print("\n" + "="*80)
    print("DETAILED CLASS MAPPING WITH CHARACTERS")
    print("="*80)
    
    # Create a mapping from folder names to expected characters
    # This is based on EMNIST byclass structure
    folder_to_char = {
        'hsf_0': '0', 'hsf_1': '1', 'hsf_2': '2', 'hsf_3': '3', 'hsf_4': '4',
        'hsf_6': '5', 'hsf_7': '6', 'train_30': '7', 'train_32': '8', 'train_34': '9',
        'train_35': 'A', 'train_36': 'B', 'train_37': 'C', 'train_38': 'D', 'train_39': 'E',
        'train_41': 'F', 'train_42': 'G', 'train_43': 'H', 'train_44': 'I', 'train_45': 'J',
        'train_46': 'K', 'train_48': 'L', 'train_49': 'M', 'train_4a': 'N', 'train_4b': 'O',
        'train_4d': 'P', 'train_4e': 'Q', 'train_4f': 'R', 'train_50': 'S', 'train_51': 'T',
        'train_52': 'U', 'train_54': 'V', 'train_55': 'W', 'train_56': 'X', 'train_57': 'Y',
        'train_58': 'Z', 'train_59': 'a', 'train_5a': 'b', 'train_61': 'c', 'train_62': 'd',
        'train_63': 'e', 'train_64': 'f', 'train_65': 'g', 'train_66': 'h', 'train_67': 'i',
        'train_68': 'j', 'train_69': 'k', 'train_6a': 'l', 'train_6b': 'm', 'train_6c': 'n',
        'train_6d': 'o', 'train_6e': 'p', 'train_6f': 'q', 'train_70': 'r', 'train_71': 's',
        'train_72': 't', 'train_73': 'u', 'train_74': 'v', 'train_75': 'w', 'train_76': 'x',
        'train_77': 'y', 'train_78': 'z'
    }
    
    # Count samples per class
    class_counts = Counter(str_labels)
    
    print(f"{'Index':<6} {'Folder Name':<15} {'Character':<10} {'Sample Count':<12}")
    print("-" * 80)
    
    for folder_name, class_idx in sorted(label2idx.items(), key=lambda x: x[1]):
        character = folder_to_char.get(folder_name, 'Unknown')
        count = class_counts.get(folder_name, 0)
        print(f"{class_idx:<6} {folder_name:<15} {character:<10} {count:<12}")
    
    print("="*80)

# =========================
# Validation Accuracy Analysis Function
# =========================
def analyze_validation_accuracy(history, model, val_paths, y_val, idx2label):
    """Analyze and print validation accuracy details"""
    print("\n" + "="*60)
    print("VALIDATION ACCURACY ANALYSIS")
    print("="*60)
    
    if history and hasattr(history, 'history'):
        # Print final validation accuracy
        if 'val_accuracy' in history.history and len(history.history['val_accuracy']) > 0:
            final_val_acc = history.history['val_accuracy'][-1]
            print(f"Final Validation Accuracy: {final_val_acc:.4f} ({final_val_acc*100:.2f}%)")
        
        # Print training accuracy for comparison
        if 'accuracy' in history.history and len(history.history['accuracy']) > 0:
            final_train_acc = history.history['accuracy'][-1]
            print(f"Final Training Accuracy: {final_train_acc:.4f} ({final_train_acc*100:.2f}%)")
        
        # Print accuracy progression
        if 'val_accuracy' in history.history:
            print("\nValidation Accuracy Progression:")
            for epoch, acc in enumerate(history.history['val_accuracy'], 1):
                print(f"  Epoch {epoch}: {acc:.4f} ({acc*100:.2f}%)")
    
    # Calculate per-class accuracy if we have a model and validation data
    if model is not None and val_paths is not None and y_val is not None and len(val_paths) > 0:
        print("\nCalculating per-class validation accuracy...")
        
        try:
            # Create a small validation dataset for evaluation
            val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_paths), tf.constant(y_val, dtype=tf.int32)))
            val_ds = val_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
            val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
            
            # Get predictions
            predictions = model.predict(val_ds, verbose=1)
            predicted_classes = np.argmax(predictions, axis=1)
            
            # Calculate per-class accuracy
            class_correct = defaultdict(int)
            class_total = defaultdict(int)
            
            for true_label, pred_label in zip(y_val, predicted_classes):
                class_total[true_label] += 1
                if true_label == pred_label:
                    class_correct[true_label] += 1
            
            print(f"\nPer-class Validation Accuracy:")
            print(f"{'Class':<6} {'Character':<10} {'Accuracy':<10} {'Correct/Total':<15}")
            print("-" * 50)
            
            char_mapping = {
                'hsf_0': '0', 'hsf_1': '1', 'hsf_2': '2', 'hsf_3': '3', 'hsf_4': '4',
                'hsf_6': '5', 'hsf_7': '6', 'train_30': '7', 'train_32': '8', 'train_34': '9',
                'train_35': 'A', 'train_36': 'B', 'train_37': 'C', 'train_38': 'D', 'train_39': 'E',
                'train_41': 'F', 'train_42': 'G', 'train_43': 'H', 'train_44': 'I', 'train_45': 'J',
                'train_46': 'K', 'train_48': 'L', 'train_49': 'M', 'train_4a': 'N', 'train_4b': 'O',
                'train_4d': 'P', 'train_4e': 'Q', 'train_4f': 'R', 'train_50': 'S', 'train_51': 'T',
                'train_52': 'U', 'train_54': 'V', 'train_55': 'W', 'train_56': 'X', 'train_57': 'Y',
                'train_58': 'Z', 'train_59': 'a', 'train_5a': 'b', 'train_61': 'c', 'train_62': 'd',
                'train_63': 'e', 'train_64': 'f', 'train_65': 'g', 'train_66': 'h', 'train_67': 'i',
                'train_68': 'j', 'train_69': 'k', 'train_6a': 'l', 'train_6b': 'm', 'train_6c': 'n',
                'train_6d': 'o', 'train_6e': 'p', 'train_6f': 'q', 'train_70': 'r', 'train_71': 's',
                'train_72': 't', 'train_73': 'u', 'train_74': 'v', 'train_75': 'w', 'train_76': 'x',
                'train_77': 'y', 'train_78': 'z'
            }
            
            for class_idx in sorted(class_total.keys()):
                accuracy = class_correct.get(class_idx, 0) / class_total[class_idx] if class_total[class_idx] > 0 else 0
                folder_name = idx2label.get(class_idx, "Unknown")
                character = char_mapping.get(folder_name, '?')
                print(f"{class_idx:<6} {character:<10} {accuracy:.3f}       {class_correct.get(class_idx, 0):<3}/{class_total[class_idx]:<3}")
        
        except Exception as e:
            print(f"❌ Error during per-class accuracy calculation: {e}")
    else:
        print("⚠️  Skipping per-class accuracy analysis - missing data or model")

# =========================
# Preprocessing & Augmentation
# =========================
def load_and_preprocess_image(path, label):
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=1, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    if INVERT:
        img = 1.0 - img
    return img, label

def get_simple_augmentation() -> keras.Sequential:
    return keras.Sequential([
        keras.layers.RandomRotation(0.05, fill_mode="constant", seed=RANDOM_SEED),
        keras.layers.RandomZoom(0.05, fill_mode="constant", seed=RANDOM_SEED+1),
        keras.layers.RandomContrast(0.1, seed=RANDOM_SEED+2),
    ], name="simple_data_augmentation")

AUGMENTOR = get_simple_augmentation()

def build_optimized_dataset(paths: List[str], labels: np.ndarray, training: bool, batch_size: int) -> tf.data.Dataset:
    """Optimized dataset building without caching"""
    if len(paths) == 0:
        raise ValueError("No paths provided for dataset building")
    
    ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths), tf.constant(labels, dtype=tf.int32)))
    
    if training:
        ds = ds.shuffle(min(SHUFFLE_BUFFER, len(paths)), seed=RANDOM_SEED, reshuffle_each_iteration=True)
    
    ds = ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.map(lambda img, label: (tf.ensure_shape(img, IMG_SIZE + (1,)), label),
                num_parallel_calls=tf.data.AUTOTUNE)
    
    if training:
        ds = ds.repeat()
        
        def _apply_aug(img, label):
            img_exp = tf.expand_dims(img, 0)
            img_aug = AUGMENTOR(img_exp, training=True)
            img_aug = tf.squeeze(img_aug, 0)
            return img_aug, label
        
        ds = ds.map(_apply_aug, num_parallel_calls=tf.data.AUTOTUNE)
    
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    
    return ds

# =========================
# Model Loading
# =========================
def load_pretrained_model(model_path, num_classes):
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Pretrained model not found: {model_path}")
    
    print(f"📥 Loading pretrained model from: {model_path}")
    model = keras.models.load_model(model_path)
    
    # Check if the model output matches our number of classes
    output_shape = model.output_shape
    if output_shape and len(output_shape) > 1 and output_shape[-1] != num_classes:
        print(f"⚠️  Model output shape {output_shape[-1]} doesn't match number of classes {num_classes}")
        print("🔄 Replacing the final classification layer...")
        
        # Remove the last layer
        model = keras.Model(inputs=model.input, outputs=model.layers[-2].output)
        
        # Add new classification layer
        x = keras.layers.Dense(num_classes, activation="softmax", name="new_classification")(model.output)
        model = keras.Model(inputs=model.input, outputs=x)
    
    return model

# =========================
# Training Utils
# =========================
def get_optimizer():
    # Increased learning rate for faster convergence
    return keras.optimizers.Adam(learning_rate=0.001)

def get_advanced_callbacks():
    return [
        keras.callbacks.ModelCheckpoint(
            BEST_MODEL_PATH, 
            monitor="val_accuracy", 
            save_best_only=True, 
            mode="max",
            verbose=1
        ),
        keras.callbacks.EarlyStopping(
            monitor="val_accuracy", 
            patience=3,
            restore_best_weights=True,
            min_delta=0.002,
            verbose=1
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", 
            factor=0.5, 
            patience=2,
            min_lr=1e-6,
            verbose=1
        )
    ]

def compute_class_weights(y_train):
    if len(y_train) == 0:
        return {}
    
    class_counts = np.bincount(y_train)
    total_samples = len(y_train)
    num_classes = len(class_counts)
    
    # Handle zero counts
    class_counts = np.where(class_counts == 0, 1, class_counts)
    
    class_weights = total_samples / (num_classes * class_counts.astype(float))
    class_weights = class_weights / np.mean(class_weights)
    
    return {i: float(weight) for i, weight in enumerate(class_weights)}

# =========================
# Stratified Split
# =========================
def stratified_split(filepaths, labels, test_size=0.15, random_state=42):
    """Simple stratified split implementation without scikit-learn"""
    if len(filepaths) == 0:
        return [], [], np.array([]), np.array([])
    
    # Group by class
    class_groups = defaultdict(list)
    for path, label in zip(filepaths, labels):
        class_groups[label].append(path)
    
    train_paths, val_paths = [], []
    train_labels, val_labels = [], []
    
    for label, paths in class_groups.items():
        n_val = max(1, int(len(paths) * test_size))
        random.shuffle(paths)
        
        val_paths.extend(paths[:n_val])
        train_paths.extend(paths[n_val:])
        
        val_labels.extend([label] * n_val)
        train_labels.extend([label] * (len(paths) - n_val))
    
    # Shuffle the datasets
    train_data = list(zip(train_paths, train_labels))
    val_data = list(zip(val_paths, val_labels))
    
    random.shuffle(train_data)
    random.shuffle(val_data)
    
    train_paths, train_labels = zip(*train_data) if train_data else ([], [])
    val_paths, val_labels = zip(*val_data) if val_data else ([], [])
    
    return list(train_paths), list(val_paths), np.array(train_labels), np.array(val_labels)

# =========================
# Main
# =========================
def main():
    set_all_seeds(RANDOM_SEED)
    setup_gpu_memory()
    
    # Disable mixed precision for now to avoid issues
    try:
        from tensorflow.keras import mixed_precision
        mixed_precision.set_global_policy('float32')
        print("✅ Using float32 precision for stability")
    except:
        pass

    print("🔍 Checking dataset availability...")
    if not ensure_dataset_available():
        print("❌ Dataset preparation failed. Exiting.")
        return

    print("🔍 Scanning byclass dataset...")
    filepaths, labels, label2idx, idx2label, str_labels = scan_dataset_byclass(DATASET_DIR)
    
    if len(filepaths) == 0:
        print("❌ No images found after extraction. Please check your archive file.")
        return
    
    print(f"Found {len(filepaths)} images across {len(label2idx)} classes")

    # Print all classes with characters
    print_all_classes_with_characters(label2idx, idx2label, str_labels)

    # Use our custom stratified split
    train_paths, val_paths, y_train, y_val = stratified_split(filepaths, labels, test_size=VAL_SPLIT, random_state=RANDOM_SEED)

    if len(train_paths) == 0:
        print("❌ No training data available after split")
        return

    steps_per_epoch = max(1, len(train_paths) // BATCH_SIZE)
    validation_steps = max(1, len(val_paths) // BATCH_SIZE)

    class_weights = compute_class_weights(y_train)

    print("📥 Loading or creating model...")
    try:
        model = load_pretrained_model(PRETRAINED_MODEL, len(label2idx))
    except Exception as e:
        print(f"❌ Failed to load pretrained model: {e}")
        print("🔄 Creating a new model...")
        # Create a simpler model if loading fails
        inputs = keras.layers.Input(shape=IMG_SIZE + (1,))
        x = keras.layers.Conv2D(32, 3, padding="same", activation="relu")(inputs)
        x = keras.layers.MaxPooling2D(2)(x)
        x = keras.layers.Conv2D(64, 3, padding="same", activation="relu")(x)
        x = keras.layers.MaxPooling2D(2)(x)
        x = keras.layers.Conv2D(128, 3, padding="same", activation="relu")(x)
        x = keras.layers.GlobalAveragePooling2D()(x)
        x = keras.layers.Dense(256, activation="relu")(x)
        x = keras.layers.Dropout(0.5)(x)
        outputs = keras.layers.Dense(len(label2idx), activation="softmax")(x)
        model = keras.Model(inputs, outputs)
    
    model.compile(optimizer=get_optimizer(), 
                 loss="sparse_categorical_crossentropy", 
                 metrics=["accuracy"])
    model.summary()

    print(f"\n📊 Training samples: {len(train_paths)}")
    print(f"📊 Validation samples: {len(val_paths)}")
    print(f"📊 Steps per epoch: {steps_per_epoch}")
    print(f"📊 Batch size: {BATCH_SIZE}")
    print(f"📊 Learning rate: 0.001")

    print("\n🚀 Starting training...")
    
    # Build datasets
    try:
        ds_train = build_optimized_dataset(train_paths, y_train, training=True, batch_size=BATCH_SIZE)
        ds_val = build_optimized_dataset(val_paths, y_val, training=False, batch_size=BATCH_SIZE)

        # Test one batch to ensure it works
        print("Testing data pipeline...")
        for batch in ds_train.take(1):
            print(f"Batch shape: {batch[0].shape}, Labels shape: {batch[1].shape}")
            break

        history = model.fit(
            ds_train,
            epochs=EPOCHS,
            validation_data=ds_val,
            steps_per_epoch=steps_per_epoch,
            validation_steps=validation_steps,
            class_weight=class_weights,
            callbacks=get_advanced_callbacks(),
            verbose=1
        )

        # Analyze validation accuracy
        analyze_validation_accuracy(history, model, val_paths, y_val, idx2label)

        print(f"\n✅ Training completed! Best model saved to: {BEST_MODEL_PATH}")
    
    except Exception as e:
        print(f"❌ Training failed with error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

❌ GPU setup error: Physical devices cannot be modified after being initialized
✅ Using float32 precision for stability
🔍 Checking dataset availability...
✅ Dataset already exists at: /home/rand-jadav/by_class/by_class
🔍 Scanning byclass dataset...
Scanned 1342017 valid images across 64 classes
Found 1342017 images across 64 classes

DETAILED CLASS MAPPING WITH CHARACTERS
Index  Folder Name     Character  Sample Count
--------------------------------------------------------------------------------
0      hsf_0           0          157747      
1      hsf_1           1          141750      
2      hsf_2           2          122646      
3      hsf_3           3          74195       
4      hsf_4           4          68783       
5      hsf_6           5          77614       
6      hsf_7           6          72748       
7      train_30        7          34803       
8      train_32        8          34184       
9      train_34        9          33432       
10     train_35        A    


📊 Training samples: 1140745
📊 Validation samples: 201272
📊 Steps per epoch: 35648
📊 Batch size: 32
📊 Learning rate: 0.001

🚀 Starting training...
Testing data pipeline...
Batch shape: (32, 128, 128, 1), Labels shape: (32,)
Epoch 1/2


2025-09-18 12:11:58.299757: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_9}}
E0000 00:00:1758177721.509386  114479 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_13_1/dropout_7_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m  121/35648[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:34:07[0m 260ms/step - accuracy: 0.4051 - loss: 0.2613

KeyboardInterrupt: 

In [5]:
# emnist_byclass_finetune_optimized_4050.py
import os
import glob
import json
import random
import zipfile
import tarfile
import shutil
from pathlib import Path
from typing import List, Tuple, Dict
from collections import Counter, defaultdict

import numpy as np
import tensorflow as tf
from tensorflow import keras

# -----------------------
# User config - OPTIMIZED for RTX 4050
# -----------------------
DATASET_DIR = "/home/rand-jadav/by_class/by_class"
DATASET_ARCHIVE = "/home/rand-jadav/Downloads/by.zip"
IMG_SIZE = (128, 128)
BATCH_SIZE = 16
EPOCHS = 2
VAL_SPLIT = 0.15
RANDOM_SEED = 42
SHUFFLE_BUFFER = 10000
INVERT = True
PRETRAINED_MODEL = "best_model_byclass_finetuned.keras"
BEST_MODEL_PATH = "best_model_byclass__2.keras"
LABELMAP_JSON = "byclass_labelmap.json"

VALID_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tiff", ".tif"}
SUPPORTED_ARCHIVES = {".zip", ".tar", ".gz", ".bz2", ".tar.gz", ".tar.bz2"}

# =========================
# Archive Extraction Functions
# =========================
def extract_archive(archive_path: str, extract_to: str) -> bool:
    """Extract supported archive formats automatically"""
    archive_path = Path(archive_path)
    extract_to = Path(extract_to)
    
    if not archive_path.exists():
        print(f"❌ Archive not found: {archive_path}")
        return False
    
    # Create extraction directory if it doesn't exist
    extract_to.mkdir(parents=True, exist_ok=True)
    
    try:
        if archive_path.suffix == ".zip":
            print(f"📦 Extracting ZIP archive: {archive_path}")
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
            print("✅ ZIP extraction completed")
            
        elif archive_path.suffix in [".tar", ".gz", ".bz2", ".tar.gz", ".tar.bz2"]:
            print(f"📦 Extracting TAR archive: {archive_path}")
            if archive_path.suffix in [".gz", ".tar.gz"]:
                mode = "r:gz"
            elif archive_path.suffix in [".bz2", ".tar.bz2"]:
                mode = "r:bz2"
            else:
                mode = "r"
            
            with tarfile.open(archive_path, mode) as tar_ref:
                tar_ref.extractall(extract_to)
            print("✅ TAR extraction completed")
            
        else:
            print(f"❌ Unsupported archive format: {archive_path.suffix}")
            return False
            
        return True
        
    except Exception as e:
        print(f"❌ Extraction failed: {e}")
        return False

def ensure_dataset_available():
    """Check if dataset exists, extract if needed"""
    dataset_path = Path(DATASET_DIR)
    archive_path = Path(DATASET_ARCHIVE)
    
    # Check if dataset already exists
    if dataset_path.exists() and any(dataset_path.iterdir()):
        print(f"✅ Dataset already exists at: {DATASET_DIR}")
        return True
    
    # Check if archive exists and extract
    if archive_path.exists():
        print(f"📦 Found archive: {archive_path}")
        print("🔄 Extracting dataset...")
        if extract_archive(archive_path, dataset_path):
            # Verify extraction
            if dataset_path.exists() and any(dataset_path.iterdir()):
                print("✅ Dataset extraction verified")
                return True
            else:
                print("❌ Extraction completed but no files found")
                return False
        else:
            return False
    else:
        print(f"❌ Neither dataset nor archive found:")
        print(f"   Dataset directory: {DATASET_DIR}")
        print(f"   Archive file: {DATASET_ARCHIVE}")
        return False

# =========================
# GPU Memory Optimization
# =========================
def setup_gpu_memory():
    try:
        gpus = tf.config.list_physical_devices('GPU')
        if not gpus:
            print("ℹ️ No GPU detected, using CPU")
            return
        
        # Set memory growth first
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        
        # Then set memory limit if needed
        try:
            for gpu in gpus:
                tf.config.set_logical_device_configuration(
                    gpu,
                    [tf.config.LogicalDeviceConfiguration(memory_limit=5120)]
                )
        except:
            print("⚠️  Could not set memory limit, using memory growth only")
        
        print(f"✅ GPU configured: {len(gpus)} Physical GPU, {len(tf.config.list_logical_devices('GPU'))} Logical GPU")
        
    except Exception as e:
        print(f"❌ GPU setup error: {e}")

# =========================
# Reproducibility
# =========================
def set_all_seeds(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

# =========================
# Dataset scanning with detailed class printing
# =========================
def scan_dataset_byclass(byclass_dir: str) -> Tuple[List[str], List[int], Dict[str, int], Dict[int, str]]:
    filepaths = []
    str_labels = []
    byclass_path = Path(byclass_dir).resolve()
    
    if not byclass_path.exists():
        raise FileNotFoundError(f"Dataset directory '{byclass_dir}' not found.")
    
    # Use glob for faster directory scanning
    image_patterns = [f"**/*{ext}" for ext in VALID_EXTENSIONS]
    
    for pattern in image_patterns:
        for filepath in byclass_path.glob(pattern):
            if filepath.is_file():
                # Get class name from parent directory
                class_name = filepath.parent.name
                filepaths.append(str(filepath))
                str_labels.append(class_name)
    
    if len(filepaths) == 0:
        return [], [], {}, {}
    
    label_names = sorted(set(str_labels))
    label2idx = {lab: i for i, lab in enumerate(label_names)}
    idx2label = {i: lab for lab, i in label2idx.items()}
    int_labels = [label2idx[s] for s in str_labels]
    
    print(f"Scanned {len(filepaths)} valid images across {len(label2idx)} classes")
    return filepaths, int_labels, label2idx, idx2label, str_labels  # Return str_labels as well

# =========================
# Character detection from folder names
# =========================
def detect_character_from_folder(folder_name: str) -> str:
    """
    Try to detect the character from folder name using various strategies
    """
    # Common patterns in EMNIST byclass dataset
    folder_lower = folder_name.lower()
    
    # Check if folder name contains hex codes (common in EMNIST)
    if folder_name.startswith(('hsf_', 'train_', 'test_')):
        # Try to extract hex part and convert to character
        parts = folder_name.split('_')
        if len(parts) > 1:
            hex_part = parts[-1]
            try:
                # Try to interpret as hex code
                char_code = int(hex_part, 16)
                if 32 <= char_code <= 126:  # Printable ASCII range
                    return chr(char_code)
            except ValueError:
                pass
    
    # Check if folder name is already a single character
    if len(folder_name) == 1 and folder_name.isprintable():
        return folder_name
    
    # Check for common patterns
    if 'digit' in folder_lower or 'number' in folder_lower:
        # Look for digits in the name
        for char in folder_name:
            if char.isdigit():
                return char
    
    if 'letter' in folder_lower or 'char' in folder_lower:
        # Look for letters in the name
        for char in folder_name:
            if char.isalpha():
                return char
    
    # Try to find any printable character in the name
    for char in folder_name:
        if char.isprintable() and not char.isspace() and char not in ['_', '-']:
            return char
    
    # If all else fails, return the first character or question mark
    return folder_name[0] if folder_name else '?'

# =========================
# Print all classes with automatically detected characters
# =========================
def print_all_classes_with_characters(label2idx, idx2label, str_labels):
    """Print all classes with their automatically detected characters"""
    print("\n" + "="*80)
    print("DETAILED CLASS MAPPING WITH AUTOMATIC CHARACTER DETECTION")
    print("="*80)
    
    # Count samples per class
    class_counts = Counter(str_labels)
    
    # Detect characters for each folder
    folder_to_char = {}
    for folder_name in label2idx.keys():
        folder_to_char[folder_name] = detect_character_from_folder(folder_name)
    
    print(f"{'Index':<6} {'Folder Name':<20} {'Character':<10} {'Sample Count':<12}")
    print("-" * 80)
    
    for folder_name, class_idx in sorted(label2idx.items(), key=lambda x: x[1]):
        character = folder_to_char.get(folder_name, '?')
        count = class_counts.get(folder_name, 0)
        print(f"{class_idx:<6} {folder_name:<20} {character:<10} {count:<12}")
    
    print("="*80)
    
    # Return the character mapping for later use
    return folder_to_char

# =========================
# Validation Accuracy Analysis Function
# =========================
def analyze_validation_accuracy(history, model, val_paths, y_val, idx2label, folder_to_char):
    """Analyze and print validation accuracy details"""
    print("\n" + "="*60)
    print("VALIDATION ACCURACY ANALYSIS")
    print("="*60)
    
    if history and hasattr(history, 'history'):
        # Print final validation accuracy
        if 'val_accuracy' in history.history and len(history.history['val_accuracy']) > 0:
            final_val_acc = history.history['val_accuracy'][-1]
            print(f"Final Validation Accuracy: {final_val_acc:.4f} ({final_val_acc*100:.2f}%)")
        
        # Print training accuracy for comparison
        if 'accuracy' in history.history and len(history.history['accuracy']) > 0:
            final_train_acc = history.history['accuracy'][-1]
            print(f"Final Training Accuracy: {final_train_acc:.4f} ({final_train_acc*100:.2f}%)")
        
        # Print accuracy progression
        if 'val_accuracy' in history.history:
            print("\nValidation Accuracy Progression:")
            for epoch, acc in enumerate(history.history['val_accuracy'], 1):
                print(f"  Epoch {epoch}: {acc:.4f} ({acc*100:.2f}%)")
    
    # Calculate per-class accuracy if we have a model and validation data
    if model is not None and val_paths is not None and y_val is not None and len(val_paths) > 0:
        print("\nCalculating per-class validation accuracy...")
        
        try:
            # Create a small validation dataset for evaluation
            val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_paths), tf.constant(y_val, dtype=tf.int32)))
            val_ds = val_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
            val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
            
            # Get predictions
            predictions = model.predict(val_ds, verbose=1)
            predicted_classes = np.argmax(predictions, axis=1)
            
            # Calculate per-class accuracy
            class_correct = defaultdict(int)
            class_total = defaultdict(int)
            
            for true_label, pred_label in zip(y_val, predicted_classes):
                class_total[true_label] += 1
                if true_label == pred_label:
                    class_correct[true_label] += 1
            
            print(f"\nPer-class Validation Accuracy:")
            print(f"{'Class':<6} {'Character':<10} {'Accuracy':<10} {'Correct/Total':<15}")
            print("-" * 50)
            
            for class_idx in sorted(class_total.keys()):
                accuracy = class_correct.get(class_idx, 0) / class_total[class_idx] if class_total[class_idx] > 0 else 0
                folder_name = idx2label.get(class_idx, "Unknown")
                character = folder_to_char.get(folder_name, '?')
                print(f"{class_idx:<6} {character:<10} {accuracy:.3f}       {class_correct.get(class_idx, 0):<3}/{class_total[class_idx]:<3}")
        
        except Exception as e:
            print(f"❌ Error during per-class accuracy calculation: {e}")
    else:
        print("⚠️  Skipping per-class accuracy analysis - missing data or model")

# =========================
# Preprocessing & Augmentation
# =========================
def load_and_preprocess_image(path, label):
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=1, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    if INVERT:
        img = 1.0 - img
    return img, label

def get_simple_augmentation() -> keras.Sequential:
    return keras.Sequential([
        keras.layers.RandomRotation(0.05, fill_mode="constant", seed=RANDOM_SEED),
        keras.layers.RandomZoom(0.05, fill_mode="constant", seed=RANDOM_SEED+1),
        keras.layers.RandomContrast(0.1, seed=RANDOM_SEED+2),
    ], name="simple_data_augmentation")

AUGMENTOR = get_simple_augmentation()

def build_optimized_dataset(paths: List[str], labels: np.ndarray, training: bool, batch_size: int) -> tf.data.Dataset:
    """Optimized dataset building without caching"""
    if len(paths) == 0:
        raise ValueError("No paths provided for dataset building")
    
    ds = tf.data.Dataset.from_tensor_slices((tf.constant(paths), tf.constant(labels, dtype=tf.int32)))
    
    if training:
        ds = ds.shuffle(min(SHUFFLE_BUFFER, len(paths)), seed=RANDOM_SEED, reshuffle_each_iteration=True)
    
    ds = ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.map(lambda img, label: (tf.ensure_shape(img, IMG_SIZE + (1,)), label),
                num_parallel_calls=tf.data.AUTOTUNE)
    
    if training:
        ds = ds.repeat()
        
        def _apply_aug(img, label):
            img_exp = tf.expand_dims(img, 0)
            img_aug = AUGMENTOR(img_exp, training=True)
            img_aug = tf.squeeze(img_aug, 0)
            return img_aug, label
        
        ds = ds.map(_apply_aug, num_parallel_calls=tf.data.AUTOTUNE)
    
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    
    return ds

# =========================
# Model Loading
# =========================
def load_pretrained_model(model_path, num_classes):
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Pretrained model not found: {model_path}")
    
    print(f"📥 Loading pretrained model from: {model_path}")
    model = keras.models.load_model(model_path)
    
    # Check if the model output matches our number of classes
    output_shape = model.output_shape
    if output_shape and len(output_shape) > 1 and output_shape[-1] != num_classes:
        print(f"⚠️  Model output shape {output_shape[-1]} doesn't match number of classes {num_classes}")
        print("🔄 Replacing the final classification layer...")
        
        # Remove the last layer
        model = keras.Model(inputs=model.input, outputs=model.layers[-2].output)
        
        # Add new classification layer
        x = keras.layers.Dense(num_classes, activation="softmax", name="new_classification")(model.output)
        model = keras.Model(inputs=model.input, outputs=x)
    
    return model

# =========================
# Training Utils
# =========================
def get_optimizer():
    # Increased learning rate for faster convergence
    return keras.optimizers.Adam(learning_rate=0.001)

def get_advanced_callbacks():
    return [
        keras.callbacks.ModelCheckpoint(
            BEST_MODEL_PATH, 
            monitor="val_accuracy", 
            save_best_only=True, 
            mode="max",
            verbose=1
        ),
        keras.callbacks.EarlyStopping(
            monitor="val_accuracy", 
            patience=3,
            restore_best_weights=True,
            min_delta=0.002,
            verbose=1
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", 
            factor=0.5, 
            patience=2,
            min_lr=1e-6,
            verbose=1
        )
    ]

def compute_class_weights(y_train):
    if len(y_train) == 0:
        return {}
    
    class_counts = np.bincount(y_train)
    total_samples = len(y_train)
    num_classes = len(class_counts)
    
    # Handle zero counts
    class_counts = np.where(class_counts == 0, 1, class_counts)
    
    class_weights = total_samples / (num_classes * class_counts.astype(float))
    class_weights = class_weights / np.mean(class_weights)
    
    return {i: float(weight) for i, weight in enumerate(class_weights)}

# =========================
# Stratified Split
# =========================
def stratified_split(filepaths, labels, test_size=0.15, random_state=42):
    """Simple stratified split implementation without scikit-learn"""
    if len(filepaths) == 0:
        return [], [], np.array([]), np.array([])
    
    # Group by class
    class_groups = defaultdict(list)
    for path, label in zip(filepaths, labels):
        class_groups[label].append(path)
    
    train_paths, val_paths = [], []
    train_labels, val_labels = [], []
    
    for label, paths in class_groups.items():
        n_val = max(1, int(len(paths) * test_size))
        random.shuffle(paths)
        
        val_paths.extend(paths[:n_val])
        train_paths.extend(paths[n_val:])
        
        val_labels.extend([label] * n_val)
        train_labels.extend([label] * (len(paths) - n_val))
    
    # Shuffle the datasets
    train_data = list(zip(train_paths, train_labels))
    val_data = list(zip(val_paths, val_labels))
    
    random.shuffle(train_data)
    random.shuffle(val_data)
    
    train_paths, train_labels = zip(*train_data) if train_data else ([], [])
    val_paths, val_labels = zip(*val_data) if val_data else ([], [])
    
    return list(train_paths), list(val_paths), np.array(train_labels), np.array(val_labels)

# =========================
# Main
# =========================
def main():
    set_all_seeds(RANDOM_SEED)
    setup_gpu_memory()
    
    # Disable mixed precision for now to avoid issues
    try:
        from tensorflow.keras import mixed_precision
        mixed_precision.set_global_policy('float32')
        print("✅ Using float32 precision for stability")
    except:
        pass

    print("🔍 Checking dataset availability...")
    if not ensure_dataset_available():
        print("❌ Dataset preparation failed. Exiting.")
        return

    print("🔍 Scanning byclass dataset...")
    filepaths, labels, label2idx, idx2label, str_labels = scan_dataset_byclass(DATASET_DIR)
    
    if len(filepaths) == 0:
        print("❌ No images found after extraction. Please check your archive file.")
        return
    
    print(f"Found {len(filepaths)} images across {len(label2idx)} classes")

    # Print all classes with automatically detected characters
    folder_to_char = print_all_classes_with_characters(label2idx, idx2label, str_labels)

    # Use our custom stratified split
    train_paths, val_paths, y_train, y_val = stratified_split(filepaths, labels, test_size=VAL_SPLIT, random_state=RANDOM_SEED)

    if len(train_paths) == 0:
        print("❌ No training data available after split")
        return

    steps_per_epoch = max(1, len(train_paths) // BATCH_SIZE)
    validation_steps = max(1, len(val_paths) // BATCH_SIZE)

    class_weights = compute_class_weights(y_train)

    print("📥 Loading or creating model...")
    try:
        model = load_pretrained_model(PRETRAINED_MODEL, len(label2idx))
    except Exception as e:
        print(f"❌ Failed to load pretrained model: {e}")
        print("🔄 Creating a new model...")
        # Create a simpler model if loading fails
        inputs = keras.layers.Input(shape=IMG_SIZE + (1,))
        x = keras.layers.Conv2D(32, 3, padding="same", activation="relu")(inputs)
        x = keras.layers.MaxPooling2D(2)(x)
        x = keras.layers.Conv2D(64, 3, padding="same", activation="relu")(x)
        x = keras.layers.MaxPooling2D(2)(x)
        x = keras.layers.Conv2D(128, 3, padding="same", activation="relu")(x)
        x = keras.layers.GlobalAveragePooling2D()(x)
        x = keras.layers.Dense(256, activation="relu")(x)
        x = keras.layers.Dropout(0.5)(x)
        outputs = keras.layers.Dense(len(label2idx), activation="softmax")(x)
        model = keras.Model(inputs, outputs)
    
    model.compile(optimizer=get_optimizer(), 
                 loss="sparse_categorical_crossentropy", 
                 metrics=["accuracy"])
    model.summary()

    print(f"\n📊 Training samples: {len(train_paths)}")
    print(f"📊 Validation samples: {len(val_paths)}")
    print(f"📊 Steps per epoch: {steps_per_epoch}")
    print(f"📊 Batch size: {BATCH_SIZE}")
    print(f"📊 Learning rate: 0.001")

    print("\n🚀 Starting training...")
    
    # Build datasets
    try:
        ds_train = build_optimized_dataset(train_paths, y_train, training=True, batch_size=BATCH_SIZE)
        ds_val = build_optimized_dataset(val_paths, y_val, training=False, batch_size=BATCH_SIZE)

        # Test one batch to ensure it works
        print("Testing data pipeline...")
        for batch in ds_train.take(1):
            print(f"Batch shape: {batch[0].shape}, Labels shape: {batch[1].shape}")
            break

        history = model.fit(
            ds_train,
            epochs=EPOCHS,
            validation_data=ds_val,
            steps_per_epoch=steps_per_epoch,
            validation_steps=validation_steps,
            class_weight=class_weights,
            callbacks=get_advanced_callbacks(),
            verbose=1
        )

        # Analyze validation accuracy with the detected character mapping
        analyze_validation_accuracy(history, model, val_paths, y_val, idx2label, folder_to_char)

        print(f"\n✅ Training completed! Best model saved to: {BEST_MODEL_PATH}")
    
    except Exception as e:
        print(f"❌ Training failed with error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

❌ GPU setup error: Physical devices cannot be modified after being initialized
✅ Using float32 precision for stability
🔍 Checking dataset availability...
✅ Dataset already exists at: /home/rand-jadav/by_class/by_class
🔍 Scanning byclass dataset...
Scanned 1342017 valid images across 64 classes
Found 1342017 images across 64 classes

DETAILED CLASS MAPPING WITH AUTOMATIC CHARACTER DETECTION
Index  Folder Name          Character  Sample Count
--------------------------------------------------------------------------------
0      hsf_0                h          157747      
1      hsf_1                h          141750      
2      hsf_2                h          122646      
3      hsf_3                h          74195       
4      hsf_4                h          68783       
5      hsf_6                h          77614       
6      hsf_7                h          72748       
7      train_30             0          34803       
8      train_32             2          34184       
9     


📊 Training samples: 1140745
📊 Validation samples: 201272
📊 Steps per epoch: 71296
📊 Batch size: 16
📊 Learning rate: 0.001

🚀 Starting training...
Testing data pipeline...
Batch shape: (16, 128, 128, 1), Labels shape: (16,)
Epoch 1/2


2025-09-18 12:26:09.782531: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_9}}


[1m   30/71296[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:25:26[0m 122ms/step - accuracy: 0.3956 - loss: 0.3001

KeyboardInterrupt: 