# The Eye - SOM Quality Prediction CNN

This notebook lets you test "The Eye", a CNN model that predicts Self-Organizing Map (SOM) quality.

**The Eye** is a CNN trained to predict SOM quality by analyzing RGB composite visualizations:
- **Red Channel**: U-Matrix (cluster boundaries)
- **Green Channel**: Distance Map (quantization error)
- **Blue Channel**: Dead Neurons Map

## Quick Start
1. Upload your trained model (.keras file)
2. Upload RGB SOM maps or test set CSV
3. Run evaluation cells
4. Download predictions

## 1. Setup Environment

In [3]:
import os
import platform
import sys
import zipfile
from pathlib import Path

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image

# 1. Resolve the project root and the expected model location
project_root = Path.cwd().resolve()
model_path = project_root / "som_quality.keras"

print(f"{' PROJECT STATUS ':=^80}")
print(f"Root:      {project_root}")
print(f"OS:        {platform.system()} {platform.release()} ({platform.machine()})")
print(f"Python:    {sys.version.split()[0]} ({sys.executable})")

# 2. Versions of the key libraries
print(f"\n{' LIBRARIES ':-^80}")
print(f"TensorFlow: {tf.__version__}")
print(f"Keras:      {keras.__version__}")
print(f"NumPy:      {np.__version__}")

# 3. Hardware / acceleration
gpus = tf.config.list_physical_devices('GPU')
gpu_status = f"ACTIVE ({gpus[0].name})" if gpus else "INACTIVE (CPU only)"
print(f"Metal GPU:  {gpu_status}")

# 4. Runtime detection.
# The testing cells below branch on IN_COLAB and call files.upload() /
# files.download(), so both names must exist before those cells run.
# google.colab is only importable inside a Colab runtime.
try:
    from google.colab import files
except ImportError:
    files = None
    IN_COLAB = False
else:
    IN_COLAB = True

# 5. Diagnostics for model-loading problems
print(f"\n{' MODEL DIAGNOSTICS ':-^80}")
print(f"Soubor existuje: {model_path.exists()}")
if model_path.exists():
    print(f"Velikost:        {model_path.stat().st_size / (1024*1024):.2f} MB")

# Detect a potential Keras 2 vs Keras 3 conflict (the check keys on the
# presence of the `keras.ops` attribute)
is_keras_3 = hasattr(keras, "ops")
print(f"Keras engine:    {'Keras 3 (Modern)' if is_keras_3 else 'Keras 2 (Legacy)'}")
print(f"{'':=^80}")

Root:      /Users/tomas/OSU/Python/NexusSom/tests/Jupyter
OS:        Darwin 25.0.0 (arm64)
Python:    3.11.14 (/Users/tomas/OSU/Python/NexusSom/.venv/bin/python)

---------------------------------- LIBRARIES -----------------------------------
TensorFlow: 2.18.0
Keras:      3.13.0
NumPy:      1.26.4
Metal GPU:  ACTIVE (/physical_device:GPU:0)

------------------------------ MODEL DIAGNOSTICS -------------------------------
Soubor existuje: True
Velikost:        14.75 MB
Keras engine:    Keras 3 (Modern)


## 2. Upload Model

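Upload (or copy) your trained `.keras` model file into the notebook's working directory as `som_quality.keras`; the cell below loads it from that path.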

In [None]:
from pathlib import Path
import keras

# Resolve the model path with pathlib so the error message shows an absolute path
model_path = Path("som_quality.keras").resolve()

if not model_path.exists():
    raise FileNotFoundError(f"Model nebyl nalezen na adrese: {model_path}")

try:
    # .keras is the native format that keras.models.load_model reads in Keras 3
    model = keras.models.load_model(model_path)
except ValueError as e:
    # Typically raised when the saved model references custom objects; in that
    # case pass custom_objects={'YourLayer': YourLayer} to load_model.
    print(f"⚠ Chyba při načítání (pravděpodobně custom objects): {e}")
    # Re-raise: continuing would either hit a NameError on `model` below or
    # silently reuse a stale `model` left over from an earlier run.
    raise
except Exception as e:
    print(f"✗ Kritická chyba: {e}")
    raise
else:
    print(f"✓ Model '{model_path.name}' úspěšně načten.")
    model.summary()

## 3. Helper Functions

In [None]:
def load_and_preprocess_image(filepath, image_size=(224, 224)):
    """Read an image file and turn it into a normalized float array.

    The image is converted to RGB, resized to ``image_size`` with Lanczos
    resampling, and scaled from 0-255 down to the 0.0-1.0 range.

    Args:
        filepath: Location of the image to open.
        image_size: Target size handed to ``Image.resize``.

    Returns:
        A ``float32`` NumPy array of the resized RGB image, or ``None`` when
        anything goes wrong while reading or converting the file (the error
        is printed rather than raised).
    """
    try:
        rgb_image = Image.open(filepath).convert('RGB')
        resized = rgb_image.resize(image_size, Image.LANCZOS)
        pixels = np.array(resized, dtype=np.float32)
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file.
        print(f"Error loading image {filepath}: {e}")
        return None

    return pixels / 255.0


def predict_single_image(model, image_path, image_size=(224, 224), threshold=0.5):
    """Predict the quality of a single SOM image.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``; the result is
            indexed as ``[0][0]``, i.e. one scalar score per image is assumed.
        image_path: Path of the image to score.
        image_size: Size the image is resized to before prediction.
        threshold: Scores greater than or equal to this value are labelled
            ``"GOOD"``, everything else ``"BAD"``.

    Returns:
        ``None`` if the image could not be loaded, otherwise a dict with:

        * ``'quality_score'`` - the raw model output,
        * ``'quality_label'`` - ``"GOOD"`` or ``"BAD"``,
        * ``'confidence'`` - the score mass on the assigned label
          (``score`` for GOOD, ``1 - score`` for BAD).
    """
    img = load_and_preprocess_image(image_path, image_size)
    if img is None:
        return None

    # Add a leading batch axis of size 1 before calling the model.
    img_batch = np.expand_dims(img, axis=0)
    prediction = model.predict(img_batch, verbose=0)[0][0]

    # Derive label and confidence from the same decision so they can never
    # disagree when a non-default threshold is used. With threshold=0.5 this
    # is identical to max(score, 1 - score).
    is_good = prediction >= threshold
    quality_label = "GOOD" if is_good else "BAD"
    confidence = prediction if is_good else (1 - prediction)

    return {
        'quality_score': prediction,
        'quality_label': quality_label,
        'confidence': confidence
    }


def visualize_prediction(image_path, prediction_result):
    """Display an image with its prediction summarized in the plot title.

    Args:
        image_path: Path of the image to show.
        prediction_result: Dict with ``'quality_score'``, ``'quality_label'``
            and ``'confidence'`` entries.

    The title is drawn in green for a ``'GOOD'`` label and in red otherwise.
    """
    picture = Image.open(image_path)

    plt.figure(figsize=(10, 8))
    plt.imshow(picture)
    plt.axis('off')

    # Build the caption from the prediction fields
    label = prediction_result['quality_label']
    caption = (
        f"Quality: {label}\n"
        f"Score: {prediction_result['quality_score']:.4f} | "
        f"Confidence: {prediction_result['confidence']:.2%}"
    )

    if label == 'GOOD':
        title_color = 'green'
    else:
        title_color = 'red'

    plt.title(caption, fontsize=14, weight='bold', color=title_color, pad=10)
    plt.tight_layout()
    plt.show()


def calculate_metrics(predictions, labels, threshold=0.5):
    """Compute regression and binary-classification metrics.

    Args:
        predictions: Predicted scores (anything ``np.array`` accepts).
        labels: True scores, same length as ``predictions``.
        threshold: Values greater than or equal to this count as the
            positive class, for predictions and labels alike.

    Returns:
        Dict with ``'mae'``, ``'rmse'``, ``'accuracy'``, ``'precision'``,
        ``'recall'``, ``'f1'`` and ``'confusion_matrix'`` (itself a dict with
        ``'tp'``, ``'fp'``, ``'tn'``, ``'fn'`` counts). Precision, recall and
        F1 fall back to ``0`` when their denominator is zero.
    """
    y_pred = np.array(predictions)
    y_true = np.array(labels)

    # Regression view: compare the raw scores
    residual = y_pred - y_true
    mae = np.mean(np.abs(residual))
    rmse = np.sqrt(np.mean(residual ** 2))

    # Classification view: binarize both sides with the same threshold
    pred_positive = y_pred >= threshold
    true_positive = y_true >= threshold

    accuracy = np.mean(pred_positive == true_positive)
    tp = np.sum(pred_positive & true_positive)
    fp = np.sum(pred_positive & ~true_positive)
    fn = np.sum(~pred_positive & true_positive)
    tn = np.sum(~pred_positive & ~true_positive)

    flagged = tp + fp
    actual = tp + fn
    precision = tp / flagged if flagged > 0 else 0
    recall = tp / actual if actual > 0 else 0

    pr_sum = precision + recall
    f1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0

    confusion = {'tp': tp, 'fp': fp, 'tn': tn, 'fn': fn}
    return {
        'mae': mae,
        'rmse': rmse,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': confusion
    }

# Confirmation printed once every helper definition in this cell has executed.
print("✓ Helper functions loaded!")

## 4. Choose Testing Mode

Select one of the following:
- **Option A**: Test on a single RGB image
- **Option B**: Test on multiple images (ZIP file)
- **Option C**: Evaluate on test set (CSV with `filepath` and `quality_score` columns, plus a ZIP of the referenced images)

### Option A: Test Single Image

In [None]:
# Choose the image to score: uploaded through Colab, or a manually set local path
if IN_COLAB:
    print("Please upload a single RGB SOM image (PNG format)...")
    uploaded_img = files.upload()

    # The first key of the upload result is used as the image filename
    img_filename = list(uploaded_img)[0]
    print(f"\n✓ Image uploaded: {img_filename}")
else:
    print("⚠ Not in Colab - please set img_filename manually")
    img_filename = "example_rgb.png"  # Change this to your image path

# Run the model on the chosen image
print("\nMaking prediction...")
result = predict_single_image(model, img_filename)

if not result:
    print("✗ Failed to process image")
else:
    separator = '=' * 80
    print(f"\n{separator}")
    print("PREDICTION RESULT")
    print(f"{separator}")
    print(f"Quality Score: {result['quality_score']:.6f}")
    print(f"Quality Label: {result['quality_label']}")
    print(f"Confidence: {result['confidence']:.2%}")
    print(f"{separator}\n")

    # Show the image with the prediction in its title
    visualize_prediction(img_filename, result)

### Option B: Test Multiple Images (ZIP)

In [None]:
# Choose the ZIP archive of RGB images: uploaded through Colab, or a local path
if IN_COLAB:
    print("Please upload a ZIP file containing RGB SOM images...")
    uploaded_zip = files.upload()

    # The first key of the upload result is used as the archive filename
    zip_filename = list(uploaded_zip)[0]
    print(f"\n✓ ZIP uploaded: {zip_filename}")
else:
    print("⚠ Not in Colab - please set zip_filename manually")
    zip_filename = "images.zip"  # Change this to your ZIP path

# Unpack the archive into a dedicated folder (created if it does not exist)
extract_dir = "uploaded_images"
os.makedirs(extract_dir, exist_ok=True)

with zipfile.ZipFile(zip_filename, 'r') as archive:
    archive.extractall(extract_dir)

print(f"✓ Extracted to: {extract_dir}")

In [None]:
# Find all PNG images under the extraction directory.
# The extension check is case-insensitive so files named "*.PNG" are included.
image_files = []
for root, _dirs, files_list in os.walk(extract_dir):
    for file in files_list:
        if file.lower().endswith('.png'):
            image_files.append(os.path.join(root, file))

print(f"Found {len(image_files)} PNG images\n")

result_columns = ['filename', 'filepath', 'quality_score', 'quality_label', 'confidence']
results = []

if not image_files:
    print("✗ No PNG images found in ZIP file")
else:
    # Predict on all images; files that fail to load are skipped
    print("Making predictions...")
    for i, img_path in enumerate(image_files, start=1):
        if i % 10 == 0:
            print(f"  Progress: {i}/{len(image_files)}")

        result = predict_single_image(model, img_path)
        if result:
            results.append({
                'filename': os.path.basename(img_path),
                'filepath': img_path,
                'quality_score': result['quality_score'],
                'quality_label': result['quality_label'],
                'confidence': result['confidence']
            })

    if not results:
        print("✗ None of the PNG images could be processed")

# Always (re)build results_df with explicit columns: sorting then works even
# when nothing was predicted, and later cells never pick up a stale frame
# from a previous run.
results_df = pd.DataFrame(results, columns=result_columns)
results_df = results_df.sort_values('quality_score', ascending=False)

if not results_df.empty:
    # Summary
    print(f"\n{'='*80}")
    print("PREDICTION SUMMARY")
    print(f"{'='*80}\n")

    good_count = (results_df['quality_score'] >= 0.5).sum()
    bad_count = (results_df['quality_score'] < 0.5).sum()

    print(f"Total predictions: {len(results_df)}")
    print(f"  Good (score >= 0.5): {good_count}")
    print(f"  Bad (score < 0.5): {bad_count}")

    summary_columns = ['filename', 'quality_score', 'quality_label', 'confidence']

    print(f"\nTop 10 Best Quality Maps:")
    print(results_df.head(10)[summary_columns].to_string(index=False))

    print(f"\nTop 10 Worst Quality Maps:")
    print(results_df.tail(10)[summary_columns].to_string(index=False))

    # Save results
    output_csv = "predictions.csv"
    results_df.to_csv(output_csv, index=False)
    print(f"\n✓ Predictions saved to: {output_csv}")

    # Download results (only possible inside Colab)
    if IN_COLAB:
        print("\nDownloading predictions.csv...")
        files.download(output_csv)
    else:
        print(f"\n✓ Results saved locally to {output_csv}")

In [None]:
# Visualize sample predictions
print("Visualizing sample predictions...\n")

# Show top 3 best and top 3 worst.
# results_df is sorted by score, so its index holds the ORIGINAL row labels,
# not positions. The labels are therefore looked up with .loc (label-based);
# .iloc would pick rows by position and show the wrong images.
# dict.fromkeys drops repeats (head and tail overlap when there are fewer
# than six rows) while preserving order.
sample_indices = list(dict.fromkeys(
    [*results_df.head(3).index, *results_df.tail(3).index]
))

for idx in sample_indices:
    row = results_df.loc[idx]
    result = {
        'quality_score': row['quality_score'],
        'quality_label': row['quality_label'],
        'confidence': row['confidence']
    }
    print(f"\nImage: {row['filename']}")
    visualize_prediction(row['filepath'], result)

### Option C: Evaluate on Test Set (CSV)

In [None]:
# Choose the test-set CSV: uploaded through Colab, or a manually set local path
if IN_COLAB:
    print("Please upload test set CSV file (must have 'filepath' and 'quality_score' columns)...")
    uploaded_csv = files.upload()

    # The first key of the upload result is used as the CSV filename
    csv_filename = list(uploaded_csv)[0]
    print(f"\n✓ CSV uploaded: {csv_filename}")
else:
    print("⚠ Not in Colab - please set csv_filename manually")
    csv_filename = "test_set.csv"  # Change this to your CSV path

# Read the test set and report its shape
test_df = pd.read_csv(csv_filename)
print(f"\nTest set size: {len(test_df)} samples")
print(f"Columns: {list(test_df.columns)}")

# Both columns are needed by the evaluation cells that follow
required_columns = {'filepath', 'quality_score'}
if required_columns.issubset(test_df.columns):
    print("\n✓ Test set loaded successfully!")
else:
    print("\n✗ Error: CSV must contain 'filepath' and 'quality_score' columns")

In [None]:
# Choose the ZIP with the images listed in the CSV: Colab upload or local path
if IN_COLAB:
    print("Please upload a ZIP file containing the images referenced in the CSV...")
    uploaded_img_zip = files.upload()

    # The first key of the upload result is used as the archive filename
    img_zip_filename = list(uploaded_img_zip)[0]
    print(f"\n✓ ZIP uploaded: {img_zip_filename}")
else:
    print("⚠ Not in Colab - please set img_zip_filename manually")
    img_zip_filename = "test_images.zip"  # Change this to your ZIP path

# Unpack the archive into a dedicated folder (created if it does not exist)
images_dir = "test_images"
os.makedirs(images_dir, exist_ok=True)

with zipfile.ZipFile(img_zip_filename, 'r') as archive:
    archive.extractall(images_dir)

print(f"✓ Extracted to: {images_dir}")


def _to_local_path(original_path):
    """Map a CSV filepath to the same file name inside ``images_dir``."""
    return os.path.join(images_dir, os.path.basename(original_path))


# Point each row at the extracted copy of its image.
# Only the file name is kept, so images are expected directly in the ZIP root.
test_df['local_filepath'] = test_df['filepath'].apply(_to_local_path)

In [None]:
# Evaluate on test set
print(f"\n{'='*80}")
print("EVALUATING ON TEST SET")
print(f"{'='*80}\n")

# Load images and labels; rows whose image cannot be loaded are skipped so
# that `images`, `labels` and `valid_indices` stay aligned.
images = []
labels = []
valid_indices = []

print("Loading images...")
for idx, row in test_df.iterrows():
    img = load_and_preprocess_image(row['local_filepath'])
    if img is not None:
        images.append(img)
        labels.append(row['quality_score'])
        valid_indices.append(idx)
    else:
        print(f"  Warning: Could not load {row['local_filepath']}")

# Fail early with a clear message: an empty batch would otherwise surface as
# an obscure shape error inside model.predict.
if not images:
    raise ValueError(
        "No test images could be loaded - check the ZIP contents and the "
        "'filepath' column of the CSV"
    )

images = np.array(images)
labels = np.array(labels)

print(f"\nLoaded {len(images)} images")

# Predict; the output is flattened to one score per image
print("\nMaking predictions...")
predictions = model.predict(images, verbose=1)
predictions = predictions.flatten()

# Calculate metrics (default threshold of 0.5)
metrics = calculate_metrics(predictions, labels)

# Print results
print(f"\n{'='*80}")
print("EVALUATION RESULTS")
print(f"{'='*80}\n")

print("Regression Metrics:")
print(f"  Mean Absolute Error (MAE): {metrics['mae']:.6f}")
print(f"  Root Mean Squared Error (RMSE): {metrics['rmse']:.6f}")

print(f"\nClassification Metrics (threshold=0.5):")
print(f"  Accuracy: {metrics['accuracy']:.4f} ({metrics['accuracy']*100:.2f}%)")
print(f"  Precision: {metrics['precision']:.4f}")
print(f"  Recall: {metrics['recall']:.4f}")
print(f"  F1-Score: {metrics['f1']:.4f}")

cm = metrics['confusion_matrix']
print(f"\nConfusion Matrix:")
print(f"  True Positives: {cm['tp']}")
print(f"  False Positives: {cm['fp']}")
print(f"  True Negatives: {cm['tn']}")
print(f"  False Negatives: {cm['fn']}")

# Show the first (up to) ten predictions next to their true labels
print(f"\n{'='*80}")
print("SAMPLE PREDICTIONS")
print(f"{'='*80}\n")

for i, (predicted, actual) in enumerate(zip(predictions[:10], labels[:10]), start=1):
    pred_label = "GOOD" if predicted >= 0.5 else "BAD"
    true_label = "GOOD" if actual >= 0.5 else "BAD"
    match = "✓" if pred_label == true_label else "✗"
    print(f"{match} Sample {i}: Predicted={predicted:.4f} ({pred_label}), True={actual:.1f} ({true_label})")

In [None]:
# Visualize prediction distribution
plt.figure(figsize=(12, 5))

# Plot 1: Prediction vs True label scatter
plt.subplot(1, 2, 1)
plt.scatter(labels, predictions, alpha=0.5)
plt.plot([0, 1], [0, 1], 'r--', label='Perfect prediction')
plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5, label='Threshold')
plt.axvline(x=0.5, color='orange', linestyle='--', alpha=0.5)
plt.xlabel('True Quality Score')
plt.ylabel('Predicted Quality Score')
plt.title('Predictions vs True Labels')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 2: Prediction distribution, split by true class.
# The split uses the same ">= 0.5 is GOOD" rule as the metrics and the sample
# listing. Exact equality with 1.0 / 0.0 would silently drop every sample
# whose true score is not exactly binary.
truly_good = labels >= 0.5
plt.subplot(1, 2, 2)
plt.hist(predictions[truly_good], bins=20, alpha=0.5, label='True GOOD', color='green')
plt.hist(predictions[~truly_good], bins=20, alpha=0.5, label='True BAD', color='red')
plt.axvline(x=0.5, color='black', linestyle='--', label='Threshold')
plt.xlabel('Predicted Quality Score')
plt.ylabel('Count')
plt.title('Prediction Distribution')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Model Performance Summary

**The Eye** CNN Model Capabilities:
- Predicts SOM quality from RGB composite visualizations
- Outputs quality score (0.0 = BAD, 1.0 = GOOD)
- High precision (avoids false positives)
- Can be integrated into evolutionary algorithms for real-time quality assessment

### RGB Channel Interpretation:
- **Red**: U-Matrix (cluster boundaries - dark=similar neurons, bright=boundaries)
- **Green**: Distance Map (quantization error)
- **Blue**: Dead Neurons Map (unused neurons)

### Quality Indicators:
- Clear cluster boundaries in U-Matrix
- Low quantization error (distance map)
- Few dead neurons
- Well-organized topology

---

**For more information:**
- GitHub: NexusSom Project
- Model trained on evolutionary algorithm (EA) results
- Fixed [0, 1.0] normalization for consistent visual interpretation