# Re-Imaging Price Trends - Image Generation & Comparison Analysis

**Purpose**: 
1. Generate candlestick chart images from stock price data
2. **Detailed comparison with original author data (img_data/)**
3. Statistical analysis and visual comparison
4. Performance benchmarking

**Next Step**: Run `2_model_training.ipynb` after completion

## 0. Environment Setup & Optimization Check

In [None]:
# Mount Google Drive so the project folder (data, scripts, outputs) is reachable.
from google.colab import drive
drive.mount('/content/drive')

import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Later cells address data and scripts with paths relative to this directory.
os.chdir('/content/drive/MyDrive/ReImaging_Price_Trends')
print(f"🚀 Current directory: {os.getcwd()}")
print(f"📁 Available files: {[f for f in os.listdir('.') if not f.startswith('.')]}")

# Check Numba JIT performance optimization
try:
    import numba
except ImportError:
    print("\n⚠️  Numba installation failed - check requirements.txt")
    print("   🐌 Using slower pure Python version")
else:
    print(f"\n⚡ Numba JIT available: {numba.__version__}")
    print("   🚀 Image generation speed improved by 50-100x!")

# Check memory status
import psutil
memory = psutil.virtual_memory()
# True division: floor division (`//`) rounded down to whole GB first, so the
# ".1f" format could only ever show values such as "11.0GB".
available_gb = memory.available / (1024**3)
print(f"\n💾 Available memory: {available_gb:.1f}GB")
if memory.available < 2 * (1024**3):  # Less than 2GB
    print("⚠️  Warning: Low memory - recommend using --parallel 1 option")

print("\n✅ Environment setup completed!")

In [None]:
# Install requirements
# Shell call to pip; requirements.txt is resolved relative to the current
# working directory of the notebook.
!pip install -r requirements.txt

## 1. Data Availability Check

In [None]:
# Check data files availability and analyze
print("📊 DATA AVAILABILITY & ANALYSIS")
print("=" * 50)

# Updated to use unified dataset file
data_files = [
    'data_1992_2019_unified.parquet'
]


def percent_of(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when *whole* is 0."""
    return part / whole * 100 if whole else 0.0


def analyze_price_data(df):
    """Print size, date range, missing-value rates and label balance of *df*.

    Reads the columns ``code``, ``date``, ``open``/``high``/``low``/``close``,
    ``volume`` and ``ret5``/``ret20``/``ret60``; a missing one raises
    ``KeyError``. ``label_5``/``label_20``/``label_60`` are optional.
    """
    n_rows = len(df)
    print(f"      Records: {n_rows:,}, Companies: {df['code'].nunique():,}")
    print(f"      Period: {df['date'].min()} ~ {df['date'].max()}")

    # Check data quality: rows with at least one missing value per column group
    missing_counts = {
        'OHLC': df[['open', 'high', 'low', 'close']].isnull().any(axis=1).sum(),
        'volume': df['volume'].isnull().sum(),
        'returns': df[['ret5', 'ret20', 'ret60']].isnull().any(axis=1).sum(),
    }
    for name, n_missing in missing_counts.items():
        print(f"      Missing {name}: {n_missing:,} ({percent_of(n_missing, n_rows):.1f}%)")

    # Label distribution
    for period in (5, 20, 60):
        label_col = f'label_{period}'
        if label_col not in df.columns:
            continue
        # sum() is a float when the column contains NaN; cast so the count is
        # printed as "1,234" rather than "1,234.0".
        up_count = int(df[label_col].sum())
        total_valid = int(df[label_col].notna().sum())
        print(f"      {period}-day up: {up_count:,}/{total_valid:,} "
              f"({percent_of(up_count, total_valid):.1f}%)")


print("\n📈 Required data files:")
data_ready = True
analysis_errors = []  # files that exist but could not be analyzed
total_size_mb = 0

for file in data_files:
    if not os.path.exists(file):
        print(f"   ❌ {file} missing")
        data_ready = False
        continue

    size_mb = os.path.getsize(file) / (1024**2)
    total_size_mb += size_mb
    print(f"   ✅ {file} ({size_mb:.1f}MB)")

    # Analyze data content. This is a best-effort report, so any failure
    # (unreadable file, missing column, ...) is shown and recorded instead of
    # aborting the cell.
    df = None
    try:
        df = pd.read_parquet(file)
        analyze_price_data(df)
    except Exception as e:
        print(f"      ❌ Error analyzing data: {e}")
        analysis_errors.append(file)
    finally:
        del df  # Free memory on both the success and the failure path

print("\n📋 Summary:")
print(f"   Data ready: {'✅ Yes' if data_ready else '❌ No'}")
print(f"   Total size: {total_size_mb:.1f}MB")

if not data_ready:
    print("\n⚠️  Missing data files!")
    print("   📝 Please run: datageneration.ipynb first")
elif analysis_errors:
    # Do not claim the data was analyzed when the analysis itself failed.
    print(f"\n⚠️  Data files found, but analysis failed for: {', '.join(analysis_errors)}")
    print("   🔍 Check the error message above before proceeding")
else:
    print("\n🎉 All required data files available and analyzed!")
    print("   🚀 Ready to proceed with image analysis")

## 2. Image Generation - Our Reconstruction

In [None]:
# Generate 5-day images
print("🖼️  GENERATING 5-DAY IMAGES")
print("=" * 50)

start_time = time.time()

print("📊 Training data (1993-2000):")
!python chart_generator.py --image_days 5 --mode train

print("\n📊 Test data (2001-2019):")
!python chart_generator.py --image_days 5 --mode test

elapsed_5d = time.time() - start_time
print(f"\n⏱️  5-day generation completed in {elapsed_5d:.1f} seconds")

In [None]:
# Generate 20-day images  
print("🖼️  GENERATING 20-DAY IMAGES")
print("=" * 50)

start_time = time.time()

print("📊 Training data (1993-2000):")
!python chart_generator.py --image_days 20 --mode train

print("\n📊 Test data (2001-2019):")
!python chart_generator.py --image_days 20 --mode test

elapsed_20d = time.time() - start_time
print(f"\n⏱️  20-day generation completed in {elapsed_20d:.1f} seconds")

In [None]:
# Generate 60-day images
print("🖼️  GENERATING 60-DAY IMAGES")
print("=" * 50)

start_time = time.time()

print("📊 Training data (1993-2000):")
!python chart_generator.py --image_days 60 --mode train

print("\n📊 Test data (2001-2019):")
!python chart_generator.py --image_days 60 --mode test

elapsed_60d = time.time() - start_time
print(f"\n⏱️  60-day generation completed in {elapsed_60d:.1f} seconds")

total_elapsed = elapsed_5d + elapsed_20d + elapsed_60d
print(f"\n🎉 Total generation time: {total_elapsed:.1f} seconds ({total_elapsed/60:.1f} minutes)")

## 3. Generated Images Summary

In [None]:
# Check our generated reconstructed images
print("📊 IMAGE DATA SUMMARY")
print("=" * 60)

# Check available data directories
available_dirs = {
    'original_author': 'img_data',
    'reconstructed_filled': 'img_data_reconstructed_filled'
}

# Bytes per stored image (height * width) for each horizon sub-directory;
# used to estimate the image count from the total size of the .dat files.
IMAGE_BYTES_BY_SUBDIR = {
    'weekly_5d': 32 * 15,
    'monthly_20d': 64 * 60,
    'quarterly_60d': 96 * 180,
}


def summarize_image_dir(version, base_dir):
    """Print and return image statistics for one dataset directory.

    Args:
        version: Label used in the printed lines (e.g. 'original_author').
        base_dir: Directory expected to contain the horizon sub-directories.

    Returns:
        Dict with 'images' (estimated image count), 'size_gb' (size of all
        .dat/.feather files) and 'success_count' (sub-directories found).
    """
    total_images = 0
    total_size_gb = 0
    success_count = 0

    for subdir, image_size in IMAGE_BYTES_BY_SUBDIR.items():
        img_dir = os.path.join(base_dir, subdir)
        if not os.path.isdir(img_dir):
            # Report the gap instead of skipping it silently.
            print(f"❌ {version} {subdir:15}: directory missing")
            continue

        # List the directory once and stat every relevant file once.
        file_sizes = {
            name: os.path.getsize(os.path.join(img_dir, name))
            for name in os.listdir(img_dir)
            if name.endswith(('.dat', '.feather'))
        }
        dat_sizes = [size for name, size in file_sizes.items() if name.endswith('.dat')]
        feather_count = len(file_sizes) - len(dat_sizes)

        total_size_gb += sum(file_sizes.values()) / (1024**3)

        # Estimate image count (rough calculation)
        estimated_images = sum(dat_sizes) // image_size
        total_images += estimated_images
        success_count += 1

        print(f"✅ {version} {subdir:15}: {estimated_images:8,} images, "
              f"{len(dat_sizes)} .dat, {feather_count} .feather")

    return {
        'images': total_images,
        'size_gb': total_size_gb,
        'success_count': success_count
    }


reconstructed_stats = {}
expected_subdirs = len(IMAGE_BYTES_BY_SUBDIR)

for version, base_dir in available_dirs.items():
    if not os.path.exists(base_dir):
        print(f"❌ {version}: Directory {base_dir} not found")
        reconstructed_stats[version] = None
        continue

    stats = summarize_image_dir(version, base_dir)
    reconstructed_stats[version] = stats

    print(f"\n📋 {version.upper()} SUMMARY:")
    print(f"   Success rate: {stats['success_count']}/{expected_subdirs} "
          f"({stats['success_count']/expected_subdirs*100:.0f}%)")
    print(f"   Total images: {stats['images']:,}")
    print(f"   Total size: {stats['size_gb']:.2f}GB")

# A dataset counts as usable only when at least one horizon sub-directory was
# found; an existing but empty base directory must not count as "available".
usable = {
    version: bool(stats) and stats['success_count'] > 0
    for version, stats in reconstructed_stats.items()
}

if usable.get('original_author') and usable.get('reconstructed_filled'):
    print("\n🎉 Both datasets available for comparison!")
elif usable.get('reconstructed_filled'):
    print("\n✅ Reconstructed filled data available for training")
else:
    print("\n⚠️  No image data found - need to generate images first")

## 4. 🔍 DETAILED COMPARISON: Original Author vs Our Reconstruction

In [None]:
# Check original author data availability
print("🔍 ORIGINAL AUTHOR DATA AVAILABILITY CHECK")
print("=" * 60)

original_base_dir = 'img_data'
original_available = os.path.exists(original_base_dir)

if not original_available:
    print(f"\n❌ Original author data not found: {original_base_dir}/")
    print("   📝 Note: Original data not available for comparison")
else:
    print(f"\n📂 Found original author data: {original_base_dir}/")

    subdirs = ['weekly_5d', 'monthly_20d', 'quarterly_60d']
    original_total_images = 0
    original_total_size_gb = 0
    original_success_count = 0

    # Bytes per stored image; any sub-directory other than the first two
    # falls back to the quarterly_60d size.
    bytes_per_image = {'weekly_5d': 32 * 15, 'monthly_20d': 64 * 60}
    quarterly_bytes = 96 * 180

    for subdir in subdirs:
        img_dir = os.path.join(original_base_dir, subdir)

        if not os.path.exists(img_dir):
            print(f"❌ {subdir:15}: directory missing")
            continue

        # Size of every .dat / .feather file in the sub-directory
        payload_sizes = {
            name: os.path.getsize(os.path.join(img_dir, name))
            for name in os.listdir(img_dir)
            if name.endswith(('.dat', '.feather'))
        }
        size_gb = sum(payload_sizes.values()) / (1024**3)
        original_total_size_gb += size_gb

        # Estimate image count from the combined size of the .dat files
        dat_size = sum(nbytes for name, nbytes in payload_sizes.items()
                       if name.endswith('.dat'))
        estimated_images = dat_size // bytes_per_image.get(subdir, quarterly_bytes)

        original_total_images += estimated_images
        original_success_count += 1

        print(f"✅ {subdir:15}: {estimated_images:8,} images, {size_gb:6.2f}GB")

    if original_success_count > 0:
        print("\n📊 ORIGINAL AUTHOR SUMMARY:")
        print(f"   Available directories: {original_success_count}/3")
        print(f"   Total images: {original_total_images:,}")
        print(f"   Total size: {original_total_size_gb:.2f}GB")
    else:
        # The base directory exists but none of the expected sub-directories do
        print("\n❌ No valid original author data found")
        original_available = False

In [None]:
# Side-by-side comparison analysis

# Bytes per stored image (height * width) for each horizon sub-directory;
# used to estimate image counts from the total size of the .dat files.
COMPARISON_IMAGE_BYTES = {
    'weekly_5d': 32 * 15,
    'monthly_20d': 64 * 60,
    'quarterly_60d': 96 * 180,
}


def count_images_by_subdir(base_dir):
    """Estimate the image count of every horizon sub-directory of *base_dir*.

    Returns:
        Dict mapping sub-directory name to ``{'images': int}``, or to ``None``
        when that sub-directory does not exist.
    """
    counts = {}
    for subdir, image_bytes in COMPARISON_IMAGE_BYTES.items():
        img_dir = os.path.join(base_dir, subdir)
        if not os.path.isdir(img_dir):
            counts[subdir] = None
            continue
        dat_bytes = sum(
            os.path.getsize(os.path.join(img_dir, name))
            for name in os.listdir(img_dir)
            if name.endswith('.dat')
        )
        counts[subdir] = {'images': dat_bytes // image_bytes}
    return counts


# `reconstructed_stats` has one entry per dataset (None when its directory is
# missing), so the dict itself is truthy even when nothing was generated.
# Test the reconstructed entry explicitly.
reconstructed_available = bool(reconstructed_stats.get('reconstructed_filled'))

if original_available and reconstructed_available:
    print("📊 SIDE-BY-SIDE COMPARISON ANALYSIS")
    print("=" * 70)

    print(f"\n{'Directory':<15} {'Original':<20} {'Reconstructed':<20} {'Ratio':<10} {'Status':<10}")
    print("-" * 85)

    comparison_results = {}
    total_original_images = 0
    total_reconstructed_images = 0

    # Per-sub-directory counts. The summary cell only keeps totals per
    # dataset, so they are computed here from the same base directories.
    original_stats = count_images_by_subdir(available_dirs['original_author'])
    reconstructed_dirs = count_images_by_subdir(available_dirs['reconstructed_filled'])

    for dir_name, recon_data in reconstructed_dirs.items():
        orig_data = original_stats.get(dir_name)

        if orig_data and recon_data:
            orig_images = orig_data['images']
            recon_images = recon_data['images']
            ratio = recon_images / orig_images if orig_images > 0 else 0

            total_original_images += orig_images
            total_reconstructed_images += recon_images

            # Status assessment
            if ratio >= 0.95:
                status = "✅ Good"
            elif ratio >= 0.8:
                status = "⚠️  OK"
            else:
                status = "❌ Poor"

            comparison_results[dir_name] = {
                'original': orig_images,
                'reconstructed': recon_images,
                'ratio': ratio,
                'status': status
            }

            print(f"{dir_name:<15} {orig_images:8,} images   {recon_images:8,} images   {ratio:8.3f}  {status:<10}")

        elif orig_data:
            print(f"{dir_name:<15} {orig_data['images']:8,} images   {'N/A':<15}  {'N/A':<8}  {'❌ Missing':<10}")
        elif recon_data:
            print(f"{dir_name:<15} {'N/A':<15}  {recon_data['images']:8,} images   {'N/A':<8}  {'❌ No Orig':<10}")
        else:
            print(f"{dir_name:<15} {'N/A':<15}  {'N/A':<15}  {'N/A':<8}  {'❌ Both':<10}")

    # Overall comparison
    if total_original_images > 0 and total_reconstructed_images > 0:
        overall_ratio = total_reconstructed_images / total_original_images

        print("\n🎯 OVERALL COMPARISON:")
        print(f"   Original total: {total_original_images:,} images")
        print(f"   Reconstructed total: {total_reconstructed_images:,} images")
        print(f"   Overall ratio: {overall_ratio:.3f}")
        print(f"   Difference: {total_reconstructed_images - total_original_images:+,} images")

        # Assessment
        if overall_ratio >= 0.95:
            assessment = "🎉 Excellent reconstruction! Very close to original."
        elif overall_ratio >= 0.8:
            assessment = "✅ Good reconstruction with minor differences."
        elif overall_ratio >= 0.6:
            assessment = "⚠️  Moderate reconstruction - significant data loss detected."
        else:
            assessment = "❌ Poor reconstruction - major data loss detected!"

        print(f"\n💡 Assessment: {assessment}")

        # Detailed insights
        print("\n🔍 Insights:")
        if overall_ratio < 0.9:
            print(f"   📉 Reconstruction has {(1-overall_ratio)*100:.1f}% fewer images")
            print("   🔍 Possible causes:")
            print("      - More strict NA filtering in our implementation")
            print("      - Different IPO/delisting filtering logic")
            print("      - Different sampling rate or window selection")
            print("   📝 Recommend: Review filtering logic in create_original_format.py")
        elif overall_ratio > 1.1:
            print(f"   📈 Reconstruction has {(overall_ratio-1)*100:.1f}% more images")
            print("   🔍 Possible causes:")
            print("      - Less strict filtering in our implementation")
            print("      - Different handling of missing data")
            print("   📝 This might actually be beneficial for training")
        else:
            print("   🎯 Image counts are very similar - good reconstruction quality")
            print("   ✅ Filtering logic appears to match the original paper")

elif not original_available and not reconstructed_available:
    # Checked before the single-missing cases so this branch is reachable.
    print("\n❌ UNEXPECTED ERROR")
    print("   📝 Both datasets appear to be missing")

elif not original_available:
    print("\n⚠️  COMPARISON NOT POSSIBLE")
    print("   📝 Original author data not available")
    print("   💡 To enable comparison:")
    print("      1. Obtain original img_data/ from paper authors")
    print("      2. Place in project root directory")
    print("      3. Re-run this analysis")

else:
    print("\n❌ RECONSTRUCTION FAILED")
    print("   📝 Our reconstruction was not successful")
    print("   💡 Check previous cells for error messages")

## 5. Visual Sample Comparison

In [None]:
# Load and display sample images for visual comparison


def load_sample_images(dat_path, num_samples=3, image_height=64, image_width=60):
    """Load up to *num_samples* images from the start of a raw ``.dat`` file.

    The file is read as consecutive records of ``image_height * image_width``
    bytes, one ``uint8`` value per pixel.

    Returns:
        List of 2-D ``uint8`` arrays of shape (image_height, image_width).
        Empty when the file cannot be read or holds no complete image.
    """
    image_size = image_height * image_width
    try:
        max_images = os.path.getsize(dat_path) // image_size
        # Clamp at 0: a negative count would turn into read(-n), i.e. read all.
        num_to_load = max(0, min(num_samples, max_images))
        with open(dat_path, 'rb') as f:
            raw = f.read(num_to_load * image_size)
    except OSError as e:
        print(f"   ❌ Error loading {dat_path}: {e}")
        return []

    complete = len(raw) // image_size  # ignore a trailing partial record
    if complete == 0:
        return []
    pixels = np.frombuffer(raw[:complete * image_size], dtype=np.uint8)
    return list(pixels.reshape(complete, image_height, image_width))


def pixel_similarity_pct(img_a, img_b):
    """Return the percentage of positions where both images have equal values."""
    return np.sum(img_a == img_b) / img_a.size * 100


# `reconstructed_stats` has one entry per dataset (None when its directory is
# missing), so the dict itself is always truthy; test the reconstructed entry.
reconstructed_available = bool(reconstructed_stats.get('reconstructed_filled'))

if original_available and reconstructed_available:
    print("🖼️  VISUAL SAMPLE COMPARISON")
    print("=" * 60)

    # Focus on monthly_20d for visual comparison
    focus_dir = 'monthly_20d'

    original_dir = os.path.join('img_data', focus_dir)
    # Same base directory the image summary cell reports on; the previous
    # 'img_data_reconstructed' path is not the directory checked there.
    reconstructed_dir = os.path.join('img_data_reconstructed_filled', focus_dir)

    if os.path.isdir(original_dir) and os.path.isdir(reconstructed_dir):
        # Find common files; sorted so the chosen sample is reproducible
        common_dat_files = sorted(
            name
            for name in set(os.listdir(original_dir)) & set(os.listdir(reconstructed_dir))
            if name.endswith('.dat')
        )

        if common_dat_files:
            sample_file = common_dat_files[0]  # Take first common .dat file
            print(f"\n📊 Comparing sample file: {sample_file}")

            # Load samples from both datasets.
            # NOTE(review): images are paired by position in the file; this
            # assumes both files store samples in the same order - confirm.
            original_images = load_sample_images(os.path.join(original_dir, sample_file))
            reconstructed_images = load_sample_images(os.path.join(reconstructed_dir, sample_file))

            if original_images and reconstructed_images:
                num_samples = min(len(original_images), len(reconstructed_images), 3)
                similarities = [
                    pixel_similarity_pct(orig_img, recon_img)
                    for orig_img, recon_img in zip(original_images[:num_samples],
                                                   reconstructed_images[:num_samples])
                ]

                # squeeze=False keeps `axes` 2-D even when only one sample
                # was loaded, so axes[row, col] indexing always works.
                fig, axes = plt.subplots(2, num_samples, figsize=(5*num_samples, 10),
                                         squeeze=False)
                fig.suptitle(f'Visual Comparison: Original vs Reconstructed\n({sample_file})',
                             fontsize=16, fontweight='bold')

                # (row, images, title label, title color, annotation box color)
                rows = (
                    (0, original_images, 'Original', 'blue', 'lightblue'),
                    (1, reconstructed_images, 'Reconstructed', 'red', 'lightcoral'),
                )
                for col in range(num_samples):
                    for row, images, label, title_color, box_color in rows:
                        ax = axes[row, col]
                        ax.imshow(images[col], cmap='gray', aspect='auto')
                        ax.set_title(f'{label} #{col+1}', fontweight='bold', color=title_color)
                        ax.set_xlabel('Trading Days (3 pixels each)')
                        ax.set_ylabel('Price Level')
                        # Add statistics annotations
                        ax.text(0.02, 0.98, f'White pixels: {np.count_nonzero(images[col])}',
                                transform=ax.transAxes, verticalalignment='top',
                                bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))

                    # Add similarity info to the reconstructed panel
                    similarity_pct = similarities[col]
                    if similarity_pct > 80:
                        badge_color = 'lightgreen'
                    elif similarity_pct > 50:
                        badge_color = 'yellow'
                    else:
                        badge_color = 'lightcoral'
                    axes[1, col].text(0.02, 0.02, f'Similarity: {similarity_pct:.1f}%',
                                      transform=axes[1, col].transAxes, verticalalignment='bottom',
                                      bbox=dict(boxstyle='round', facecolor=badge_color, alpha=0.8))

                plt.tight_layout()
                plt.show()

                # Print detailed comparison statistics
                print("\n📊 Detailed Comparison Statistics:")

                for i in range(num_samples):
                    orig_img = original_images[i]
                    recon_img = reconstructed_images[i]

                    # Pixel-level comparison
                    exact_match = np.sum(orig_img == recon_img)
                    total_pixels = orig_img.size
                    pixel_similarity = similarities[i]

                    # Pattern comparison (non-zero pixels)
                    pattern_match = np.sum((orig_img > 0) == (recon_img > 0))
                    pattern_similarity = pattern_match / total_pixels * 100

                    print(f"\n   Sample #{i+1}:")
                    print(f"     Exact pixel match: {exact_match:,}/{total_pixels:,} ({pixel_similarity:.1f}%)")
                    print(f"     Pattern similarity: {pattern_match:,}/{total_pixels:,} ({pattern_similarity:.1f}%)")
                    print(f"     Original non-zero pixels: {np.count_nonzero(orig_img):,}")
                    print(f"     Reconstructed non-zero pixels: {np.count_nonzero(recon_img):,}")

                avg_similarity = sum(similarities) / num_samples
                print(f"\n🎯 Average pixel similarity: {avg_similarity:.1f}%")

                # Overall assessment
                if avg_similarity >= 90:
                    print("   🎉 Excellent! Images are nearly identical")
                elif avg_similarity >= 70:
                    print("   ✅ Good! Images are quite similar")
                elif avg_similarity >= 50:
                    print("   ⚠️  Moderate similarity - some differences detected")
                else:
                    print("   ❌ Low similarity - significant differences detected!")
                    print("   🔍 Recommend investigating image generation algorithm")

            else:
                print("   ❌ Failed to load sample images for comparison")
        else:
            print("   ❌ No common .dat files found for visual comparison")
    else:
        print("   ❌ Required directories not found for visual comparison")
        print(f"      Original: {original_dir} ({'✅' if os.path.exists(original_dir) else '❌'})")
        print(f"      Reconstructed: {reconstructed_dir} ({'✅' if os.path.exists(reconstructed_dir) else '❌'})")

else:
    print("⚠️  VISUAL COMPARISON SKIPPED")
    print("   Cannot compare - missing original or reconstructed data")

## 6. Performance Benchmark Summary

In [None]:
# Final comprehensive summary
print("📋 COMPREHENSIVE GENERATION & COMPARISON SUMMARY")
print("=" * 80)

# `reconstructed_stats` has one entry per dataset (None when its directory is
# missing), so the dict itself is always truthy. Only the
# 'reconstructed_filled' entry describes the images generated here.
recon_summary = reconstructed_stats.get('reconstructed_filled') if reconstructed_stats else None
reconstructed_available = bool(recon_summary)

# Generation performance summary
if 'total_elapsed' in locals():
    print("\n⚡ GENERATION PERFORMANCE:")
    print(f"   Total generation time: {total_elapsed:.1f} seconds ({total_elapsed/60:.1f} minutes)")
    # Count only our reconstruction; the original author's images were not
    # produced by this notebook and must not inflate the throughput figure.
    generated_images = recon_summary['images'] if recon_summary else 0
    if generated_images > 0 and total_elapsed > 0:
        images_per_second = generated_images / total_elapsed
        print(f"   Images generated: {generated_images:,}")
        print(f"   Generation speed: {images_per_second:.1f} images/second")

# Comparison summary
if original_available and reconstructed_available:
    print("\n🔍 COMPARISON SUMMARY:")
    print("   Original data: ✅ Available")
    print("   Reconstructed data: ✅ Available")
    if 'overall_ratio' in locals():
        print(f"   Image count ratio: {overall_ratio:.3f}")
        print(f"   Quality assessment: {assessment.split(': ')[1] if ': ' in assessment else assessment}")
    if 'avg_similarity' in locals():
        print(f"   Visual similarity: {avg_similarity:.1f}%")
elif original_available:
    print("\n🔍 COMPARISON SUMMARY:")
    print("   Original data: ✅ Available")
    print("   Reconstructed data: ❌ Generation failed")
elif reconstructed_available:
    print("\n🔍 COMPARISON SUMMARY:")
    print("   Original data: ❌ Not available")
    print("   Reconstructed data: ✅ Successfully generated")
    print("   Note: Cannot compare without original data")
else:
    print("\n❌ GENERATION FAILED")
    print("   Both original and reconstructed data unavailable")

# Next steps
print("\n📚 NEXT STEPS:")

# Complete means all three horizon sub-directories of the reconstruction were
# found. Counting the truthy entries of `reconstructed_stats` cannot work: it
# holds two datasets, so that count could never reach 3.
generation_complete = reconstructed_available and recon_summary['success_count'] == 3

if generation_complete:
    print("   ✅ Image generation completed successfully")
    print("   🚀 Ready to proceed with model training")
    print("   📝 Run: ipynb/2_model_training.ipynb")

    if original_available and 'overall_ratio' in locals():
        if overall_ratio < 0.8:
            print("\n   ⚠️  Data Quality Recommendations:")
            print("      📊 Investigate reconstruction algorithm differences")
            print("      🔍 Review filtering logic in create_original_format.py")
            print("      📈 Consider adjusting parameters to match original better")
        elif 'avg_similarity' in locals() and avg_similarity < 70:
            print("\n   ⚠️  Visual Quality Recommendations:")
            print("      🖼️  Investigate image generation algorithm")
            print("      🎨 Check price normalization and pixel mapping")
            print("      📏 Verify image dimensions and data structures")
        else:
            print("\n   🎉 High quality reconstruction achieved!")
            print("   ✅ Both quantity and visual similarity are excellent")
else:
    print("   ❌ Image generation incomplete or failed")
    print("   🔧 Debug create_original_format.py execution")
    print("   📋 Check error logs and data file availability")

# Save summary to file
summary_data = {
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'generation_time_seconds': total_elapsed if 'total_elapsed' in locals() else None,
    'original_available': original_available,
    'reconstructed_stats': reconstructed_stats,
    'comparison_ratio': overall_ratio if 'overall_ratio' in locals() else None,
    'visual_similarity': avg_similarity if 'avg_similarity' in locals() else None
}

import json
os.makedirs('analysis_results', exist_ok=True)
with open('analysis_results/image_generation_summary.json', 'w', encoding='utf-8') as f:
    # default=str is a fallback for values json cannot encode natively
    json.dump(summary_data, f, indent=2, default=str)

print("\n💾 Summary saved to: analysis_results/image_generation_summary.json")
print("\n🎉 Image generation and comparison analysis completed!")