# Image Properties Analysis

This notebook analyzes the technical properties of the scraped images, including:
- File size distributions and statistics
- Image dimensions analysis
- Color mode breakdown
- Format distribution and popularity

## Setup and Data Loading

In [None]:
import sys
sys.path.append('../visualizers')
sys.path.append('../utils')

from image_analysis import create_combined_image_analysis
from data_loader import load_report_data, get_file_size_stats, get_dimension_stats, format_bytes
from plot_helpers import apply_global_style, display_config

# Apply the shared plot styling before any figure is created.
apply_global_style()

# Load the report once; later cells read everything from `data`.
report_path = '../../sample_report.json'
data = load_report_data(report_path)
print("Data loaded successfully!")

generated_at = data.get('generated_at', 'Unknown')
print(f"Generated at: {generated_at}")

## File Size Analysis

Distribution and statistics of image file sizes:

In [None]:
# Build the full set of charts once; individual figures are looked up
# in `charts` by key.
charts = create_combined_image_analysis(data)

# File-size histogram, rendered with the shared display configuration.
size_fig = charts['file_size_histogram']
show_config = display_config()
size_fig.show(config=show_config)

## Image Dimensions Analysis

Width and height distribution patterns:

In [None]:
# Dimension scatter figure, rendered with the shared display configuration.
dimension_fig = charts['dimension_scatter']
show_config = display_config()
dimension_fig.show(config=show_config)

## Dimension Ranges Comparison

Detailed breakdown of minimum, average, and maximum dimensions:

In [None]:
# Dimension-ranges figure, rendered with the shared display configuration.
ranges_fig = charts['dimension_ranges']
show_config = display_config()
ranges_fig.show(config=show_config)

## Color Mode Distribution

Breakdown of color modes (RGB, RGBA, etc.):

In [None]:
# Color-mode distribution figure, rendered with the shared display configuration.
color_fig = charts['color_mode_distribution']
show_config = display_config()
color_fig.show(config=show_config)

## File Format Popularity

Most common image formats in the dataset:

In [None]:
# Format-popularity figure, rendered with the shared display configuration.
format_fig = charts['format_popularity']
show_config = display_config()
format_fig.show(config=show_config)

## Combined Size and Format Analysis

Relationship between file size and format distribution:

In [None]:
# Combined size/format figure, rendered with the shared display configuration.
combined_fig = charts['size_format_analysis']
show_config = display_config()
combined_fig.show(config=show_config)

## Technical Statistics Summary

Key technical insights from the image analysis:

In [None]:
# Print a text summary of the file-size and dimension statistics
# extracted from the loaded report.
file_stats = get_file_size_stats(data)
dim_stats = get_dimension_stats(data)

print("=== FILE SIZE STATISTICS ===")
print(f"Average Size: {format_bytes(file_stats.get('average_bytes', 0))}")
print(f"Minimum Size: {format_bytes(file_stats.get('min_bytes', 0))}")
print(f"Maximum Size: {format_bytes(file_stats.get('max_bytes', 0))}")
print(f"Total Storage: {format_bytes(file_stats.get('total_bytes', 0))}")

print("\n=== DIMENSION STATISTICS ===")
print(f"Average Dimensions: {dim_stats.get('avg_width', 0):.0f} x {dim_stats.get('avg_height', 0):.0f} pixels")
print(f"Min Dimensions: {dim_stats.get('min_width', 0)} x {dim_stats.get('min_height', 0)} pixels")
print(f"Max Dimensions: {dim_stats.get('max_width', 0)} x {dim_stats.get('max_height', 0)} pixels")

# This is the ratio of the average width to the average height, not the
# mean of per-image aspect ratios.
avg_width = dim_stats.get('avg_width', 0)
avg_height = dim_stats.get('avg_height', 0)

# A missing or zero average height (e.g. a report with no images) would
# otherwise raise ZeroDivisionError, so report "N/A" instead.
if avg_height:
    avg_aspect_ratio = avg_width / avg_height
    print(f"Average Aspect Ratio: {avg_aspect_ratio:.2f}:1")
else:
    avg_aspect_ratio = None
    print("Average Aspect Ratio: N/A")

## Format and Color Mode Summary

Image counts and percentage shares for the top five file formats and for every color mode:

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the setup cell.
from utils import extract_quantitative_stats

# Print image counts and percentage shares per file format and color mode.
quant_stats = extract_quantitative_stats(data)
formats = quant_stats.get('formats', {})
color_modes = quant_stats.get('color_modes', {})

# Totals are loop-invariant, so compute them once instead of per iteration.
total_formats = sum(formats.values())
total_modes = sum(color_modes.values())

print("=== TOP FILE FORMATS ===")
sorted_formats = sorted(formats.items(), key=lambda x: x[1], reverse=True)
for fmt, count in sorted_formats[:5]:
    # Guard against a zero total (every count is 0) to avoid ZeroDivisionError.
    percentage = (count / total_formats) * 100 if total_formats else 0.0
    print(f"{fmt.upper()}: {count:,} images ({percentage:.1f}%)")

print("\n=== COLOR MODES ===")
sorted_modes = sorted(color_modes.items(), key=lambda x: x[1], reverse=True)
for mode, count in sorted_modes:
    # Same zero-total guard as for formats.
    percentage = (count / total_modes) * 100 if total_modes else 0.0
    print(f"{mode}: {count:,} images ({percentage:.1f}%)")

## Export Options

Save charts for technical documentation:

In [None]:
# Optional export: uncomment the lines below to save the charts.
# The calls use the figure variables assigned in the cells above
# (size_fig, dimension_fig, color_fig, format_fig, combined_fig), so run
# those cells first.
# NOTE(review): ranges_fig is not exported here — confirm whether that is intentional.
#
# from plot_helpers import save_plot
#
# save_plot(size_fig, 'file_size_analysis', 'png', width=1000, height=500)
# save_plot(dimension_fig, 'dimension_scatter', 'png', width=1000, height=600)
# save_plot(color_fig, 'color_mode_distribution', 'png', width=800, height=600)
# save_plot(format_fig, 'format_popularity', 'png', width=800, height=600)
# save_plot(combined_fig, 'size_format_combined', 'html')
#
# print("Technical analysis charts saved to visualizations/output/")