# In [None]:
# Greenhouse Gas Analytics - Visualization Tests
# Notebook 04: Advanced Visualization Development and Testing

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import warnings
warnings.filterwarnings('ignore')

# Additional libraries for advanced visualizations
import networkx as nx
from wordcloud import WordCloud
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Notebook banner.
print("🎨 Greenhouse Gas Analytics - Visualization Tests")
print("=" * 55)

# Global plotting defaults: matplotlib rcParams first, then the seaborn theme.
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 10,
})
sns.set_style("whitegrid")
sns.set_palette("husl")

# ## 1. Load and Prepare Data

def load_test_data():
    """Load the dataset used by the visualization tests.

    Reads '../data/processed/cleaned_data.parquet'. When that file does not
    exist, falls back to the synthetic frame built by
    create_enhanced_sample_data().

    Returns:
        pandas.DataFrame: the processed data if the parquet file is found,
        otherwise the generated sample data.
    """
    # Deliberately undecorated: `st` is never imported in this file, so a
    # `@st.cache_data` decorator raises NameError as soon as the def executes.
    try:
        df = pd.read_parquet('../data/processed/cleaned_data.parquet')
    except FileNotFoundError:
        print("⚠️ Creating comprehensive sample data for visualization testing...")
        return create_enhanced_sample_data()

    print("✅ Processed data loaded successfully!")
    return df

def create_enhanced_sample_data():
    """Generate a reproducible synthetic emissions dataset.

    For each of 15 hard-coded countries, each year 2019-2022 and each of four
    emission types, 2-4 segments are drawn without replacement and one record
    is produced per segment. The global NumPy RNG is seeded with 42, so
    repeated calls return identical frames.

    Returns:
        pandas.DataFrame with columns: country, region, population,
        gdp_per_capita, development_level, latitude, longitude, type,
        segment, emissions, year, quarter, confidence_level.
    """
    np.random.seed(42)

    # (country, region, population, gdp, development level, latitude, longitude)
    country_profiles = [
        ('China', 'Asia', 1400, 17734, 'Developing', 35.0, 105.0),
        ('India', 'Asia', 1380, 3737, 'Developing', 20.0, 77.0),
        ('United States', 'North America', 330, 63544, 'Developed', 40.0, -100.0),
        ('Indonesia', 'Asia', 270, 4256, 'Developing', -5.0, 120.0),
        ('Brazil', 'South America', 215, 8897, 'Developing', -14.0, -51.0),
        ('Nigeria', 'Africa', 220, 2229, 'Developing', 9.0, 8.0),
        ('Russia', 'Europe', 145, 11305, 'Developed', 60.0, 100.0),
        ('Mexico', 'North America', 130, 9946, 'Developing', 23.0, -102.0),
        ('Iran', 'Asia', 85, 5627, 'Developing', 32.0, 53.0),
        ('Germany', 'Europe', 83, 46259, 'Developed', 51.0, 9.0),
        ('Turkey', 'Europe', 85, 9127, 'Developing', 39.0, 35.0),
        ('Canada', 'North America', 38, 43242, 'Developed', 60.0, -95.0),
        ('Australia', 'Oceania', 26, 54907, 'Developed', -25.0, 133.0),
        ('Argentina', 'South America', 45, 8449, 'Developing', -38.0, -64.0),
        ('Saudi Arabia', 'Asia', 35, 23139, 'Developed', 24.0, 45.0),
    ]

    emission_types = ['Agriculture', 'Energy', 'Waste', 'Other']
    segments = ['Livestock', 'Oil & Gas', 'Landfills', 'Rice Cultivation', 'Coal Mining', 'Bioenergy', 'Gas pipelines']
    quarter_labels = ['Q1', 'Q2', 'Q3', 'Q4']
    confidence_labels = ['High', 'Medium', 'Low']
    confidence_weights = [0.6, 0.3, 0.1]

    records = []
    for name, region, pop, gdp, dev_level, lat, lon in country_profiles:
        # Country-level scale factor driven by population and GDP.
        base_emission_factor = (pop / 100) + (gdp / 10000)

        # Sector multipliers depend only on the development level, so they
        # are computed once per country rather than once per record.
        multiplier_for = {
            'Agriculture': 1.5 if dev_level == 'Developing' else 0.8,
            'Energy': 2.0 if dev_level == 'Developed' else 1.2,
            'Waste': 0.7,
            'Other': 0.5,
        }

        for year in (2019, 2020, 2021, 2022):
            for emission_type in emission_types:
                # The random draws below happen in a fixed order; the seeded
                # output depends on that order.
                segment_count = np.random.randint(2, 5)
                chosen_segments = np.random.choice(segments, size=segment_count, replace=False)

                for segment in chosen_segments:
                    raw_value = (base_emission_factor * multiplier_for[emission_type] *
                                 np.random.uniform(0.3, 2.5) + np.random.normal(0, 10))
                    emission_value = max(0, raw_value)  # noise may push the value below zero
                    quarter = np.random.choice(quarter_labels)
                    confidence = np.random.choice(confidence_labels, p=confidence_weights)

                    records.append({
                        'country': name,
                        'region': region,
                        'population': pop,
                        'gdp_per_capita': gdp,
                        'development_level': dev_level,
                        'latitude': lat,
                        'longitude': lon,
                        'type': emission_type,
                        'segment': segment,
                        'emissions': emission_value,
                        'year': year,
                        'quarter': quarter,
                        'confidence_level': confidence,
                    })

    return pd.DataFrame(records)

# Load data
# Module-level `df` is the frame passed to every test function call below.
df = load_test_data()

# Short summary of what was loaded: shape, column names and the year span.
print(f"📊 Dataset loaded for visualization testing:")
print(f"  • Shape: {df.shape}")
print(f"  • Columns: {list(df.columns)}")
print(f"  • Date range: {df['year'].min()}-{df['year'].max()}")

# ## 2. Basic Visualization Tests

def test_basic_plots(df):
    """Render the six-panel matplotlib overview, then the Plotly figures.

    The matplotlib part fills a 3x2 grid (histogram, regional bar chart,
    sector pie, population scatter, yearly trend lines, regional box plot)
    and shows it. The Plotly part builds a parallel-coordinates plot, a
    treemap, a geographic bubble map and a 2x2 dashboard.

    Args:
        df: emissions DataFrame. 'emissions', 'region', 'country' and 'type'
            are always read; 'population', 'gdp_per_capita', 'year',
            'latitude' and 'longitude' are used only when present.

    Returns:
        None. Figures are displayed and progress lines are printed.
    """
    print(f"\n📊 TESTING BASIC VISUALIZATIONS:")
    print("="*40)

    # Create a comprehensive figure with subplots
    fig, axes = plt.subplots(3, 2, figsize=(18, 15))
    fig.suptitle('Basic Visualization Tests', fontsize=16, fontweight='bold')

    # 1. Emissions distribution histogram
    df['emissions'].hist(bins=50, ax=axes[0,0], alpha=0.7, color='skyblue', edgecolor='black')
    axes[0,0].set_title('Emissions Distribution')
    axes[0,0].set_xlabel('Emissions (Mt CO₂e)')
    axes[0,0].set_ylabel('Frequency')
    axes[0,0].grid(True, alpha=0.3)

    # 2. Regional emissions bar plot
    regional_emissions = df.groupby('region')['emissions'].sum().sort_values(ascending=True)
    regional_emissions.plot(kind='barh', ax=axes[0,1], color='lightcoral')
    axes[0,1].set_title('Total Emissions by Region')
    axes[0,1].set_xlabel('Total Emissions (Mt CO₂e)')

    # 3. Sector pie chart
    sector_emissions = df.groupby('type')['emissions'].sum()
    colors = plt.cm.Set3(np.linspace(0, 1, len(sector_emissions)))
    axes[1,0].pie(sector_emissions.values, labels=sector_emissions.index, autopct='%1.1f%%', colors=colors)
    axes[1,0].set_title('Emissions by Sector')

    # 4. Scatter plot: Population vs Emissions
    if 'population' in df.columns:
        country_totals = df.groupby('country').agg({
            'emissions': 'sum',
            'population': 'first',
            'region': 'first'
        }).reset_index()

        for region in country_totals['region'].unique():
            region_data = country_totals[country_totals['region'] == region]
            axes[1,1].scatter(region_data['population'], region_data['emissions'],
                            label=region, alpha=0.7, s=60)

        axes[1,1].set_xlabel('Population (millions)')
        axes[1,1].set_ylabel('Total Emissions (Mt CO₂e)')
        axes[1,1].set_title('Population vs Emissions by Region')
        axes[1,1].legend()
        axes[1,1].grid(True, alpha=0.3)
    else:
        # Hide the panel rather than leaving an empty frame in the grid.
        axes[1,1].axis('off')

    # 5. Time series plot
    if 'year' in df.columns:
        yearly_emissions = df.groupby(['year', 'type'])['emissions'].sum().reset_index()
        for emission_type in yearly_emissions['type'].unique():
            type_data = yearly_emissions[yearly_emissions['type'] == emission_type]
            axes[2,0].plot(type_data['year'], type_data['emissions'],
                          marker='o', linewidth=2, label=emission_type)

        axes[2,0].set_xlabel('Year')
        axes[2,0].set_ylabel('Emissions (Mt CO₂e)')
        axes[2,0].set_title('Emission Trends by Sector')
        axes[2,0].legend()
        axes[2,0].grid(True, alpha=0.3)
    else:
        axes[2,0].axis('off')

    # 6. Box plot by region
    df.boxplot(column='emissions', by='region', ax=axes[2,1])
    axes[2,1].set_title('Emissions Distribution by Region')
    axes[2,1].set_xlabel('Region')
    axes[2,1].set_ylabel('Emissions (Mt CO₂e)')
    plt.setp(axes[2,1].get_xticklabels(), rotation=45)
    # A grouped pandas boxplot writes its own figure-level title
    # ("Boxplot grouped by ..."), so the intended one is applied again.
    fig.suptitle('Basic Visualization Tests', fontsize=16, fontweight='bold')

    plt.tight_layout()
    plt.show()

    print("✅ Basic plots generated successfully!")

    # ------------------------------------------------------------------
    # Plotly figures.
    # NOTE(review): these four sections look like they belong to the
    # advanced Plotly test rather than the "basic" one; they are kept here
    # so nothing this function displayed is lost. Confirm and relocate.
    # ------------------------------------------------------------------

    # Parallel Coordinates Plot
    if all(col in df.columns for col in ['emissions', 'population', 'gdp_per_capita']):
        country_data = df.groupby('country').agg({
            'emissions': 'sum',
            'population': 'first',
            'gdp_per_capita': 'first',
            'region': 'first'
        }).reset_index()

        # Standardize the numeric columns so the three axes are comparable.
        scaler = StandardScaler()

        numeric_cols = ['emissions', 'population', 'gdp_per_capita']
        country_data_scaled = country_data.copy()
        country_data_scaled[numeric_cols] = scaler.fit_transform(country_data[numeric_cols])

        fig_parallel = px.parallel_coordinates(
            country_data_scaled,
            dimensions=['emissions', 'population', 'gdp_per_capita'],
            color='emissions',
            color_continuous_scale='Viridis',
            title='Parallel Coordinates: Country Characteristics'
        )

        fig_parallel.show()
        print("✅ Parallel coordinates plot created")

    # Hierarchical Treemap
    treemap_data = df.groupby(['region', 'country', 'type'])['emissions'].sum().reset_index()
    # Only strictly positive totals are passed on as treemap areas.
    treemap_data = treemap_data[treemap_data['emissions'] > 0]

    fig_treemap = px.treemap(
        treemap_data,
        path=['region', 'country', 'type'],
        values='emissions',
        title='Hierarchical Emissions Breakdown: Region → Country → Sector',
        color='emissions',
        color_continuous_scale='RdYlBu_r'
    )

    fig_treemap.update_layout(height=700)
    fig_treemap.show()
    print("✅ Hierarchical treemap created")

    # Geographic Bubble Map
    if all(col in df.columns for col in ['latitude', 'longitude']):
        geo_data = df.groupby('country').agg({
            'emissions': 'sum',
            'latitude': 'first',
            'longitude': 'first',
            'region': 'first',
            'population': 'first'
        }).reset_index()

        fig_geo = px.scatter_geo(
            geo_data,
            lat='latitude',
            lon='longitude',
            size='emissions',
            color='region',
            hover_name='country',
            hover_data={'emissions': ':,.1f', 'population': ':,'},
            title='Global Emissions Distribution',
            size_max=50
        )

        fig_geo.update_layout(height=600)
        fig_geo.show()
        print("✅ Geographic bubble map created")

    # Multi-panel Dashboard
    fig_dashboard = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Regional Emissions', 'Sector Distribution',
                       'Top Countries', 'Yearly Trends'),
        specs=[[{"type": "bar"}, {"type": "pie"}],
               [{"type": "bar"}, {"type": "scatter"}]]
    )

    # Regional emissions
    regional_totals = df.groupby('region')['emissions'].sum()
    fig_dashboard.add_trace(
        go.Bar(x=regional_totals.index, y=regional_totals.values, name="Regional"),
        row=1, col=1
    )

    # Sector pie
    sector_totals = df.groupby('type')['emissions'].sum()
    fig_dashboard.add_trace(
        go.Pie(labels=sector_totals.index, values=sector_totals.values, name="Sectors"),
        row=1, col=2
    )

    # Top countries
    top_countries = df.groupby('country')['emissions'].sum().sort_values(ascending=False).head(10)
    fig_dashboard.add_trace(
        go.Bar(x=top_countries.values, y=top_countries.index,
               orientation='h', name="Top Countries"),
        row=2, col=1
    )

    # Yearly trends
    if 'year' in df.columns:
        yearly_totals = df.groupby('year')['emissions'].sum()
        fig_dashboard.add_trace(
            go.Scatter(x=yearly_totals.index, y=yearly_totals.values,
                      mode='lines+markers', name="Yearly"),
            row=2, col=2
        )

    fig_dashboard.update_layout(height=800, showlegend=False,
                               title_text="Methane Emissions Dashboard")
    fig_dashboard.show()
    print("✅ Multi-panel dashboard created")

# NOTE(review): no `def test_plotly_advanced` appears above this call in the
# file as exported, so a top-to-bottom run would raise NameError here.
# Confirm the intended cell order.
test_plotly_advanced(df)

# ## 4. Specialized Visualizations

def test_specialized_visualizations(df):
    """Build and display the specialized charts for the emissions data.

    Produces, in order: a lower-triangle correlation heatmap, a ridge plot
    of emissions densities per region, a radar chart of normalized regional
    statistics, a country-sector network graph and a region-to-sector
    Sankey diagram.

    Args:
        df: emissions DataFrame. 'region', 'country', 'type' and 'emissions'
            are always read; 'population' and 'gdp_per_capita' are needed
            only for the radar chart.

    Returns:
        None. Figures are displayed and progress lines are printed.
    """
    print(f"\n🎯 TESTING SPECIALIZED VISUALIZATIONS:")
    print("="*45)

    # 1. Correlation Heatmap with Clustering
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 2:
        corr_matrix = df[numeric_cols].corr()

        # Hide the upper triangle (diagonal included); it mirrors the lower one.
        mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

        plt.figure(figsize=(12, 10))
        sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='RdYlBu_r', center=0,
                   square=True, linewidths=.5, cbar_kws={"shrink": .8})
        plt.title('Correlation Matrix of Numerical Variables', fontsize=14, fontweight='bold')
        plt.tight_layout()
        plt.show()
        print("✅ Correlation heatmap created")

    # 2. Ridge Plot for Emissions Distribution by Region
    # The density is evaluated directly with gaussian_kde: Series.plot.kde()
    # returns an Axes, which has no get_xdata()/get_ydata().
    from scipy.stats import gaussian_kde

    plt.figure(figsize=(12, 8))

    regions = df['region'].unique()
    colors = plt.cm.Set1(np.linspace(0, 1, len(regions)))

    all_emissions = df['emissions'].dropna()
    if not all_emissions.empty:
        # One shared x grid so every ridge is drawn over the same range.
        x_grid = np.linspace(all_emissions.min(), all_emissions.max(), 500)

        for i, region in enumerate(regions):
            region_emissions = df.loc[df['region'] == region, 'emissions'].dropna()
            if region_emissions.nunique() < 2:
                # A KDE needs at least two distinct values.
                continue

            density = gaussian_kde(region_emissions)(x_grid)
            # Scale each ridge to just under the unit spacing between
            # baselines; raw density values are far smaller than the offset.
            ridge = density / density.max() * 0.9
            plt.fill_between(x_grid, ridge + i, i,
                            alpha=0.7, color=colors[i], label=region)

    plt.title('Emissions Distribution Ridge Plot by Region', fontsize=14, fontweight='bold')
    plt.xlabel('Emissions (Mt CO₂e)')
    plt.ylabel('Region (offset)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
    print("✅ Ridge plot created")

    # 3. Radar Chart for Regional Characteristics
    if all(col in df.columns for col in ['population', 'gdp_per_capita']):
        # Collapse to one row per country first. Summing 'population' over
        # raw records would count a country once per emission record.
        # Assumes population and GDP are constant per country, as the
        # 'first' aggregations used elsewhere in this file do.
        country_level = df.groupby(['region', 'country']).agg({
            'emissions': 'sum',
            'population': 'first',
            'gdp_per_capita': 'first'
        }).reset_index()

        regional_stats = country_level.groupby('region').agg({
            'emissions': 'sum',
            'population': 'sum',
            'gdp_per_capita': 'mean',
            'country': 'nunique'
        })

        # Normalize for radar chart
        from sklearn.preprocessing import MinMaxScaler
        scaler = MinMaxScaler()
        regional_stats_norm = pd.DataFrame(
            scaler.fit_transform(regional_stats),
            index=regional_stats.index,
            columns=regional_stats.columns
        )

        fig_radar = go.Figure()
        axis_labels = list(regional_stats_norm.columns)

        for region in regional_stats_norm.index:
            values = regional_stats_norm.loc[region].values.tolist()
            # Repeat the first point so the polygon closes.
            fig_radar.add_trace(go.Scatterpolar(
                r=values + values[:1],
                theta=axis_labels + axis_labels[:1],
                fill='toself',
                name=region
            ))

        fig_radar.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1]
                )),
            showlegend=True,
            title="Regional Characteristics Radar Chart",
            height=600
        )

        fig_radar.show()
        print("✅ Radar chart created")

    # 4. Network Graph for Country-Sector Relationships
    # Only the 30 largest country-sector totals become edges.
    country_sector = df.groupby(['country', 'type'])['emissions'].sum().reset_index()
    country_sector = country_sector.sort_values('emissions', ascending=False).head(30)

    G = nx.Graph()

    # Add nodes
    for country in country_sector['country'].unique():
        G.add_node(country, node_type='country', size=20)

    for sector in country_sector['type'].unique():
        G.add_node(sector, node_type='sector', size=15)

    # Add edges (relationships)
    for _, row in country_sector.iterrows():
        G.add_edge(row['country'], row['type'], weight=row['emissions'])

    plt.figure(figsize=(16, 12))
    # Seeded so the layout is the same on every run.
    pos = nx.spring_layout(G, k=3, iterations=50, seed=42)

    # Draw country nodes
    country_nodes = [n for n in G.nodes() if G.nodes[n]['node_type'] == 'country']
    nx.draw_networkx_nodes(G, pos, nodelist=country_nodes, node_color='lightblue',
                          node_size=800, alpha=0.8)

    # Draw sector nodes
    sector_nodes = [n for n in G.nodes() if G.nodes[n]['node_type'] == 'sector']
    nx.draw_networkx_nodes(G, pos, nodelist=sector_nodes, node_color='lightcoral',
                          node_size=600, alpha=0.8)

    # Draw edges with thickness proportional to the largest weight. Falls
    # back to a uniform width when there are no edges or no positive weight.
    weights = [attrs['weight'] for _, _, attrs in G.edges(data=True)]
    max_weight = max(weights, default=0)
    if max_weight > 0:
        edge_widths = [w / max_weight * 5 for w in weights]
    else:
        edge_widths = 1.0
    nx.draw_networkx_edges(G, pos, width=edge_widths, alpha=0.6)

    # Draw labels
    nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')

    plt.title('Country-Sector Relationship Network', fontsize=16, fontweight='bold')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
    print("✅ Network graph created")

    # 5. Sankey Diagram for Flow Analysis
    # Flow from regions (left) to sectors (right); zero totals are dropped.
    region_sector = df.groupby(['region', 'type'])['emissions'].sum().reset_index()
    region_sector = region_sector[region_sector['emissions'] > 0]

    # Regions and sectors get separate index maps. With a single shared
    # name->index dict, a region and a sector with the same label would
    # collapse into one node.
    region_names = list(region_sector['region'].unique())
    sector_names = list(region_sector['type'].unique())
    region_index = {name: i for i, name in enumerate(region_names)}
    sector_index = {name: len(region_names) + i for i, name in enumerate(sector_names)}
    all_nodes = region_names + sector_names

    sources = [region_index[region] for region in region_sector['region']]
    targets = [sector_index[sector] for sector in region_sector['type']]
    values = region_sector['emissions'].tolist()

    fig_sankey = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_nodes,
            color=["lightblue"]*len(region_names) +
                  ["lightcoral"]*len(sector_names)
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values
        )
    )])

    fig_sankey.update_layout(
        title_text="Emissions Flow: Regions to Sectors",
        font_size=12,
        height=600
    )
    fig_sankey.show()
    print("✅ Sankey diagram created")

# Run the specialized visualization checks on the module-level dataset.
test_specialized_visualizations(df)

# ## 5. Statistical Visualization Tests

def test_statistical_plots(df):
    """Build and display the statistical charts for the emissions data.

    Produces a 2x2 figure (violin plot, regional box plot, normal Q-Q plot,
    population-regression residuals) when 'development_level' is present,
    a yearly mean plot with a 95% confidence band when 'year' is present,
    and a PCA scree/score figure when more than three numeric columns exist.

    Args:
        df: emissions DataFrame. 'emissions', 'region' and 'country' are
            read; 'development_level', 'population' and 'year' gate the
            optional sections.

    Returns:
        None. Figures are displayed and progress lines are printed.
    """
    print(f"\n📊 TESTING STATISTICAL VISUALIZATIONS:")
    print("="*45)

    # 1. Distribution Comparison with Statistical Tests
    if 'development_level' in df.columns:
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('Statistical Analysis Visualizations', fontsize=16, fontweight='bold')

        # Violin plots
        sns.violinplot(data=df, x='development_level', y='emissions', ax=axes[0,0])
        axes[0,0].set_title('Emissions Distribution by Development Level')
        axes[0,0].set_ylabel('Emissions (Mt CO₂e)')

        # Box plots with outliers
        sns.boxplot(data=df, x='region', y='emissions', ax=axes[0,1])
        axes[0,1].set_title('Regional Emissions with Outliers')
        axes[0,1].set_ylabel('Emissions (Mt CO₂e)')
        plt.setp(axes[0,1].get_xticklabels(), rotation=45)

        # Q-Q plot for normality testing
        from scipy import stats
        emissions_clean = df['emissions'].dropna()
        # Cap the sample at the number of non-null values. Using len(df)
        # can request more rows than remain after dropna(), which makes
        # sample() raise ValueError.
        emissions_sample = emissions_clean.sample(
            min(1000, len(emissions_clean)), random_state=42
        ).sort_values()
        if not emissions_sample.empty:
            stats.probplot(emissions_sample, dist="norm", plot=axes[1,0])
        axes[1,0].set_title('Q-Q Plot: Normality Test for Emissions')

        # Residual plot (using a simple linear model)
        if 'population' in df.columns:
            from sklearn.linear_model import LinearRegression

            country_data = df.groupby('country').agg({
                'emissions': 'sum',
                'population': 'first'
            }).dropna()

            # Skip the fit when fewer than two countries remain.
            if len(country_data) >= 2:
                X = country_data[['population']]
                y = country_data['emissions']

                model = LinearRegression().fit(X, y)
                predicted = model.predict(X)
                residuals = y - predicted

                axes[1,1].scatter(predicted, residuals, alpha=0.6)
                axes[1,1].axhline(y=0, color='red', linestyle='--')
                axes[1,1].set_xlabel('Predicted Emissions')
                axes[1,1].set_ylabel('Residuals')
                axes[1,1].set_title('Residual Plot: Population vs Emissions')

        plt.tight_layout()
        plt.show()
        print("✅ Statistical plots created")

    # 2. Confidence Intervals Visualization
    if 'year' in df.columns:
        yearly_stats = df.groupby('year')['emissions'].agg(['mean', 'std', 'count']).reset_index()
        # Normal-approximation interval: mean ± 1.96 standard errors.
        yearly_stats['se'] = yearly_stats['std'] / np.sqrt(yearly_stats['count'])
        yearly_stats['ci_lower'] = yearly_stats['mean'] - 1.96 * yearly_stats['se']
        yearly_stats['ci_upper'] = yearly_stats['mean'] + 1.96 * yearly_stats['se']

        plt.figure(figsize=(12, 6))
        plt.plot(yearly_stats['year'], yearly_stats['mean'], 'o-', linewidth=2, markersize=8)
        plt.fill_between(yearly_stats['year'], yearly_stats['ci_lower'], yearly_stats['ci_upper'],
                        alpha=0.3, label='95% Confidence Interval')
        plt.title('Mean Emissions Over Time with Confidence Intervals', fontsize=14, fontweight='bold')
        plt.xlabel('Year')
        plt.ylabel('Mean Emissions (Mt CO₂e)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
        print("✅ Confidence interval plot created")

    # 3. Principal Component Analysis Visualization
    numeric_features = df.select_dtypes(include=[np.number]).columns
    if len(numeric_features) > 3:
        from sklearn.decomposition import PCA

        # Prepare data
        pca_data = df[numeric_features].dropna()

        # At least two complete rows are needed for two components, which
        # the score plot labels (PC1 and PC2) rely on.
        if len(pca_data) >= 2:
            scaler = StandardScaler()
            scaled_data = scaler.fit_transform(pca_data)

            # Perform PCA
            pca = PCA()
            pca_result = pca.fit_transform(scaled_data)

            fig, axes = plt.subplots(1, 2, figsize=(15, 6))

            # Scree plot
            explained_variance = pca.explained_variance_ratio_
            component_numbers = range(1, len(explained_variance) + 1)
            axes[0].bar(component_numbers, explained_variance, alpha=0.7)
            axes[0].plot(component_numbers, explained_variance, 'ro-')
            axes[0].set_xlabel('Principal Component')
            axes[0].set_ylabel('Explained Variance Ratio')
            axes[0].set_title('PCA Scree Plot')
            axes[0].grid(True, alpha=0.3)

            # Scores on the first two components
            axes[1].scatter(pca_result[:, 0], pca_result[:, 1], alpha=0.6, s=30)
            axes[1].set_xlabel(f'PC1 ({explained_variance[0]:.1%} variance)')
            axes[1].set_ylabel(f'PC2 ({explained_variance[1]:.1%} variance)')
            axes[1].set_title('PCA Biplot (First Two Components)')
            axes[1].grid(True, alpha=0.3)

            plt.tight_layout()
            plt.show()
            print("✅ PCA visualization created")

# Run the statistical visualization checks on the module-level dataset.
test_statistical_plots(df)

# ## 6. Interactive Dashboard Components

def test_dashboard_components(df):
    """Test interactive dashboard components"""
    print(f"\n🎛️ TESTING DASHBOARD COMPONENTS:")
    print("="*40)
    
    # 1. Filter-responsive visualization
    def create_filtered_viz(region_filter=None, sector_filter=None):
        """Create visualization with filters applied"""
        filtered_df = df.copy()
        
        if region_filter:
            filtered_df = filtered_df[filtered_df['region'].isin(region_filter)]
        if sector_filter:
            filtered_df = filtered_df[filtered_df['type'].isin(sector_filter)]
        
        return filtered_df.groupby('country')['emissions'].sum().sort_values(ascending=False).head(10)
    
    # Test different filter combinations
    print("Testing filter combinations:")
    
    # All regions, Energy sector only
    energy_only = create_filtered_viz(sector_filter=['Energy'])
    print(f"  • Energy sector top emitters: {list(energy_only.head(3).index)}")
    
    # Asia region only
    asia_only = create_filtered_viz(region_filter=['Asia'])
    print(f"  • Asia top emitters: {list(asia_only.head(3).index)}")
    
    # 2. Responsive metric cards
    def calculate_metrics(df_subset):
        """Calculate key metrics for any data subset"""
        return {
            'total_emissions': df_subset['emissions'].sum(),
            'avg_emissions': df_subset['emissions'].mean(),
            'num_countries': df_subset['country'].nunique(),
            'top_sector': df_subset.groupby('type')['emissions'].sum().idxmax()
        }
    
    # Test metric calculations
    full_metrics = calculate_metrics(df)
    asia_metrics = calculate_metrics(df[df['region'] == 'Asia'])
    
    print(f"\nMetric comparison:")
    print(f"  • Global total: {full_metrics['total_emissions']:.0f} Mt")
    print(f"  • Asia total: {asia_metrics['total_emissions']:.0f} Mt")
    print(f"  • Asia percentage: {(asia_metrics['total_emissions']/full_metrics['total_emissions']*100):.1f}%")
    
    # 3. Dynamic chart updating test
    def create_dynamic_chart_data(time_period='all'):
        """Simulate dynamic data for different time periods"""
        if time_period == 'recent' and 'year' in df.columns:
            recent_year = df['year'].max()
            return df[df['year'] == recent_year]
        elif time_period == 'historical' and 'year' in df.columns:
            historical_year = df['year'].min()
            return df[df['year'] == historical_year]
        else:
            return df
    
    # Test dynamic data updates
    recent_data = create_dynamic_chart_data('recent')
    historical_data = create_dynamic_chart_data('historical')
    
    print(f"\nDynamic data test:")
    print(f"  • Recent period records: {len(recent_data)}")
    print(f"  • Historical period records: {len(historical_data)}")
    
    print("✅ Dashboard components tested successfully!")

# Run the dashboard component checks on the module-level dataset.
test_dashboard_components(df)

# ## 7. Performance and Optimization Tests

def test_visualization_performance(df):
    """Time a few plotting/aggregation steps and measure memory growth.

    Measures (1) sampling and scatter-plotting up to 10,000 rows, (2) a
    four-key groupby with six aggregations, and (3) the change in process
    resident memory while drawing a correlation heatmap.

    Args:
        df: emissions DataFrame with 'emissions', 'region', 'country',
            'type' and 'year' columns.

    Returns:
        dict with 'large_dataset' and 'aggregation' in seconds and
        'memory_usage' in MB (resident-set delta; may be negative).
    """
    print(f"\n⚡ TESTING VISUALIZATION PERFORMANCE:")
    print("="*45)

    import time

    performance_results = {}

    # 1. Large dataset handling
    print("Testing large dataset visualization...")
    # perf_counter is monotonic and high-resolution. time.time() follows the
    # wall clock, which can jump while an interval is being measured.
    start_time = time.perf_counter()

    # Create a large sample
    large_sample = df.sample(n=min(10000, len(df)), replace=True)

    plt.figure(figsize=(10, 6))
    plt.scatter(large_sample['emissions'], range(len(large_sample)),
               alpha=0.3, s=1)
    plt.title(f'Large Dataset Test ({len(large_sample)} points)')
    plt.xlabel('Emissions')
    plt.ylabel('Record Index')
    plt.tight_layout()
    plt.show()

    large_dataset_time = time.perf_counter() - start_time
    performance_results['large_dataset'] = large_dataset_time

    # 2. Complex aggregation performance
    print("Testing complex aggregation performance...")
    start_time = time.perf_counter()

    # Only the elapsed time matters here; the aggregated frame is discarded.
    df.groupby(['region', 'country', 'type', 'year'])['emissions'].agg([
        'sum', 'mean', 'std', 'count', 'min', 'max'
    ]).reset_index()

    aggregation_time = time.perf_counter() - start_time
    performance_results['aggregation'] = aggregation_time

    # 3. Memory usage test
    import psutil
    import os

    bytes_per_mb = 1024 * 1024
    process = psutil.Process(os.getpid())
    memory_before = process.memory_info().rss / bytes_per_mb

    # Create memory-intensive visualization
    correlation_matrix = df.select_dtypes(include=[np.number]).corr()

    plt.figure(figsize=(12, 10))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
    plt.title('Memory Usage Test - Correlation Heatmap')
    plt.tight_layout()
    plt.show()

    memory_after = process.memory_info().rss / bytes_per_mb
    memory_used = memory_after - memory_before

    performance_results['memory_usage'] = memory_used

    # Print performance results
    print(f"\n📊 Performance Results:")
    print(f"  • Large dataset plotting: {large_dataset_time:.2f} seconds")
    print(f"  • Complex aggregation: {aggregation_time:.2f} seconds")
    print(f"  • Memory usage: {memory_used:.1f} MB")

    # Performance recommendations
    print(f"\n💡 Performance Recommendations:")
    if large_dataset_time > 2.0:
        print("  • Consider data sampling for large datasets")
    if aggregation_time > 1.0:
        print("  • Consider caching aggregated results")
    if memory_used > 100:
        print("  • Consider chunking data for memory-intensive operations")

    print("✅ Performance tests completed!")

    return performance_results

# Keep the timing/memory measurements; the final report below consumes them.
performance_results = test_visualization_performance(df)

# ## 8. Summary and Recommendations

def generate_visualization_report(df, performance_results):
    """Print and return a summary of the visualization test run.

    Args:
        df: emissions DataFrame; needs 'country' and 'region' columns.
            'year' and 'latitude' are checked for when rating readiness.
        performance_results: mapping of measurement name to value. Every
            entry is treated as a duration in seconds except
            'memory_usage', which is a memory figure and is left out of
            the time total.

    Returns:
        dict with keys 'data_quality_score' (int, 0-6), 'viz_readiness'
        (dict of chart family -> status string), 'performance_rating'
        (str) and 'recommended_visualizations' (list of str).
    """
    print(f"\n📋 VISUALIZATION TESTING REPORT:")
    print("="*40)

    # Dataset suitability assessment
    data_quality_score = 0

    # Check data completeness: mean share of non-null cells per column.
    completeness = (df.count() / len(df)).mean()
    if completeness > 0.95:
        data_quality_score += 2
    elif completeness > 0.8:
        data_quality_score += 1

    # Check data diversity
    if df['country'].nunique() > 10:
        data_quality_score += 2
    if df['region'].nunique() > 3:
        data_quality_score += 1
    if 'year' in df.columns and df['year'].nunique() > 1:
        data_quality_score += 1

    print(f"📊 Data Suitability Score: {data_quality_score}/6")

    # Visualization readiness assessment
    numeric_column_count = len(df.select_dtypes(include=[np.number]).columns)
    viz_readiness = {
        'Geographic visualizations': 'Ready' if 'latitude' in df.columns else 'Needs coordinates',
        'Time series': 'Ready' if 'year' in df.columns else 'Needs temporal data',
        'Comparative analysis': 'Ready' if df['region'].nunique() > 2 else 'Limited',
        'Statistical analysis': 'Ready' if numeric_column_count > 3 else 'Limited'
    }

    print(f"\n🎨 Visualization Readiness:")
    for viz_type, status in viz_readiness.items():
        print(f"  • {viz_type}: {status}")

    # Performance assessment
    print(f"\n⚡ Performance Assessment:")
    # 'memory_usage' is reported in MB, not seconds. Adding it to the
    # timings would distort both the total and the rating.
    non_timing_keys = {'memory_usage'}
    total_time = sum(
        value for key, value in performance_results.items()
        if key not in non_timing_keys
    )
    if total_time < 5:
        perf_rating = "Excellent"
    elif total_time < 10:
        perf_rating = "Good"
    elif total_time < 20:
        perf_rating = "Fair"
    else:
        perf_rating = "Needs optimization"

    print(f"  • Overall performance: {perf_rating}")
    print(f"  • Total processing time: {total_time:.2f} seconds")

    # Recommended visualizations
    recommended_viz = [
        "Global choropleth maps for geographic distribution",
        "Time series plots for trend analysis",
        "Treemaps for hierarchical sector breakdown",
        "Scatter plots for correlation analysis",
        "Box plots for distribution comparison",
        "Sankey diagrams for flow analysis"
    ]

    print(f"\n🎯 Recommended Visualizations:")
    for i, viz in enumerate(recommended_viz, 1):
        print(f"  {i}. {viz}")

    # Implementation priorities
    print(f"\n🚀 Implementation Priorities:")
    priorities = [
        "Implement responsive filtering for all charts",
        "Add interactive tooltips and hover information",
        "Create downloadable/exportable chart formats",
        "Optimize performance for large datasets",
        "Add real-time data update capabilities"
    ]

    for i, priority in enumerate(priorities, 1):
        print(f"  {i}. {priority}")

    return {
        'data_quality_score': data_quality_score,
        'viz_readiness': viz_readiness,
        'performance_rating': perf_rating,
        'recommended_visualizations': recommended_viz
    }

# Generate final report
viz_report = generate_visualization_report(df, performance_results)

print(f"\n✨ VISUALIZATION TESTING COMPLETE!")
print("="*45)
print(f"🎨 All visualization types tested successfully")
print(f"📊 Performance benchmarks established")
print(f"🎯 Implementation roadmap created")
print(f"🚀 Ready for dashboard development!")

# NOTE(review): the text "Sector pie chart" was fused onto the end of the
# print call above, which made that line a syntax error. It reads like a
# displaced section heading, so it is kept here as a comment.
# Sector pie chart
    # NOTE(review): the statements below sit at function-body indentation with
    # no enclosing `def` in view, and they use `axes`, which is not assigned at
    # module level. They look like panels 3-6 of test_basic_plots (pie,
    # scatter, time series, box plot) displaced by the notebook export.
    # Confirm and move them back into that function.
    sector_emissions = df.groupby('type')['emissions'].sum()
    colors = plt.cm.Set3(np.linspace(0, 1, len(sector_emissions)))
    axes[1,0].pie(sector_emissions.values, labels=sector_emissions.index, autopct='%1.1f%%', colors=colors)
    axes[1,0].set_title('Emissions by Sector')
    
    # 4. Scatter plot: Population vs Emissions
    if 'population' in df.columns:
        # One row per country: total emissions plus its population and region.
        country_data = df.groupby('country').agg({
            'emissions': 'sum',
            'population': 'first',
            'region': 'first'
        }).reset_index()
        
        # One scatter call per region so each region gets its own legend entry.
        for region in country_data['region'].unique():
            region_data = country_data[country_data['region'] == region]
            axes[1,1].scatter(region_data['population'], region_data['emissions'], 
                            label=region, alpha=0.7, s=60)
        
        axes[1,1].set_xlabel('Population (millions)')
        axes[1,1].set_ylabel('Total Emissions (Mt CO₂e)')
        axes[1,1].set_title('Population vs Emissions by Region')
        axes[1,1].legend()
        axes[1,1].grid(True, alpha=0.3)
    
    # 5. Time series plot
    if 'year' in df.columns:
        # Yearly totals per emission type, drawn as one line per type.
        yearly_emissions = df.groupby(['year', 'type'])['emissions'].sum().reset_index()
        for emission_type in yearly_emissions['type'].unique():
            type_data = yearly_emissions[yearly_emissions['type'] == emission_type]
            axes[2,0].plot(type_data['year'], type_data['emissions'], 
                          marker='o', linewidth=2, label=emission_type)
        
        axes[2,0].set_xlabel('Year')
        axes[2,0].set_ylabel('Emissions (Mt CO₂e)')
        axes[2,0].set_title('Emission Trends by Sector')
        axes[2,0].legend()
        axes[2,0].grid(True, alpha=0.3)
    
    # 6. Box plot by region
    df.boxplot(column='emissions', by='region', ax=axes[2,1])
    axes[2,1].set_title('Emissions Distribution by Region')
    axes[2,1].set_xlabel('Region')
    axes[2,1].set_ylabel('Emissions (Mt CO₂e)')
    plt.setp(axes[2,1].get_xticklabels(), rotation=45)
    
    plt.tight_layout()
    plt.show()
    
    print("✅ Basic plots generated successfully!")

# Run the basic plot checks on the module-level dataset.
test_basic_plots(df)

# ## 3. Advanced Plotly Visualizations

def test_plotly_advanced(df):
    """Test advanced interactive visualizations with Plotly"""
    print(f"\n🎨 TESTING ADVANCED PLOTLY VISUALIZATIONS:")
    print("="*50)
    
    # 1. 3D Scatter Plot
    if all(col in df.columns for col in ['population', 'gdp_per_capita', 'emissions']):
        country_summary = df.groupby('country').agg({
            'emissions': 'sum',
            'population': 'first',
            'gdp_per_capita': 'first',
            'region': 'first'
        }).reset_index()
        
        fig_3d = px.scatter_3d(
            country_summary,
            x='population',
            y='gdp_per_capita',
            z='emissions',
            color='region',
            size='emissions',
            hover_name='country',
            title='3D Analysis: Population, GDP per Capita, and Emissions',
            labels={
                'population': 'Population (millions)',
                'gdp_per_capita': 'GDP per Capita (USD)',
                'emissions': 'Total Emissions (Mt CO₂e)'
            }
        )
        
        fig_3d.update_layout(height=600)
        fig_3d.show()
        print("✅ 3D scatter plot created")
    
    # 2. Animated Time Series
    if 'year' in df.columns:
        yearly_country_data = df.groupby(['year', 'country']).agg({
            'emissions': 'sum',
            'population': 'first',
            'gdp_per_capita': 'first',
            'region': 'first'
        }).reset_index()
        
        fig_animated = px.scatter(
            yearly_country_data,
            x='gdp_per_capita',
            y='emissions',
            size='population',
            color='region',
            hover_name='country',
            animation_frame='year',
            title='Animated Emissions vs GDP per Capita Over Time',
            labels={
                'gdp_per_capita': 'GDP per Capita (USD)',
                'emissions': 'Total Emissions (Mt CO₂e)'
            }
        )
        
        fig_animated.update_layout(height=600)
        fig_animated.show()
        print("✅ Animated scatter plot created")
    
    # 3.