# GenomeExplorer: Interactive Genome Analysis

This notebook demonstrates how to use the GenomeExplorer API to analyze NEAT genomes stored in the database.

## Prerequisites
- Run `test_full_lifecycle.py` to populate the database with example data
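- Or run your own experiments with database integration enabled
- Either way, at least one experiment must have status `completed`; the notebook loads the best genome from a completed experiment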

In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from explaneat.db import db
from explaneat.analysis import GenomeExplorer

# Configure plotting
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

# Initialize database
db.init_db()
print("Database initialized!")

## 1. List Available Experiments

In [None]:
# Fetch every experiment recorded in the database
experiments_df = GenomeExplorer.list_experiments()

# Report the count, then render the overview columns as the cell output
overview_columns = ['experiment_id', 'name', 'status', 'generations', 'best_fitness', 'created_at']
print(f"Found {len(experiments_df)} experiments:\n")
experiments_df[overview_columns]

## 2. Load Best Genome from an Experiment

In [None]:
# Select the most recent completed experiment
completed_experiments = experiments_df[experiments_df['status'] == 'completed']

if completed_experiments.empty:
    # Every later cell needs `explorer`. Stop here with an actionable message
    # instead of letting the next cell fail with a NameError (or silently
    # reuse a stale `explorer` left over from an earlier run).
    raise RuntimeError(
        "No completed experiments found! Run test_full_lifecycle.py (or your own "
        "experiment with database integration enabled) before continuing."
    )

# Order newest-first explicitly so "most recent" does not depend on whatever
# row order list_experiments() happens to return.
latest_experiment = completed_experiments.sort_values('created_at', ascending=False).iloc[0]
experiment_id = latest_experiment['experiment_id']
experiment_name = latest_experiment['name']

print(f"Loading best genome from: {experiment_name}")
print(f"Experiment ID: {experiment_id}")

# Create GenomeExplorer instance
explorer = GenomeExplorer.load_best_genome(experiment_id)

## 3. Genome Summary

In [None]:
# Display comprehensive summary
# Calls summary() on the `explorer` created by the genome-loading cell; on a
# fresh kernel that cell must have run successfully first, otherwise
# `explorer` is undefined here.
explorer.summary()

## 4. Network Visualization

In [None]:
# Draw the network structure; figure size and layout are passed to show_network
network_plot_options = {'figsize': (12, 8), 'layout': 'hierarchical'}
explorer.show_network(**network_plot_options)

In [None]:
# Analyze node properties
# Delegates to the explorer's `visualizer` helper; requires `explorer` from
# the genome-loading cell.
explorer.visualizer.plot_node_properties()

In [None]:
# Analyze connection properties
# Delegates to the explorer's `visualizer` helper; requires `explorer` from
# the genome-loading cell.
explorer.visualizer.plot_connection_properties()

## 5. Ancestry Analysis

In [None]:
# Get ancestry tree (looks back at most 10 generations)
ancestry_df = explorer.get_ancestry_tree(max_generations=10)

print(f"Found {len(ancestry_df)} ancestors")
if not ancestry_df.empty:
    print("\nAncestry tree:")
    # Jupyter only auto-renders the last *top-level* expression of a cell.
    # An expression nested inside this `if` is evaluated and discarded, so the
    # table must be rendered explicitly. `display` is provided by the IPython
    # kernel, so no import is needed.
    display(ancestry_df[['neat_genome_id', 'generation', 'fitness', 'num_nodes', 'num_connections']])

In [None]:
# Summarise how fitness and complexity evolved along the lineage
lineage_stats = explorer.ancestry_analyzer.get_lineage_statistics()

if lineage_stats:
    print("Lineage Statistics:")
    print(f"  Length: {lineage_stats['lineage_length']} generations")

    # Fitness figures are all printed with three decimals
    fitness = lineage_stats['fitness_progression']
    print("\nFitness Progression:")
    print(f"  Trend: {fitness['fitness_trend']}")
    fitness_rows = (
        ("Initial", 'initial_fitness'),
        ("Final", 'final_fitness'),
        ("Best", 'best_fitness'),
        ("Average", 'average_fitness'),
    )
    for label, key in fitness_rows:
        print(f"  {label}: {fitness[key]:.3f}")

    # Node and connection counts share the "initial → final (max)" layout
    complexity = lineage_stats['complexity_progression']
    print("\nComplexity Progression:")
    print(f"  Trend: {complexity['complexity_trend']}")
    complexity_rows = (
        ("Nodes", 'initial_nodes', 'final_nodes', 'max_nodes'),
        ("Connections", 'initial_connections', 'final_connections', 'max_connections'),
    )
    for label, first_key, last_key, peak_key in complexity_rows:
        print(f"  {label}: {complexity[first_key]} → {complexity[last_key]} (max: {complexity[peak_key]})")

In [None]:
# Plot the fitness progression only when the lineage has more than one entry
has_multiple_ancestors = len(ancestry_df) > 1
if has_multiple_ancestors:
    explorer.plot_ancestry_fitness()

## 6. Gene Origins Analysis

In [None]:
# Trace when genes were introduced
gene_origins_df = explorer.trace_gene_origins()

if not gene_origins_df.empty:
    print(f"Analyzed {len(gene_origins_df)} genes\n")

    # Summary by generation: one row per gene type, one column per origin
    # generation, cells hold the number of genes (0 where none were added).
    origin_summary = gene_origins_df.groupby(['gene_type', 'origin_generation']).size().unstack(fill_value=0)
    print("Genes introduced by generation:")
    # Jupyter only auto-renders the last *top-level* expression of a cell, and
    # this one is nested inside the `if`, so render it explicitly. `display`
    # is provided by the IPython kernel.
    display(origin_summary)

In [None]:
# List the five genes with the highest origin generation
if not gene_origins_df.empty:
    newest_genes = gene_origins_df.nlargest(5, 'origin_generation')
    print("5 Most Recently Added Genes:")
    for _, row in newest_genes.iterrows():
        kind = row['gene_type']
        identifier = row['gene_id']
        born_in = row['origin_generation']
        print(f"  {kind}: {identifier} (gen {born_in})")

## 7. Training Performance

In [None]:
# Plot and summarise training metrics when the genome has any recorded
if not explorer.genome_info.training_metrics:
    print("No training metrics available")
else:
    explorer.plot_training_metrics()

    # Tabulate the metrics so first/last values can be read off directly
    metrics_df = pd.DataFrame(explorer.genome_info.training_metrics)

    print("\nTraining Summary:")
    loss_curve = metrics_df['loss']
    first_loss = loss_curve.iloc[0]
    last_loss = loss_curve.iloc[-1]
    print(f"  Final loss: {last_loss:.4f}")
    last_accuracy = metrics_df['accuracy'].iloc[-1]
    print(f"  Final accuracy: {last_accuracy:.4f}")
    print(f"  Loss reduction: {first_loss - last_loss:.4f}")

## 8. Performance Context

In [None]:
# Place the genome's fitness in context of its generation and its experiment
context = explorer.get_performance_context()

print("Performance Context:")

rank, population_size = context['generation_rank'], context['generation_size']
print(f"  Rank in generation: {rank}/{population_size}")

print(f"  Generation best fitness: {context['generation_best']:.3f}")

mean_fitness, fitness_spread = context['generation_mean'], context['generation_std']
print(f"  Generation mean fitness: {mean_fitness:.3f} (±{fitness_spread:.3f})")

print(f"  Is best in generation: {context['is_generation_best']}")
print(f"  Experiment best fitness: {context['experiment_best_fitness']:.3f}")
print(f"  Total experiment generations: {context['experiment_generations']}")

## 9. Compare with Ancestors

In [None]:
# Compare with oldest ancestor if available
if len(ancestry_df) > 1:
    oldest_generation = ancestry_df['generation'].min()
    comparison = explorer.compare_with_ancestor(oldest_generation)

    if 'error' in comparison:
        # Surface the failure reason; previously this case printed nothing,
        # leaving the reader with an empty cell output and no explanation.
        print(f"Could not compare with ancestor from generation {oldest_generation}: {comparison['error']}")
    else:
        print(f"Comparison with ancestor from generation {oldest_generation}:")
        print(f"\nFitness change: {comparison['fitness_change']:.3f}")

        sc = comparison['structure_changes']
        print("\nStructure changes:")
        print(f"  Nodes: +{sc['nodes_added']} / -{sc['nodes_removed']}")
        print(f"  Connections: +{sc['connections_added']} / -{sc['connections_removed']}")

        if sc['added_nodes']:
            print(f"  Added nodes: {sc['added_nodes']}")

        added_connections = sc['added_connections']
        if added_connections:
            # Show at most five entries; "..." marks a truncated list.
            if len(added_connections) > 5:
                print(f"  Added connections: {added_connections[:5]}...")
            else:
                print(f"  Added connections: {added_connections}")

        pc = comparison['parameter_changes']
        print("\nParameter changes:")
        print(f"  Average weight change: {pc['avg_weight_change']:.3f}")
        print(f"  Average bias change: {pc['avg_bias_change']:.3f}")
else:
    print("No ancestors available for comparison")

## 10. Export Data for External Analysis

In [None]:
# Export the genome data and outline what each section holds
export_data = explorer.export_genome_data()

print(f"Exported data contains {len(export_data)} sections:")
for section, payload in export_data.items():
    # Lists report an item count, dicts a field count, anything else just its name
    if isinstance(payload, list):
        size_note = f": {len(payload)} items"
    elif isinstance(payload, dict):
        size_note = f": {len(payload)} fields"
    else:
        size_note = ""
    print(f"  - {section}{size_note}")

# Example: pull individual values out of the export
genome_fitness = export_data['genome_info']['fitness']
print(f"\nGenome fitness: {genome_fitness:.3f}")
print(f"Number of nodes: {len(export_data['neat_genome_nodes'])}")
print(f"Number of connections: {len(export_data['neat_genome_connections'])}")

## 11. Interactive Exploration

Now you can explore the genome interactively using the `explorer` object:

In [None]:
# Example: inspect the gene of a single node, if the genome contains it
node_id = 0  # Output node
genome_nodes = explorer.neat_genome.nodes
if node_id in genome_nodes:
    node = genome_nodes[node_id]
    print(f"Node {node_id} properties:")
    # Numeric parameters are shown with three decimals
    for label, attribute in (("Bias", 'bias'), ("Response", 'response')):
        print(f"  {label}: {getattr(node, attribute):.3f}")
    # The remaining attributes are printed as-is
    for label, attribute in (("Activation", 'activation'), ("Aggregation", 'aggregation')):
        print(f"  {label}: {getattr(node, attribute)}")

In [None]:
# Example: list the enabled connections (disabled ones are skipped)
print("Network connections:")
enabled_connections = (
    (key, gene)
    for key, gene in explorer.neat_genome.connections.items()
    if gene.enabled
)
for key, gene in enabled_connections:
    source, target = key[0], key[1]
    print(f"  {source} → {target}: weight={gene.weight:.3f}")

## Summary

This notebook demonstrated how to:
1. Load genomes from the database
2. Analyze network structure and properties
3. Trace evolutionary ancestry
4. Identify when genes were introduced
5. Compare genomes across generations
6. Export data for external analysis

The `GenomeExplorer` class provides a comprehensive API for understanding how NEAT genomes evolve and what makes them successful.