## What do my notes look like?

Because your lab workbench and your notes have been using wikilinks to tie together thoughts and observations (right? You've been doing that? Yes?) there will be emergent structures in your thinking that you might not have spotted yet.

That is to say, we network our thinking, stitching together thoughts and ideas. This notebook uses what you've explored this week (via Python) to visualize and quickly analyze the structure of your workbench. The first cell defines all of the functions we'll use; the second cell executes those functions against the [[wikilink-index.json]] file that stores the information about how your notes interlink. (You can find that JSON file in the top level of your workbench folder. If it's not there, go to View -> Command Palette -> PKM: Build/Rebuild Wikilink Index to create it.)

In [None]:
import json
import matplotlib.pyplot as plt
import networkx as nx
import re

def load_wikilink_data(filepath):
    """Load the wikilink index from a JSON file.

    Args:
        filepath: Path to the JSON file to read.

    Returns:
        The parsed JSON content, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding (e.g. cp1252 on Windows), which would garble or reject
    # note names containing non-ASCII characters.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)

def clean_filename(filename):
    """Return a short display name for a note path.

    A trailing ``.md``, ``.ipynb``, ``.json``, ``.csv`` or ``.py`` extension
    is stripped first, then everything up to and including the last ``/``
    is dropped. Other extensions are left in place.
    """
    without_extension = re.sub(r'\.(md|ipynb|json|csv|py)$', '', filename)
    # rpartition yields the whole string as the tail when there is no '/'.
    _, _, base_name = without_extension.rpartition('/')
    return base_name

def filter_checkpoints_and_clean(data):
    """Filter out checkpoint files and reduce links to clean note names.

    Args:
        data: Parsed wikilink index. Its ``'links'`` entry maps each source
            file to its targets, given either as a list of target names or
            as a dict whose keys are the target names.

    Returns:
        A dict mapping each cleaned source name to the list of its cleaned
        target names. Sources with no remaining targets are omitted. When
        several source files clean to the same name (for example
        ``note.md`` and ``note.ipynb``), their targets are merged into one
        list rather than the later file overwriting the earlier one.
    """
    # A missing or null 'links' entry is treated as "no links".
    links = data.get('links') or {}
    filtered_links = {}

    for source, targets in links.items():
        # Skip checkpoint files
        if 'checkpoint' in source.lower():
            continue

        # A null/empty targets entry contributes nothing.
        if not targets:
            continue

        # Handle both list and dict formats for targets
        target_list = targets if isinstance(targets, list) else list(targets.keys())

        clean_targets = [
            clean_filename(target)
            for target in target_list
            if 'checkpoint' not in target.lower()
            and not target.startswith(('`', 'http'))  # Skip code snippets and URLs
            and '#' not in target  # Skip cell/section references for a cleaner view
        ]

        if clean_targets:  # Only add if there are valid targets
            # Merge instead of assigning so that files sharing a cleaned
            # name do not silently drop each other's links.
            filtered_links.setdefault(clean_filename(source), []).extend(clean_targets)

    return filtered_links

def create_graph(filtered_links):
    """Build an undirected NetworkX graph from the filtered wikilink data.

    Every (source, target) pair in ``filtered_links`` becomes an edge,
    except pairs where a note links to itself.
    """
    network = nx.Graph()
    network.add_edges_from(
        (note, linked_note)
        for note, linked_notes in filtered_links.items()
        for linked_note in linked_notes
        if note != linked_note  # no self-loops
    )
    return network

def show_graph_info(G):
    """Print node/edge counts, the mean degree and the five best-connected notes.

    For an empty graph only the node and edge counts are printed.
    """
    node_count = len(G.nodes())
    edge_count = len(G.edges())

    print("📊 Graph Statistics:")
    print(f"   Nodes (notes): {node_count}")
    print(f"   Edges (connections): {edge_count}")

    # Nothing further to report (and no division by zero) for an empty graph.
    if not node_count > 0:
        return

    degree_by_note = dict(G.degree())
    mean_degree = sum(degree_by_note.values()) / node_count
    print(f"   Average connections per note: {mean_degree:.1f}")

    # Rank notes by degree, highest first, and keep the top five.
    ranked = sorted(degree_by_note.items(), key=lambda pair: pair[1], reverse=True)
    print("\n🔗 Most connected notes:")
    for name, degree in ranked[:5]:
        print(f"   {name}: {degree} connections")

def plot_graph(G, filtered_links):
    """Draw the note network with matplotlib and show the figure.

    Nodes are positioned with a spring layout, sized and coloured by their
    degree, and labelled with their names. An empty graph gets a
    placeholder message instead. ``filtered_links`` is accepted but not
    used by this function.
    """
    _, axes = plt.subplots(figsize=(14, 10))

    if len(G.nodes()) > 0:
        # Spring layout spreads the nodes out.
        layout = nx.spring_layout(G, k=1, iterations=50)

        # Degree drives both the colour and the (floor-limited) marker size.
        degree_by_note = dict(G.degree())
        color_values = [degree_by_note[name] for name in G.nodes()]
        size_values = [max(300, degree * 100) for degree in color_values]

        drawn_nodes = nx.draw_networkx_nodes(
            G,
            layout,
            node_size=size_values,
            node_color=color_values,
            cmap=plt.cm.viridis,
            alpha=0.8,
            ax=axes,
        )

        nx.draw_networkx_edges(
            G,
            layout,
            alpha=0.5,
            edge_color='gray',
            width=1,
            ax=axes,
        )

        # Each node is labelled with its own name.
        name_labels = {name: name for name in G.nodes()}
        nx.draw_networkx_labels(
            G,
            layout,
            name_labels,
            font_size=8,
            font_weight='bold',
            font_color='black',
            ax=axes,
        )

        # A colour bar is only meaningful when degrees actually differ.
        if len(set(color_values)) > 1:
            plt.colorbar(drawn_nodes, ax=axes, label='Number of connections', shrink=0.8)
    else:
        # Empty graph: show a hint in the middle of the axes.
        axes.text(
            0.5,
            0.5,
            'No valid links found\n(Try creating some [[wikilinks]] in your notes!)',
            ha='center',
            va='center',
            fontsize=16,
        )
        axes.set_xlim(0, 1)
        axes.set_ylim(0, 1)

    axes.set_title(
        'Your Personal Knowledge Network 🧠\n(Node size = number of connections)',
        fontsize=16,
        fontweight='bold',
        pad=20,
    )
    axes.axis('off')

    plt.tight_layout()
    plt.show()

def analyze_knowledge_network(filtered_links):
    """Print summary insights about the knowledge network structure.

    Args:
        filtered_links: Dict mapping each source note name to the list of
            note names it links to.

    Prints the total number of links, the count and up to ten examples of
    notes that are linked to but never appear as a source, and some
    general tips when there is at least one source note. Examples are
    listed in sorted order so the output is the same on every run.

    NOTE(review): a note that exists but has no outgoing links is also not
    a source, so it is reported as "not created yet" here too; telling the
    two apart would need a list of existing files, which this function is
    not given.
    """
    print("\n🎯 Knowledge Network Insights:")

    # Count every link, including repeated links to the same target.
    total_connections = sum(len(targets) for targets in filtered_links.values())
    print(f"   Total wikilink connections: {total_connections}")

    # Find notes that are referenced but never appear as a source (red links)
    all_sources = set(filtered_links)
    all_targets = set().union(*filtered_links.values())

    missing_notes = all_targets - all_sources
    if missing_notes:
        print(f"   📝 Notes referenced but not created yet: {len(missing_notes)}")
        # Sets have no stable order; sort so the examples are reproducible.
        examples = sorted(missing_notes)[:10]
        print(f"      Examples: {', '.join(examples)}")

    # Suggest connections
    if filtered_links:
        print("\n💡 Tips for building your knowledge network:")
        print("   • Create the missing notes to strengthen connections")
        print("   • Look for opportunities to link related concepts")
        print("   • Most connected notes might be good 'hub' pages")

## Now let's do it!

Run the next cell. Which notes are most important? Why? What does that tell you about your own thinking?

In [None]:
# Load the wikilink index and reduce it to clean note-to-note links.
# The path is relative to the working directory; '../' assumes this notebook
# runs from a folder directly inside the workbench — adjust if yours differs.
data = load_wikilink_data('../wikilink-index.json')
filtered_links = filter_checkpoints_and_clean(data)

# Build the undirected graph of notes from the cleaned links.
graph = create_graph(filtered_links)

# Print the graph statistics first, then the link-level insights.
show_graph_info(graph)
analyze_knowledge_network(filtered_links)

# Draw the network; node size and colour reflect each note's connection count.
plot_graph(graph, filtered_links)