In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import FancyArrowPatch
from ipywidgets import interact, Dropdown, IntSlider

# Read the reference table from disk
df = pd.read_csv('references.csv')

# Birth year = first 3-4 digit number (with optional leading minus) found in
# 'birth_death'; rows without such a number become NaN
birth_year_text = df['birth_death'].str.extract(r'(-?\d{3,4})', expand=False)
df['birth_year'] = birth_year_text.astype(float)

# Define the function to plot references interactively
def plot_references(philosopher, top_references=10, top_referenced_by=10, threshold=1):
    """Plot the reference network around one philosopher.

    Each displayed author is placed at (birth year, total number of references
    made), with a translucent bubble scaled by how often they are referenced
    within the displayed group, and curved arrows for "source references
    target" relations.

    Reads the module-level ``df`` and uses its ``author_of_book``,
    ``full_author_referenced`` and ``birth_year`` columns.

    Args:
        philosopher: Value of ``author_of_book`` to centre the plot on.
        top_references: How many of the authors most referenced by
            ``philosopher`` to include.
        top_referenced_by: How many of the authors who reference
            ``philosopher`` most often to include.
        threshold: Minimum number of references from one author to another
            for an arrow to be drawn between them.

    Returns:
        The newly created matplotlib figure.

    Note:
        All open pyplot figures are closed before the new one is created.
    """
    # Close every existing figure so repeated widget callbacks do not pile up figures
    plt.close('all')
    fig, ax = plt.subplots(figsize=(16, 12), dpi=150)

    # Authors the selected philosopher references most often
    df_philosopher = df[df['author_of_book'] == philosopher]
    top_referenced = df_philosopher['full_author_referenced'].value_counts().head(top_references).index

    # Authors who reference the selected philosopher most often
    top_referenced_by_philosophers = (
        df.loc[df['full_author_referenced'] == philosopher, 'author_of_book']
        .value_counts()
        .head(top_referenced_by)
        .index
    )

    # Everyone to display: both top lists plus the selected philosopher
    display_philosophers = list(
        set(top_referenced).union(set(top_referenced_by_philosophers), {philosopher})
    )

    # Keep only rows where both the author and the referenced author are displayed
    df_display = df[
        df['author_of_book'].isin(display_philosophers)
        & df['full_author_referenced'].isin(display_philosophers)
    ]

    # Bubble sizes count references within the displayed group only,
    # while the y-axis uses every reference an author made in the full data
    restricted_referenced_counts = df_display['full_author_referenced'].value_counts()
    reference_counts = df['author_of_book'].value_counts().to_dict()

    # One pass over the data instead of re-filtering per philosopher.
    # GroupBy.first() skips missing values, so an author is still placed when
    # only some of their rows lack a birth year.
    birth_years = df.groupby('author_of_book')['birth_year'].first().to_dict()

    # Coordinates: x = birth year, y = number of references made.
    # Authors that never appear in 'author_of_book' or have no birth year are skipped.
    coordinates = {}
    for philosopher_name in display_philosophers:
        birth_year = birth_years.get(philosopher_name)
        if birth_year is None or pd.isna(birth_year):
            continue
        coordinates[philosopher_name] = (birth_year, reference_counts.get(philosopher_name, 0))

    # Plot each author's point, labelled with the first token of the name
    # (for "Last, First" style names this is the surname)
    for philosopher_name, (x, y) in coordinates.items():
        name_parts = str(philosopher_name).split()
        label = name_parts[0].rstrip(',') if name_parts else str(philosopher_name)

        # Primary point and label
        ax.scatter(x, y, s=100, c='skyblue', edgecolor='k', alpha=0.7)
        ax.text(x + 5, y, label, fontsize=10)

        # Translucent bubble sized by how often the displayed group references this author
        if philosopher_name in restricted_referenced_counts:
            bubble_size = restricted_referenced_counts[philosopher_name] * 30
            ax.scatter(x, y, s=bubble_size, c='skyblue', alpha=0.3, edgecolor='none')

    # Draw one curved arrow per (source, target) pair whose count reaches the threshold.
    # groupby().size() already yields each pair exactly once.
    reference_counts_pairs = df_display.groupby(['author_of_book', 'full_author_referenced']).size()
    for (source, target), count in reference_counts_pairs.items():
        if source == target or count < threshold:
            continue
        # Both ends need coordinates to be drawable
        if source not in coordinates or target not in coordinates:
            continue

        arrow = FancyArrowPatch(
            posA=coordinates[source], posB=coordinates[target],
            connectionstyle="arc3,rad=0.2",  # Adds slight curve
            arrowstyle="->,head_length=0.4,head_width=0.2",
            # head_length/head_width are multiplied by mutation_scale, which
            # defaults to 1; without it the arrowheads are too small to see
            mutation_scale=20,
            color='gray',
            alpha=0.5,
            linewidth=1
        )
        ax.add_patch(arrow)

    # Axis labels, tick label size and title
    ax.tick_params(axis='both', labelsize=10)
    ax.set_xlabel("Birth Year")
    ax.set_ylabel("Number of References Made")
    ax.set_title(f"Top References for {philosopher}")

    plt.tight_layout()
    return fig

# Every distinct book author becomes a dropdown choice
philosopher_options = df['author_of_book'].unique()

# One widget per plot_references argument, keyed by parameter name
_plot_controls = {
    'philosopher': Dropdown(options=philosopher_options, description="Philosopher"),
    'top_references': IntSlider(value=10, min=1, max=20, step=1, description="Top Referenced"),
    'top_referenced_by': IntSlider(value=10, min=1, max=20, step=1, description="Top Referenced By"),
    'threshold': IntSlider(value=1, min=1, max=10, step=1, description="Threshold"),
}

# Re-run plot_references whenever any control changes
interact(plot_references, **_plot_controls)

interactive(children=(Dropdown(description='Philosopher', options=('Buchanan', 'Higginson, Thomas Wentworth', …

<function __main__.plot_references(philosopher, top_references=10, top_referenced_by=10, threshold=1)>