# 2025 IWRC Seed Fund Fact Sheet Visualizations

This notebook creates visualizations for page 4 of the 2025 fact sheet:
1. Pie chart of research keywords from spreadsheet columns O (`Keyword 2`) and P (`Keyword 3`)
2. Map of Illinois showing funded institutions

In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import numpy as np

# Set style for better-looking plots. Both calls change global plotting
# defaults, so they affect every figure drawn later in the notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably depend on the
# installed matplotlib version — confirm against the project environment.
plt.style.use('seaborn-v0_8-darkgrid')
# Applied after the style sheet. NOTE(review): the order may matter if the
# style sheet defines its own color cycle — confirm before reordering.
sns.set_palette("husl")

In [None]:
# Read the '2025 data' worksheet of the fact-sheet workbook into a DataFrame
df = pd.read_excel('fact sheet data.xlsx', sheet_name='2025 data')

# Report how much was loaded (rows = projects), then preview the first rows
n_rows, n_cols = df.shape
print(f"Loaded {n_rows} projects from 2025 data")
print(f"Columns: {n_cols}")
df.head()

## Part 1: Research Keywords Pie Chart

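This section counts how often each research keyword appears in spreadsheet columns O (`Keyword 2`) and P (`Keyword 3`), then charts the ten most frequent keywords, with all remaining keywords grouped as "Other".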

In [None]:
# Collect the non-missing entries of each keyword column, then pool them.
# 'Keyword 2' entries come first so tie order in the counter is stable.
keyword2 = df['Keyword 2'].dropna().tolist()
keyword3 = df['Keyword 3'].dropna().tolist()
all_keywords = [*keyword2, *keyword3]

for label, values in (
    ("Total keywords", all_keywords),
    ("From Keyword 2", keyword2),
    ("From Keyword 3", keyword3),
):
    print(f"{label}: {len(values)}")

# Frequency of every distinct keyword across both columns
keyword_counts = Counter(all_keywords)
print(f"\nUnique keywords: {len(keyword_counts)}\n\nTop 10 keywords:")
for ranked_pair in keyword_counts.most_common(10):
    print("  {}: {}".format(*ranked_pair))

In [None]:
# Prepare data for the pie chart: keep the `top_n` most frequent keywords and
# fold every remaining keyword into a single "Other" slice for legibility.
sorted_keywords = keyword_counts.most_common()

top_n = 10
top_keywords = dict(sorted_keywords[:top_n])
other_count = sum(count for _, count in sorted_keywords[top_n:])

if other_count > 0:
    # Accumulate instead of assigning: if the data already contains a keyword
    # literally named 'Other' among the top entries, plain assignment would
    # overwrite (and silently lose) that keyword's own count.
    top_keywords['Other'] = top_keywords.get('Other', 0) + other_count

# Create pie chart
fig, ax = plt.subplots(figsize=(12, 8))

labels = list(top_keywords.keys())
sizes = list(top_keywords.values())
colors = sns.color_palette('husl', len(labels))  # one distinct hue per slice

wedges, texts, autotexts = ax.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',  # percentage label inside each wedge
    startangle=90,
    textprops={'fontsize': 11, 'weight': 'bold'}
)

# Make percentage text white for better visibility against the colored wedges
for autotext in autotexts:
    autotext.set_color('white')
    autotext.set_fontsize(10)

ax.set_title('2025 IWRC Seed Fund Projects\nResearch Topic Distribution',
             fontsize=16, weight='bold', pad=20)

# Save a print-resolution copy before showing the figure inline
plt.tight_layout()
plt.savefig('2025_keyword_pie_chart.png', dpi=300, bbox_inches='tight')
print("Saved: 2025_keyword_pie_chart.png")
plt.show()

## Part 2: Illinois Institutions Map

This section plots each funded institution on a county-level map of Illinois, with marker size and color scaled by the number of funded projects.

In [None]:
# Count the 2025 projects recorded for each value in the 'Institution' column
institutions = df['Institution'].value_counts()

funded_institution_count = len(institutions)
counted_project_total = institutions.sum()

print("Institutions funded in 2025:", institutions, sep="\n")
print(f"\nTotal institutions: {funded_institution_count}")
print(f"Total projects: {counted_project_total}")

In [None]:
# Build one summary row per (Institution, City) pair with its project count
# and total award amount.
location_groups = df.groupby(['Institution', 'City'])

# Number of projects at each institution/city location
institution_data = location_groups.size().reset_index(name='Project Count')

# Institution-wide funding totals. Not used for the summary below; kept so the
# name stays defined for any cell outside this view that may reference it.
funding_by_institution = (
    df.groupby('Institution')['Award Amount']
    .sum()
    .rename('Total Funding')
    .reset_index()
)

# Funding aggregated on the SAME keys as the project counts. Merging the
# institution-wide total onto per-city rows would repeat the full total on
# every row of an institution that appears under more than one city, which
# double counts funding and disagrees with the per-location 'Project Count'.
funding_by_location = (
    location_groups['Award Amount'].sum().reset_index(name='Total Funding')
)

# Both sides hold exactly one row per (Institution, City); `validate` makes
# pandas raise if that ever stops being true.
institution_summary = institution_data.merge(
    funding_by_location,
    on=['Institution', 'City'],
    validate='one_to_one',
)

print("Institution Summary:")
print(institution_summary.to_string(index=False))

In [None]:
# Approximate (latitude, longitude) for each city hosting a funded institution
coordinates = {
    'Champaign': (40.1164, -88.2434),
    'Urbana': (40.1106, -88.2073),
    'Carbondale': (37.7272, -89.2167),
    'Normal': (40.5142, -88.9906),
    'Chicago': (41.8781, -87.6298),
    'Charleston': (39.4961, -88.1781),
    'Evanston': (42.0451, -87.6877),
    'Godfrey': (38.9556, -90.1868),
    'Edwardsville': (38.8114, -89.9531)
}

# Location used for any city missing from `coordinates`.
# NOTE(review): presumably a point in central Illinois — confirm.
FALLBACK_COORDINATES = (40.6331, -89.3985)

# Look cities up by their whitespace-trimmed name so a stray leading/trailing
# space in the spreadsheet does not push a known city onto the fallback point.
city_keys = institution_summary['City'].astype(str).str.strip()

# Surface cities that will be drawn at the fallback location instead of
# letting them land there silently (a misplaced marker is easy to miss).
unmapped_cities = sorted(set(city_keys) - set(coordinates))
if unmapped_cities:
    print(
        f"WARNING: no coordinates for {unmapped_cities}; "
        f"plotting at fallback location {FALLBACK_COORDINATES}"
    )

# Add coordinates to institution data
institution_summary['Latitude'] = city_keys.map(
    lambda city: coordinates.get(city, FALLBACK_COORDINATES)[0]
)
institution_summary['Longitude'] = city_keys.map(
    lambda city: coordinates.get(city, FALLBACK_COORDINATES)[1]
)

print("\nInstitution data with coordinates:")
print(institution_summary[['Institution', 'City', 'Project Count', 'Latitude', 'Longitude']].to_string(index=False))

In [None]:
# Create a static Illinois map: county outlines drawn with geopandas and
# matplotlib, plus one marker per row of `institution_summary`, sized and
# colored by that row's project count.
import json
import geopandas as gpd
from shapely.geometry import shape
import matplotlib.patches as mpatches

# (full name, abbreviation) pairs, applied top to bottom. Order matters:
# the specific 'University of Illinois ...' campuses must be rewritten before
# the generic 'University of Illinois' entry. Campus-qualified names such as
# 'Southern Illinois University Carbondale' need no entry of their own: the
# prefix rule already turns them into 'SIU Carbondale'.
INSTITUTION_ABBREVIATIONS = (
    ('University of Illinois Urbana-Champaign', 'UIUC'),
    ('Southern Illinois University', 'SIU'),
    ('Illinois State University', 'ISU'),
    ('Illinois Institute of Technology', 'IIT'),
    ('University of Illinois Chicago', 'UIC'),
    ('University of Illinois', 'U of I'),
    ('Eastern Illinois University', 'EIU'),
    ('Northwestern University', 'Northwestern'),
    ('Lewis and Clark Community College', 'Lewis & Clark CC'),
)


def shorten_institution_name(name):
    """Return a compact map label for an institution name.

    Applies every abbreviation in INSTITUTION_ABBREVIATIONS in order, then
    overrides the label entirely for two known irregular spreadsheet entries.
    """
    short_name = name
    for full_name, abbreviation in INSTITUTION_ABBREVIATIONS:
        short_name = short_name.replace(full_name, abbreviation)

    # Special handling for entries with issues (values in the 'Institution'
    # column that are not plain institution names).
    if 'James Angel' in short_name:
        short_name = 'UIUC'
    if 'Not for profit' in short_name:
        short_name = 'Non-profit Org'
    return short_name


# Load the GeoJSON data and keep only Illinois features (ids starting with 17)
with open('illinois_counties.json', 'r') as f:
    geojson_data = json.load(f)

# `id` is optional in GeoJSON and may be numeric, so read it defensively
# instead of assuming every feature carries a string id.
illinois_features = [
    feature for feature in geojson_data['features']
    if str(feature.get('id', '')).startswith('17')
]
if not illinois_features:
    # Fail here with a clear message rather than deeper inside geopandas
    raise ValueError(
        "No features with an id starting with '17' found in illinois_counties.json"
    )

# Create a simplified GeoJSON with just Illinois
illinois_geojson = {
    'type': 'FeatureCollection',
    'features': illinois_features
}

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(illinois_geojson['features'])
gdf = gdf.set_crs('EPSG:4326')  # WGS84 coordinate system

# Create the map
fig, ax = plt.subplots(figsize=(12, 16))

# Plot Illinois counties: faint outlines first, then the tinted county fill
gdf.boundary.plot(ax=ax, linewidth=0.5, edgecolor='gray', alpha=0.5)
gdf.plot(ax=ax, color='#E8F4F8', edgecolor='#666666', linewidth=0.8, alpha=0.6)

# Plot institutions (zorder keeps the markers above the county polygons)
scatter = ax.scatter(
    institution_summary['Longitude'],
    institution_summary['Latitude'],
    s=institution_summary['Project Count'] * 150,  # Size based on project count
    c=institution_summary['Project Count'],
    cmap='YlOrRd',
    alpha=0.9,
    edgecolors='black',
    linewidth=2.5,
    zorder=5
)

# Add institution labels, offset from each marker
for _, row in institution_summary.iterrows():
    short_name = shorten_institution_name(row['Institution'])

    ax.annotate(
        f"{short_name}\n({row['Project Count']} project{'s' if row['Project Count'] > 1 else ''})",
        xy=(row['Longitude'], row['Latitude']),
        xytext=(12, 12),
        textcoords='offset points',
        fontsize=9,
        weight='bold',
        bbox=dict(boxstyle='round,pad=0.6', facecolor='white', edgecolor='#333333', alpha=0.9, linewidth=1.5),
        zorder=6,
        ha='left'
    )

# Add colorbar
cbar = plt.colorbar(scatter, ax=ax, pad=0.02, shrink=0.8)
cbar.set_label('Number of Projects', rotation=270, labelpad=25, fontsize=13, weight='bold')
cbar.ax.tick_params(labelsize=11)

# Labels and title
ax.set_xlabel('Longitude', fontsize=13, weight='bold')
ax.set_ylabel('Latitude', fontsize=13, weight='bold')
ax.set_title('2025 IWRC Seed Fund\nFunded Institutions Across Illinois',
             fontsize=18, weight='bold', pad=20)

# Optional: for a cleaner look, hide the ticks and spines here
# (e.g. with ax.set_axis_off()).

plt.tight_layout()
plt.savefig('2025_illinois_institutions_map.png', dpi=300, bbox_inches='tight', facecolor='white')
print("Saved: 2025_illinois_institutions_map.png (with actual Illinois map)")
plt.show()

## Summary Statistics

In [None]:
# Plain-text recap of the 2025 awards: totals, top keywords, top institutions
banner = "=" * 60
award_amounts = df['Award Amount']

print(banner, "2025 IWRC SEED FUND SUMMARY", banner, sep="\n")

# Headline figures
print(f"\nTotal Projects: {len(df)}")
print(f"Total Institutions: {df['Institution'].nunique()}")
print(f"Total Funding: ${award_amounts.sum():,.2f}")
print(f"Average Award: ${award_amounts.mean():,.2f}")

# Keyword coverage
print(f"\nResearch Topics Covered: {len(keyword_counts)}")
print(f"Total Keyword Mentions: {len(all_keywords)}")

print("\nTop 5 Research Topics:")
for rank, (topic, mentions) in enumerate(keyword_counts.most_common(5), start=1):
    print(f"  {rank}. {topic}: {mentions} mentions")

print("\nTop 3 Institutions by Project Count:")
for rank, (institution_name, project_count) in enumerate(institutions.head(3).items(), start=1):
    print(f"  {rank}. {institution_name}: {project_count} projects")

print(banner)

## Part 3: Interactive Visualizations using Plotly

Creating interactive versions of both the pie chart and Illinois map