In [1]:
!pip install folium plotly  # For interactive maps

import pandas as pd
import folium
import plotly.express as px
from IPython.display import display



In [2]:
df = pd.read_csv('gpu_clusters.csv')

# Basic info
print(f"Total clusters: {len(df)}")
print(df['Country'].value_counts().head(10))
print(df['Owner'].value_counts().head(10))

# Filter operational/high-certainty for analysis
df = df[df['Status'] == 'Operational']  # Or 'Existing'
df = df[df['Include in Standard Analysis'] == True]

# Convert performance (example: H100 equivalents to float)
df['H100 equivalents'] = pd.to_numeric(df['H100 equivalents'], errors='coerce')
df.dropna(subset=['H100 equivalents'], inplace=True)

df.head()

Total clusters: 786
Country
China                       253
United States of America    213
Japan                        33
France                       19
Korea (Republic of)          18
Germany                      13
India                        13
Brazil                       11
Saudi Arabia                 11
United Arab Emirates         10
Name: count, dtype: int64
Owner
Google                     41
Meta AI                    25
NVIDIA                     22
Microsoft                  22
Alibaba                    22
US Department of Energy    17
Tesla                      12
Oracle                     11
Sesterce                   10
Amazon                      8
Name: count, dtype: int64


Unnamed: 0,Name,Status,Certainty,Single cluster?,Max OP/s (log),H100 equivalents,Chip type (primary),Chip quantity (primary),Country,Owner,...,Decommissioned Date (if applicable),Largest existing cluster when first operational,% of largest cluster when first operational,Source 1,Source 2,Source 3,Source 4,Source 5,latitude,longitude


In [3]:
# Top owners by capacity
top_owners = df.groupby('Owner')['H100 equivalents'].sum().sort_values(ascending=False).head(10)
fig = px.bar(top_owners, title="Top 10 AI Cluster Owners by H100 Equivalents (2025)")
fig.show()

# Top countries
top_countries = df.groupby('Country')['H100 equivalents'].sum().sort_values(ascending=False)
fig2 = px.pie(top_countries, title="Global AI Compute Capacity by Country")
fig2.show()

In [4]:
# Hardcode known lat/long for major clusters (real 2025 examples from public reports)
locations = {
    'xAI Colossus Memphis': (35.1495, -90.0490),  # Memphis, TN
    'Meta GenAI Iowa': (41.5868, -93.6250),       # Des Moines area
    'Microsoft Iowa': (41.5868, -93.6250),
    'Oracle OCI Supercluster': (37.7749, -122.4194),  # Example US West
    'Northern Virginia Hyperscalers': (38.9072, -77.0369),  # Ashburn hotspot
    # Add more if you want from Location column
}

# Create base map
m = folium.Map(location=[20, 0], zoom_start=2, tiles='CartoDB positron')

# Add points (size by capacity if matched, else default)
for cluster, coord in locations.items():
    folium.CircleMarker(
        location=coord,
        radius=10,  # Scale by capacity later if more data
        popup=cluster,
        color='crimson',
        fill=True,
        fill_color='red'
    ).add_to(m)

# Bonus: Cluster markers for density
from folium.plugins import MarkerCluster
marker_cluster = MarkerCluster().add_to(m)
for cluster, coord in locations.items():
    folium.Marker(coord, popup=cluster).add_to(marker_cluster)

m  # Displays the map

# 2025 AI Infrastructure Geospatial Analysis

## Overview
Analysis of global GPU clusters powering AI (training/inference) using Epoch AI's 2025 dataset. Visualizes capacity hotspots, owner dominance, and key locations.

Data Source: https://epoch.ai/data/gpu-clusters (Dec 2025 update)

## Key Findings
- US dominates (~70-80% capacity), with private sector leading.
- Hotspots: Northern Virginia (hyperscalers), Iowa/Memphis for new builds.
- Growth: Rapid private expansions (xAI, Meta, Microsoft).

## Interactive Map
Points show major known AI data center/GPU cluster locations—click for details.