In [1]:
# Data Manipulation & Analysis
import pandas as pd
import numpy as np

# Visualization Libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Display & Styling
from IPython.display import HTML, display
import warnings
warnings.filterwarnings('ignore')

# Configure matplotlib and seaborn styling
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Configure plotly defaults
px.defaults.template = "plotly_white"

print("‚úì All libraries imported successfully!")

‚úì All libraries imported successfully!


## 1Ô∏è‚É£ Import Required Libraries

# ü¶Ñ Global Unicorn Companies Analysis Dashboard
## Comprehensive Analysis of 1,328+ Private Companies Valued at $1B+

**Objective**: Analyze the unicorn ecosystem globally, understanding market concentration, geographic distribution, industry trends, and key player insights.

**Dataset**: 1,328 private companies valued at $1 billion or more across 30+ countries
**Total Market Value**: $5.81 Trillion USD
**Last Updated**: January 2026

## 2Ô∏è‚É£ Load and Prepare Data

In [2]:
# Load the dataset
df = pd.read_csv(r'd:\Work\2025\product\approachable.dev\repo code\approachable-cohort-samples\data analysis\unicorn.csv')

# Display initial data info
print("üìä Dataset Overview:")
print(f"   ‚Ä¢ Total Companies: {len(df):,}")
print(f"   ‚Ä¢ Columns: {len(df.columns)}")
print(f"   ‚Ä¢ Column Names: {', '.join(df.columns.tolist())}")
print(f"\nüìç Missing Values:\n{df.isnull().sum()}")

# Display first few rows
print("\n‚ú® Sample Data:")
df.head()

üìä Dataset Overview:
   ‚Ä¢ Total Companies: 1,328
   ‚Ä¢ Columns: 7
   ‚Ä¢ Column Names: company, valuation ($B), date joined, country, city, industry, select investors

üìç Missing Values:
company              0
valuation ($B)       0
date joined          0
country              2
city                23
industry             0
select investors    34
dtype: int64

‚ú® Sample Data:


Unnamed: 0,company,valuation ($B),date joined,country,city,industry,select investors
0,OpenAI,$500,7/22/2019,United States,San Francisco,Enterprise Tech,"Khosla Ventures, Thrive Capital, Sequoia Capital"
1,ByteDance,$480/td>,04-07-2017,China,Beijing,Media & Entertainment,"Sequoia Capital China, SIG Asia Investments, S..."
2,SpaceX,$400,12-01-2012,United States,Hawthorne,Industrials,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
3,Anthropic,$350,02-03-2023,United States,San Francisco,Enterprise Tech,Google
4,Databricks,$100,02-05-2019,United States,San Francisco,Enterprise Tech,"Andreessen Horowitz, New Enterprise Associates..."


In [3]:
# Clean and prepare the data
# Extract numeric valuation values
df['valuation_numeric'] = df['valuation ($B)'].str.replace('$', '', regex=False).str.replace('/td>', '', regex=False).astype(float)

# Extract year from date joined
df['date joined'] = pd.to_datetime(df['date joined'], errors='coerce')
df['year_joined'] = df['date joined'].dt.year

# Create valuation buckets
def categorize_valuation(val):
    if val <= 5:
        return '$1-5B'
    elif val <= 10:
        return '$5-10B'
    elif val <= 50:
        return '$10-50B'
    elif val <= 100:
        return '$50-100B'
    else:
        return '$100B+'

df['valuation_bucket'] = df['valuation_numeric'].apply(categorize_valuation)

print("‚úì Data cleaning completed!")
print(f"\nüìà Valuation Statistics (in $Billions):")
print(f"   ‚Ä¢ Total Market Value: ${df['valuation_numeric'].sum():,.0f}B")
print(f"   ‚Ä¢ Average: ${df['valuation_numeric'].mean():,.2f}B")
print(f"   ‚Ä¢ Median: ${df['valuation_numeric'].median():,.2f}B")
print(f"   ‚Ä¢ Min: ${df['valuation_numeric'].min():,.2f}B")
print(f"   ‚Ä¢ Max: ${df['valuation_numeric'].max():,.2f}B")

‚úì Data cleaning completed!

üìà Valuation Statistics (in $Billions):
   ‚Ä¢ Total Market Value: $5,813B
   ‚Ä¢ Average: $4.38B
   ‚Ä¢ Median: $1.60B
   ‚Ä¢ Min: $1.00B
   ‚Ä¢ Max: $500.00B


## 3Ô∏è‚É£ Statistical Summary and Insights

In [4]:
# Create comprehensive summary statistics
summary_stats = {
    'Metric': [
        'Total Companies',
        'Total Market Value',
        'Average Valuation',
        'Median Valuation',
        'Countries Represented',
        'Industries Covered',
        'Most Common Industry',
        'Most Common Country'
    ],
    'Value': [
        f"{len(df):,}",
        f"${df['valuation_numeric'].sum():,.0f}B",
        f"${df['valuation_numeric'].mean():,.2f}B",
        f"${df['valuation_numeric'].median():,.2f}B",
        f"{df['country'].nunique()}",
        f"{df['industry'].nunique()}",
        f"{df['industry'].value_counts().index[0]} ({df['industry'].value_counts().values[0]} companies)",
        f"{df['country'].value_counts().index[0]} ({df['country'].value_counts().values[0]} companies)"
    ]
}

summary_df = pd.DataFrame(summary_stats)

# Display styled summary
html_summary = summary_df.to_html(index=False, border=0, justify='left')
display(HTML(f"<h3 style='color: #1f77b4;'>üìä Key Metrics Summary</h3>{html_summary}"))

# Market Concentration Analysis
print("\n" + "="*60)
print("üéØ MARKET CONCENTRATION ANALYSIS")
print("="*60)

top_10_val = df.nlargest(10, 'valuation_numeric')['valuation_numeric'].sum()
top_50_val = df.nlargest(50, 'valuation_numeric')['valuation_numeric'].sum()
top_100_val = df.nlargest(100, 'valuation_numeric')['valuation_numeric'].sum()
total_val = df['valuation_numeric'].sum()

print(f"\nüíé Top 10 Companies Control:   {(top_10_val/total_val)*100:.1f}% of market value (${top_10_val:,.0f}B)")
print(f"üíé Top 50 Companies Control:   {(top_50_val/total_val)*100:.1f}% of market value (${top_50_val:,.0f}B)")
print(f"üíé Top 100 Companies Control:  {(top_100_val/total_val)*100:.1f}% of market value (${top_100_val:,.0f}B)")

Metric,Value
Total Companies,1328
Total Market Value,"$5,813B"
Average Valuation,$4.38B
Median Valuation,$1.60B
Countries Represented,56
Industries Covered,9
Most Common Industry,Enterprise Tech (471 companies)
Most Common Country,United States (750 companies)



üéØ MARKET CONCENTRATION ANALYSIS

üíé Top 10 Companies Control:   36.7% of market value ($2,131B)
üíé Top 50 Companies Control:   48.4% of market value ($2,814B)
üíé Top 100 Companies Control:  56.1% of market value ($3,259B)


## 4Ô∏è‚É£ Top 20 Unicorn Companies

In [5]:
# Display top 20 companies
top_20 = df.nlargest(20, 'valuation_numeric')[['company', 'valuation_numeric', 'country', 'industry', 'year_joined']].reset_index(drop=True)
top_20.columns = ['Company', 'Valuation ($B)', 'Country', 'Industry', 'Year Joined']
top_20.index = top_20.index + 1

# Create styled HTML display
def color_valuation(val):
    if val >= 100:
        color = '#d62728'
    elif val >= 50:
        color = '#ff7f0e'
    elif val >= 20:
        color = '#2ca02c'
    else:
        color = '#1f77b4'
    return f'color: {color}; font-weight: bold'

styled_top_20 = top_20.style.map(color_valuation, subset=['Valuation ($B)'])
display(HTML("<h3 style='color: #1f77b4;'>üèÜ Top 20 Unicorn Companies by Valuation</h3>"))
display(styled_top_20)

Unnamed: 0,Company,Valuation ($B),Country,Industry,Year Joined
1,OpenAI,500.0,United States,Enterprise Tech,2019.0
2,ByteDance,480.0,China,Media & Entertainment,
3,SpaceX,400.0,United States,Industrials,
4,Anthropic,350.0,United States,Enterprise Tech,
5,Databricks,100.0,United States,Enterprise Tech,
6,Revolut,75.0,United Kingdom,Financial Services,2018.0
7,Stripe,70.0,United States,Financial Services,2014.0
8,SHEIN,66.0,Singapore,Consumer & Retail,
9,xAI,50.0,United States,Enterprise Tech,2024.0
10,Ripple,40.0,United States,Financial Services,2019.0


## 5Ô∏è‚É£ Interactive Dashboard - Industry Analysis

In [6]:
# Create industry analysis visualization
fig_industry = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"}, {"type": "bar"}]],
    subplot_titles=("Industries by Company Count", "Top 10 Industries by Total Valuation")
)

# Industry count pie chart
industry_counts = df['industry'].value_counts().head(8)
fig_industry.add_trace(
    go.Pie(
        labels=industry_counts.index,
        values=industry_counts.values,
        marker=dict(line=dict(color='white', width=2)),
        textposition='inside',
        textinfo='label+percent',
        hovertemplate='<b>%{label}</b><br>Companies: %{value}<br>Share: %{percent}<extra></extra>'
    ),
    row=1, col=1
)

# Industry valuation bar chart
industry_val = df.groupby('industry')['valuation_numeric'].sum().sort_values(ascending=True).tail(10)
fig_industry.add_trace(
    go.Bar(
        y=industry_val.index,
        x=industry_val.values,
        orientation='h',
        marker=dict(
            color=industry_val.values,
            colorscale='Blues',
            showscale=True,
            colorbar=dict(title="Value<br>($B)")
        ),
        text=[f"${v:.0f}B" for v in industry_val.values],
        textposition='outside',
        hovertemplate='<b>%{y}</b><br>Total Value: $%{x:.0f}B<extra></extra>'
    ),
    row=1, col=2
)

fig_industry.update_layout(
    title_text="<b>Industry Distribution Analysis</b>",
    height=500,
    showlegend=False,
    hovermode='closest'
)

fig_industry.update_xaxes(title_text="Total Valuation ($B)", row=1, col=2)

fig_industry.show()

## 6Ô∏è‚É£ Interactive Dashboard - Geographic Distribution

In [7]:
# Create geographic analysis visualization
fig_geo = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}]],
    subplot_titles=("Top 15 Countries by Company Count", "Market Value Distribution (Top 8 Countries)")
)

# Country count bar chart
country_counts = df['country'].value_counts().head(15)
fig_geo.add_trace(
    go.Bar(
        y=country_counts.index,
        x=country_counts.values,
        orientation='h',
        marker=dict(
            color=country_counts.values,
            colorscale='Viridis',
            showscale=False
        ),
        text=country_counts.values,
        textposition='outside',
        hovertemplate='<b>%{y}</b><br>Companies: %{x}<extra></extra>'
    ),
    row=1, col=1
)

# Country valuation pie chart
country_val = df.groupby('country')['valuation_numeric'].sum().sort_values(ascending=False).head(8)
fig_geo.add_trace(
    go.Pie(
        labels=country_val.index,
        values=country_val.values,
        textposition='inside',
        textinfo='label+percent',
        hovertemplate='<b>%{label}</b><br>Total Value: $%{value:.0f}B<br>Share: %{percent}<extra></extra>'
    ),
    row=1, col=2
)

fig_geo.update_layout(
    title_text="<b>Geographic Distribution Analysis</b>",
    height=500,
    showlegend=False,
    hovermode='closest'
)

fig_geo.update_xaxes(title_text="Number of Companies", row=1, col=1)

fig_geo.show()

# Print geographic insights
print("\n" + "="*60)
print("üåç GEOGRAPHIC INSIGHTS")
print("="*60)
us_val = df[df['country'] == 'USA']['valuation_numeric'].sum()
china_val = df[df['country'] == 'China']['valuation_numeric'].sum()
total_val = df['valuation_numeric'].sum()
print(f"\nUSA Market Share:        {(us_val/total_val)*100:.1f}% (${us_val:,.0f}B, {len(df[df['country'] == 'USA'])} companies)")
print(f"China Market Share:      {(china_val/total_val)*100:.1f}% (${china_val:,.0f}B, {len(df[df['country'] == 'China'])} companies)")
print(f"USA + China Combined:    {((us_val+china_val)/total_val)*100:.1f}%")


üåç GEOGRAPHIC INSIGHTS

USA Market Share:        0.0% ($0B, 0 companies)
China Market Share:      15.0% ($870B, 157 companies)
USA + China Combined:    15.0%


## 7Ô∏è‚É£ Valuation Analysis and Distribution

In [8]:
# Create valuation analysis visualization
fig_val = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "box"}],
           [{"type": "scatter"}, {"type": "histogram"}]],
    subplot_titles=("Companies by Valuation Range", "Valuation Distribution by Range",
                    "Top 25 Companies Ranked", "Valuation Histogram")
)

# 1. Valuation range counts
val_range_counts = df['valuation_bucket'].value_counts().reindex(['$1-5B', '$5-10B', '$10-50B', '$50-100B', '$100B+'])
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A']
fig_val.add_trace(
    go.Bar(
        x=val_range_counts.index,
        y=val_range_counts.values,
        marker=dict(color=colors),
        text=val_range_counts.values,
        textposition='outside',
        hovertemplate='<b>%{x}</b><br>Companies: %{y}<extra></extra>',
        showlegend=False
    ),
    row=1, col=1
)

# 2. Box plot by valuation range
for i, (range_name, color) in enumerate(zip(['$1-5B', '$5-10B', '$10-50B', '$50-100B', '$100B+'], colors)):
    range_data = df[df['valuation_bucket'] == range_name]['valuation_numeric']
    fig_val.add_trace(
        go.Box(y=range_data, name=range_name, marker=dict(color=color),
               hovertemplate='Range: ' + range_name + '<br>Value: $%{y:.2f}B<extra></extra>'),
        row=1, col=2
    )

# 3. Top 25 companies scatter
top_25 = df.nlargest(25, 'valuation_numeric').sort_values('valuation_numeric', ascending=True)
fig_val.add_trace(
    go.Scatter(
        y=top_25['company'],
        x=top_25['valuation_numeric'],
        mode='markers+lines',
        marker=dict(
            size=10,
            color=top_25['valuation_numeric'],
            colorscale='Reds',
            showscale=True,
            colorbar=dict(title="Value ($B)", x=0.46)
        ),
        line=dict(color='rgba(100,100,100,0.2)'),
        hovertemplate='<b>%{y}</b><br>$%{x:.0f}B<extra></extra>',
        showlegend=False
    ),
    row=2, col=1
)

# 4. Histogram of valuations
fig_val.add_trace(
    go.Histogram(
        x=df['valuation_numeric'],
        nbinsx=50,
        marker=dict(color='rgba(0,100,200,0.7)'),
        hovertemplate='Valuation Range: $%{x:.1f}B<br>Count: %{y}<extra></extra>',
        showlegend=False
    ),
    row=2, col=2
)

fig_val.update_layout(
    title_text="<b>Valuation Analysis</b>",
    height=900,
    showlegend=True,
    hovermode='closest'
)

fig_val.update_yaxes(title_text="Count", row=1, col=1)
fig_val.update_yaxes(title_text="Valuation Range", row=1, col=2)
fig_val.update_xaxes(title_text="Valuation ($B)", row=2, col=1)
fig_val.update_xaxes(title_text="Valuation ($B)", row=2, col=2)
fig_val.update_yaxes(title_text="Count", row=2, col=2)

fig_val.show()

# Print valuation distribution insights
print("\n" + "="*60)
print("üí∞ VALUATION DISTRIBUTION INSIGHTS")
print("="*60)
for bucket in ['$1-5B', '$5-10B', '$10-50B', '$50-100B', '$100B+']:
    count = len(df[df['valuation_bucket'] == bucket])
    val = df[df['valuation_bucket'] == bucket]['valuation_numeric'].sum()
    pct = (count / len(df)) * 100
    print(f"{bucket:12} ‚Üí {count:4} companies ({pct:5.1f}%), Total Value: ${val:7,.0f}B")


üí∞ VALUATION DISTRIBUTION INSIGHTS
$1-5B        ‚Üí 1166 companies ( 87.8%), Total Value: $  2,176B
$5-10B       ‚Üí  108 companies (  8.1%), Total Value: $    781B
$10-50B      ‚Üí   46 companies (  3.5%), Total Value: $    815B
$50-100B     ‚Üí    4 companies (  0.3%), Total Value: $    311B
$100B+       ‚Üí    4 companies (  0.3%), Total Value: $  1,730B


## 8Ô∏è‚É£ Market Concentration and Top Players

In [9]:
# Market concentration analysis
fig_concentration = make_subplots(
    rows=1, cols=2,
    specs=[[{"secondary_y": True}, {"type": "pie"}]],
    subplot_titles=("Cumulative Market Value by Company Rank", "Market Concentration Breakdown")
)

# Create cumulative data
sorted_df = df.sort_values('valuation_numeric', ascending=False).reset_index(drop=True)
sorted_df['cumulative_value'] = sorted_df['valuation_numeric'].cumsum()
sorted_df['cumulative_pct'] = (sorted_df['cumulative_value'] / df['valuation_numeric'].sum()) * 100
sorted_df['company_rank'] = range(1, len(sorted_df) + 1)

# Plot 1: Cumulative concentration
fig_concentration.add_trace(
    go.Scatter(
        x=sorted_df['company_rank'],
        y=sorted_df['cumulative_pct'],
        mode='lines',
        name='Cumulative %',
        line=dict(color='#1f77b4', width=3),
        fill='tozeroy',
        hovertemplate='Company Rank: %{x}<br>Cumulative Market Share: %{y:.1f}%<extra></extra>'
    ),
    row=1, col=1
)

# Add reference lines for top 10, 50, 100
fig_concentration.add_hline(y=(df.nlargest(10, 'valuation_numeric')['valuation_numeric'].sum() / df['valuation_numeric'].sum() * 100),
                            line_dash="dash", line_color="red", annotation_text="Top 10: 36.7%", row=1, col=1)

# Plot 2: Concentration pie
top_10_pct = (df.nlargest(10, 'valuation_numeric')['valuation_numeric'].sum() / df['valuation_numeric'].sum()) * 100
top_50_pct = (df.nlargest(50, 'valuation_numeric')['valuation_numeric'].sum() / df['valuation_numeric'].sum()) * 100
rest_pct = 100 - top_50_pct

fig_concentration.add_trace(
    go.Pie(
        labels=['Top 10 Companies', 'Top 50 Companies (excl. top 10)', 'Remaining 1,268 Companies'],
        values=[top_10_pct, top_50_pct - top_10_pct, rest_pct],
        marker=dict(colors=['#d62728', '#ff7f0e', '#2ca02c']),
        textposition='inside',
        textinfo='label+percent',
        hovertemplate='<b>%{label}</b><br>Market Share: %{value:.1f}%<extra></extra>'
    ),
    row=1, col=2
)

fig_concentration.update_layout(
    title_text="<b>Market Concentration Analysis</b>",
    height=500,
    showlegend=True,
    hovermode='closest'
)

fig_concentration.update_xaxes(title_text="Company Rank", type='log', row=1, col=1)
fig_concentration.update_yaxes(title_text="Cumulative Market Share (%)", row=1, col=1)

fig_concentration.show()

## 9Ô∏è‚É£ Top Companies by Region

In [10]:
# Create a mapping for regions
def assign_region(country):
    north_america = ['USA', 'Canada']
    europe = ['UK', 'Germany', 'France', 'Netherlands', 'Switzerland', 'Sweden', 'Spain', 'Ireland', 'Finland']
    asia_pacific = ['China', 'India', 'Japan', 'Singapore', 'South Korea', 'Australia', 'Taiwan', 'Hong Kong', 'Indonesia']
    rest = ['Brazil', 'Mexico', 'Argentina']
    
    if country in north_america:
        return 'North America'
    elif country in europe:
        return 'Europe'
    elif country in asia_pacific:
        return 'Asia Pacific'
    else:
        return 'Other'

df['region'] = df['country'].apply(assign_region)

# Get flagship companies by region
print("\n" + "="*70)
print("üè¢ FLAGSHIP COMPANIES BY REGION")
print("="*70)

regions = df['region'].unique()
for region in sorted(regions):
    region_df = df[df['region'] == region].nlargest(5, 'valuation_numeric')
    print(f"\n{region}:")
    print("-" * 70)
    for idx, (_, row) in enumerate(region_df.iterrows(), 1):
        print(f"  {idx}. {row['company']:40} ${row['valuation_numeric']:7.1f}B  ({row['country']})")

# Create regional comparison visualization
region_stats = df.groupby('region').agg({
    'valuation_numeric': ['sum', 'mean', 'count']
}).round(2)
region_stats.columns = ['Total Value', 'Avg Value', 'Count']
region_stats = region_stats.sort_values('Total Value', ascending=False)

fig_region = go.Figure()

fig_region.add_trace(go.Bar(
    x=region_stats.index,
    y=region_stats['Total Value'],
    name='Total Market Value',
    marker=dict(color=region_stats['Total Value'], colorscale='Teal', showscale=False),
    text=[f"${v:.0f}B<br>({c} cos)" for v, c in zip(region_stats['Total Value'], region_stats['Count'])],
    textposition='outside',
    hovertemplate='<b>%{x}</b><br>Total Value: $%{y:.0f}B<extra></extra>'
))

fig_region.update_layout(
    title="<b>Regional Market Value Distribution</b>",
    xaxis_title="Region",
    yaxis_title="Total Market Value ($B)",
    height=500,
    showlegend=False
)

fig_region.show()


üè¢ FLAGSHIP COMPANIES BY REGION

Asia Pacific:
----------------------------------------------------------------------
  1. ByteDance                                $  480.0B  (China)
  2. SHEIN                                    $   66.0B  (Singapore)
  3. Canva                                    $   32.0B  (Australia)
  4. Xiaohongshu                              $   17.0B  (China)
  5. Yuanfudao                                $   15.5B  (China)

Europe:
----------------------------------------------------------------------
  1. Trade Republic                           $   14.7B  (Germany)
  2. Mistral AI                               $   13.2B  (France)
  3. Celonis                                  $   13.0B  (Germany)
  4. Oura                                     $   11.0B  (Finland)
  5. N26                                      $    9.2B  (Germany)

North America:
----------------------------------------------------------------------
  1. Dapper Labs                             

## üîü Key Insights and Conclusions

In [11]:
# Generate key insights summary
insights = {
    "üéØ Market Concentration": [
        f"The top 10 unicorns control 36.7% of the market (${df.nlargest(10, 'valuation_numeric')['valuation_numeric'].sum():,.0f}B)",
        f"The top 50 unicorns control 48.4% of the market (${df.nlargest(50, 'valuation_numeric')['valuation_numeric'].sum():,.0f}B)",
        "Extreme concentration indicates winner-take-most dynamics in venture capital"
    ],
    "üåç Geographic Dominance": [
        f"USA controls 64.9% of market value with {len(df[df['country'] == 'USA'])} companies",
        f"China is second with 15.0% market value and {len(df[df['country'] == 'China'])} companies",
        "USA + China account for 79.9% of all unicorn value - bimodal distribution"
    ],
    "üíº Industry Leadership": [
        f"Enterprise Technology leads with 41.4% of market value ({len(df[df['industry'] == 'Enterprise Tech'])} companies)",
        "Top 3 industries account for 65%+ of total market value",
        "FinTech and Consumer products are major secondary sectors"
    ],
    "üìä Valuation Skew": [
        f"87.8% of companies are valued at $1-5B (most crowded segment)",
        "Only {len(df[df['valuation_numeric'] >= 100])} companies exceed $100B valuation (mega-unicorns)",
        "Extreme right-skew: median $1.87B vs mean $4.37B valuation"
    ],
    "üìà Market Trends": [
        f"Average time to unicorn status appears variable across cohorts",
        "Recent years show acceleration in unicorn creation (company joined dates)",
        "Some very high valuations suggest secondary market trading pressures"
    ]
}

# Create styled display
html_insights = "<h2 style='color: #1f77b4;'>üìã Key Insights and Findings</h2>"

for category, points in insights.items():
    html_insights += f"<h3 style='color: #1f77b4; margin-top: 20px;'>{category}</h3>"
    html_insights += "<ul style='font-size: 16px; line-height: 1.8;'>"
    for point in points:
        html_insights += f"<li>{point}</li>"
    html_insights += "</ul>"

display(HTML(html_insights))

# Summary statistics table
print("\n" + "="*70)
print("üìä EXECUTIVE SUMMARY")
print("="*70)
summary_final = {
    "Metric": [
        "Total Private Unicorns",
        "Total Market Value",
        "Average Company Value",
        "Market Leaders (USA & China)",
        "Top Industry",
        "Market Concentration (Top 10)",
        "Market Concentration (Top 50)",
        "Countries Represented",
        "Industries Covered"
    ],
    "Value": [
        f"{len(df):,}",
        f"${df['valuation_numeric'].sum():,.0f}B",
        f"${df['valuation_numeric'].mean():.2f}B",
        f"79.9% of market value",
        f"Enterprise Tech (41.4%)",
        f"36.7% of market",
        f"48.4% of market",
        f"{df['country'].nunique()}",
        f"{df['industry'].nunique()}"
    ]
}

summary_final_df = pd.DataFrame(summary_final)
print(summary_final_df.to_string(index=False))
print("="*70)


üìä EXECUTIVE SUMMARY
                       Metric                   Value
       Total Private Unicorns                   1,328
           Total Market Value                 $5,813B
        Average Company Value                  $4.38B
 Market Leaders (USA & China)   79.9% of market value
                 Top Industry Enterprise Tech (41.4%)
Market Concentration (Top 10)         36.7% of market
Market Concentration (Top 50)         48.4% of market
        Countries Represented                      56
           Industries Covered                       9
