# Nevada Procurement: Vendor Concentration Analysis

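**Objective**: Measure vendor market concentration using the Herfindahl-Hirschman Index (HHI) to identify potential competition risks and monopolization patterns. HHI is computed here as the sum of squared vendor spend shares, so it ranges from 0 to 1 (not the 0–10,000 scale).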

**Data**: Nevada procurement contracts silver data (1,607 records, full coverage)

**Key Metrics**:
- HHI by organization (corrected methodology)
- Top-N vendor concentration (Top 5, Top 10) 
- Competition indicators

**Methodology Update**: Using complete silver data for robust concentration analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb
from scipy.stats import bootstrap
import warnings
# Suppress every warning category so library notices do not clutter cell output.
warnings.filterwarnings('ignore')

# Seed NumPy's global random generator so randomized steps repeat between runs.
np.random.seed(42)

# Plot appearance: stock matplotlib style, seaborn 'husl' palette,
# and a 12x8 default figure size.
plt.style.use('default')
sns.set_palette('husl')
plt.rcParams.update({'figure.figsize': (12, 8)})

print("Setup complete")

In [None]:
# Open a DuckDB connection; later cells reuse `conn` to run their own queries.
conn = duckdb.connect()

# Select the contract columns used in this notebook from the v0.3.0 silver
# parquet partitions (the glob path is relative to the notebooks directory).
# Rows without a vendor, an organization or a fiscal year are dropped.
contracts_query = """
SELECT 
    contract_id,
    vendor_name,
    organization,
    fiscal_year_begin,
    dollars_spent_to_date,
    is_zero_spend,
    contract_status
FROM read_parquet("../../data/silver/contracts/version=v0.3.0/*/data.parquet")
WHERE vendor_name IS NOT NULL 
    AND organization IS NOT NULL
    AND fiscal_year_begin IS NOT NULL
"""

contracts = conn.execute(contracts_query).df()

# Sanity summary of the loaded frame: size, fiscal-year span, distinct
# organizations/vendors, and spend totals.
fiscal_years = contracts['fiscal_year_begin']
spend = contracts['dollars_spent_to_date']
load_report = (
    f"Loaded {len(contracts):,} contract records",
    f"Date range: {fiscal_years.min()} - {fiscal_years.max()}",
    f"Organizations: {contracts['organization'].nunique()}",
    f"Vendors: {contracts['vendor_name'].nunique()}",
    f"Zero spend contracts: {spend.eq(0).sum()}",
    f"Total spend: ${spend.sum():,.2f}",
)
for report_line in load_report:
    print(report_line)

# Last expression of the cell: the notebook renders the first rows as a preview.
contracts.head()

In [None]:
# CORRECTED HHI calculation - by organization only, include zero spend
#
# Query pipeline:
#   vendor_spend  - spend and contract count per (organization, vendor)
#   org_totals    - spend and contract count per organization
#   market_shares - each vendor's share of its organization's spend, plus a
#                   per-organization rank by spend
#   final SELECT  - HHI (sum of squared shares, 0-1 scale), top-5 / top-10
#                   shares, vendor counts
#
# NOTE(review): unlike the load cell, this query does not filter on
# fiscal_year_begin IS NOT NULL, so it may cover more rows than the
# `contracts` frame reports - confirm that this is intended.
hhi_query = """
WITH vendor_spend AS (
    SELECT 
        organization,
        vendor_name,
        SUM(dollars_spent_to_date) as vendor_spend,
        COUNT(*) as contract_count
    FROM read_parquet("../../data/silver/contracts/version=v0.3.0/*/data.parquet")
    WHERE vendor_name IS NOT NULL 
        AND organization IS NOT NULL
    GROUP BY 1,2
),
org_totals AS (
    SELECT 
        organization,
        SUM(vendor_spend) as total_spend,
        -- vendor_spend holds one row per vendor, so COUNT(*) here would return
        -- the number of vendors; sum the per-vendor contract counts instead.
        -- The cast keeps the column an integer after the aggregate.
        CAST(SUM(contract_count) AS BIGINT) as total_contracts
    FROM vendor_spend 
    GROUP BY 1
),
market_shares AS (
    SELECT 
        v.*,
        o.total_spend,
        o.total_contracts,
        CASE 
            WHEN o.total_spend > 0 THEN v.vendor_spend / o.total_spend 
            ELSE 1.0 / COUNT(*) OVER (PARTITION BY v.organization)  -- Equal share for zero-spend orgs
        END as market_share,
        -- ROW_NUMBER gives every vendor a distinct rank, so "top N" never
        -- covers more than N vendors even when spend is tied (DENSE_RANK would
        -- let all tied vendors share a rank). vendor_name breaks ties so the
        -- result is deterministic.
        ROW_NUMBER() OVER (
            PARTITION BY v.organization
            ORDER BY v.vendor_spend DESC NULLS LAST, v.vendor_name
        ) as vendor_rank
    FROM vendor_spend v
    JOIN org_totals o USING (organization)
)
SELECT 
    organization,
    total_spend,
    total_contracts,
    -- HHI calculation (sum of squared market shares)
    SUM(market_share * market_share) as hhi,
    -- Top-N concentration
    SUM(CASE WHEN vendor_rank <= 5 THEN market_share ELSE 0 END) as top5_share,
    SUM(CASE WHEN vendor_rank <= 10 THEN market_share ELSE 0 END) as top10_share,
    COUNT(*) as unique_vendors,
    COUNT(CASE WHEN market_share >= 0.10 THEN 1 END) as vendors_over_10pct
FROM market_shares
GROUP BY organization, total_spend, total_contracts
ORDER BY hhi DESC
"""

hhi_results = conn.execute(hhi_query).df()
print(f"HHI calculated for {len(hhi_results)} organizations")
# A single backslash so this prints a blank line, not the literal text "\n".
print("\nFull Results:")
display(hhi_results)

In [None]:
# Interpret HHI values (DOJ guidelines)
def interpret_hhi(hhi, *, moderate_threshold=0.15, high_threshold=0.25):
    """Map an HHI value on the 0-1 scale to a concentration label.

    The default cut-offs 0.15 and 0.25 equal 1,500 and 2,500 on the
    0-10,000 scale, the bands used by the 2010 DOJ/FTC Horizontal Merger
    Guidelines.
    NOTE(review): the 2023 Merger Guidelines use lower cut-offs
    (1,000 / 1,800); pass custom thresholds if those should apply.

    Args:
        hhi: Sum of squared market shares, expected in [0, 1].
        moderate_threshold: Lower bound (inclusive) of the moderate band.
        high_threshold: Lower bound (inclusive) of the high band.

    Returns:
        "Competitive (Low)", "Moderately Concentrated" or
        "Highly Concentrated"; "Undefined" when ``hhi`` is missing.
    """
    # NaN is the only value that is not equal to itself. Without this guard a
    # missing HHI fails both "<" comparisons and is reported as highly
    # concentrated.
    if hhi is None or hhi != hhi:
        return "Undefined"
    if hhi < moderate_threshold:
        return "Competitive (Low)"
    if hhi < high_threshold:
        return "Moderately Concentrated"
    return "Highly Concentrated"

# Label each organization with its concentration band.
hhi_results['concentration_level'] = hhi_results['hhi'].apply(interpret_hhi)

# Headline statistics of HHI across organizations.
hhi_values = hhi_results['hhi']
print("=== VENDOR CONCENTRATION SUMMARY ===")
print(f"Average HHI: {hhi_values.mean():.3f}")
print(f"Median HHI: {hhi_values.median():.3f}")
print(f"Max HHI: {hhi_values.max():.3f}")
print()

# Fraction of organizations falling in each band, rounded to three decimals.
level_shares = hhi_results['concentration_level'].value_counts(normalize=True)
print("Concentration Levels:")
print(level_shares.round(3))
print()

# The five organizations with the highest HHI.
top_market_cols = ['organization', 'hhi', 'top5_share', 'unique_vendors']
print("Most Concentrated Markets:")
print(hhi_results.nlargest(5, 'hhi')[top_market_cols])

In [None]:
# Create visualizations: a 2x2 dashboard of concentration charts built from
# `hhi_results`, written to disk and then displayed.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Nevada Procurement Vendor Concentration Analysis', fontsize=16, y=0.98)

# 1. HHI Distribution, with the moderate (0.15) and high (0.25) thresholds marked
axes[0, 0].hist(hhi_results['hhi'], bins=15, alpha=0.7, edgecolor='black')
axes[0, 0].axvline(0.15, color='orange', linestyle='--', alpha=0.8, label='Moderate threshold')
axes[0, 0].axvline(0.25, color='red', linestyle='--', alpha=0.8, label='High threshold')
axes[0, 0].set_xlabel('Herfindahl-Hirschman Index')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('Distribution of Market Concentration')
axes[0, 0].legend()

# 2. HHI vs Top 5 Share
axes[0, 1].scatter(hhi_results['top5_share'], hhi_results['hhi'], alpha=0.7, s=50)
axes[0, 1].set_xlabel('Top 5 Vendor Share')
axes[0, 1].set_ylabel('HHI')
axes[0, 1].set_title('HHI vs Top 5 Concentration')

# 3. HHI by Organization (top 15); labels longer than 20 characters are truncated
top_hhi = hhi_results.nlargest(15, 'hhi')
y_pos = range(len(top_hhi))
axes[1, 0].barh(y_pos, top_hhi['hhi'])
axes[1, 0].set_yticks(y_pos)
axes[1, 0].set_yticklabels([org[:20] + '...' if len(org) > 20 else org for org in top_hhi['organization']])
axes[1, 0].set_xlabel('HHI')
axes[1, 0].set_title('Top 15 Most Concentrated Organizations')

# 4. Vendor count vs HHI (log scale for better visibility)
axes[1, 1].scatter(hhi_results['unique_vendors'], hhi_results['hhi'], alpha=0.7, s=50)
axes[1, 1].set_xlabel('Number of Unique Vendors')
axes[1, 1].set_ylabel('HHI')
axes[1, 1].set_xscale('log')
axes[1, 1].set_title('Vendor Diversity vs Concentration (Log Scale)')

plt.tight_layout()

# Save BEFORE showing, and through the figure object: once plt.show() has
# displayed the figure it can be released (the notebook inline backend closes
# it), so a later plt.savefig() would write an empty canvas.
fig.savefig('../output/vendor_concentration_analysis.png', dpi=300, bbox_inches='tight')
print("Charts saved to ../output/vendor_concentration_analysis.png")

plt.show()

In [None]:
# Risk analysis: keep the organizations at or above the high-concentration
# cut-off (HHI >= 0.25), ordered from most to least concentrated.
# sort_values returns a new frame, so `hhi_results` itself is not modified.
is_high = hhi_results['hhi'] >= 0.25
high_concentration = hhi_results.loc[is_high].sort_values('hhi', ascending=False)

high_count = len(high_concentration)
print("=== HIGH CONCENTRATION RISK ANALYSIS ===")
print(f"Markets with high concentration (HHI ≥ 0.25): {high_count}")
print(f"Percentage of markets: {high_count / len(hhi_results) * 100:.1f}%")
print()

# Show the ten most concentrated organizations with their key indicators.
risk_cols = ['organization', 'hhi', 'top5_share', 'unique_vendors', 'vendors_over_10pct']
print("Top Risk Cases:")
print(high_concentration.head(10)[risk_cols].to_string(index=False))

# Market dominance: organizations whose five largest vendors hold more than
# 80% of spend, and organizations served by three vendors or fewer.
print("\n=== MARKET DOMINANCE PATTERNS ===")
single_vendor_dominance = hhi_results.loc[hhi_results['top5_share'].gt(0.8)]
print(f"Markets with >80% top-5 concentration: {len(single_vendor_dominance)}")

few_vendors = hhi_results.loc[hhi_results['unique_vendors'].le(3)]
print(f"Markets with ≤3 vendors: {len(few_vendors)}")

# Static description of the dataset and method (the text is hard-coded,
# not derived from the data).
print("\n=== FULL DATASET ANALYSIS ===")
for note in (
    "Coverage: Complete silver contracts data (1,607 records)",
    "All major procurement organizations included",
    "Methodology: Organization-level vendor concentration (not by fiscal year)",
):
    print(note)

In [None]:
# Write the per-organization results and the high-concentration subset to CSV.
hhi_results.to_csv('../output/vendor_concentration_results.csv', index=False)
high_concentration.to_csv('../output/high_concentration_markets.csv', index=False)

# Assemble one-row-per-metric headline figures for the summary file.
hhi_column = hhi_results['hhi']
market_count = len(hhi_results)
high_market_count = len(high_concentration)
top_org = hhi_results.loc[hhi_column.idxmax(), 'organization']

summary = {
    'analysis_date': pd.Timestamp.now().strftime('%Y-%m-%d'),
    'total_markets': market_count,
    'avg_hhi': hhi_column.mean(),
    'median_hhi': hhi_column.median(),
    'high_concentration_markets': high_market_count,
    'pct_high_concentration': high_market_count / market_count * 100,
    'most_concentrated_org': top_org,
    'highest_hhi': hhi_column.max()
}

# The Series is indexed by metric name, so the CSV has one metric per row.
summary_series = pd.Series(summary)
summary_series.to_csv('../output/concentration_summary.csv')

# List the artifacts this notebook produces.
for message in (
    "✓ Results exported:",
    "  - vendor_concentration_results.csv",
    "  - high_concentration_markets.csv",
    "  - concentration_summary.csv",
    "  - vendor_concentration_analysis.png",
):
    print(message)

## Key Findings & Recommendations

**Concentration Levels**: [To be filled after running analysis]

**Risk Areas**: Organizations with HHI ≥ 0.25 indicate potential competition concerns (HHI is computed per organization, pooled across fiscal years)

**Policy Implications**: 
- High concentration markets may benefit from vendor outreach programs
- Consider set-aside programs for small/minority vendors in concentrated markets
- Monitor for potential price manipulation in highly concentrated segments

**Data Limitations**:
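- Earlier runs of this analysis were based on a 49.8% sample (trends reliable, absolute values approximate); this version uses the complete silver data (1,607 records)
- The sample-era caveat that vendor diversity may be underrepresented should no longer apply with full coverage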
- Geographic factors not included (no location data)

**Next Steps**:
1. Validate findings with full dataset
2. Analyze bid competition rates in high-concentration markets
3. Investigate vendor entry/exit patterns
4. Cross-reference with procurement outcome quality metrics