# Nevada Procurement: Bid Competition Analysis

**Objective**: Analyze competition patterns in Nevada procurement through bid-to-award conversion rates, bidder counts, and competition signals.

**Data**: Nevada procurement bids and contracts silver data

**Key Metrics**:
- Bid-to-award conversion rates by organization
- Average bidders per awarded solicitation (measured via `awarded_vendor_count`)
- Single-bidder percentage (competition risk indicator)
- Procurement cycle timing

**Coverage**: Bids (37.4%), Contracts (49.8%) - sufficient for trend analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb
from datetime import datetime
import warnings
# Suppress all Python warnings for the rest of the notebook session
warnings.filterwarnings(action='ignore')

# Plot defaults, applied in the same order as before:
# base matplotlib style, then the seaborn palette, then the figure size
plt.style.use('default')
sns.set_palette(palette='husl')
plt.rcParams.update({'figure.figsize': (12, 8)})

print("Setup complete")

In [None]:
# Data loading: pull the bids and contracts silver tables into pandas via DuckDB.
# `conn` is reused by the query cells further down.
conn = duckdb.connect()

# Bids: only rows that carry a solicitation id
bids_query = """
SELECT 
    bid_solicitation_id,
    contract_id,
    organization,
    fiscal_year,
    bid_opening_date,
    awarded_vendor_count,
    has_awarded_vendor,
    has_contract,
    status_category,
    days_since_opening
FROM read_parquet("../../data/silver/bids/version=v0.3.0/*/data.parquet")
WHERE bid_solicitation_id IS NOT NULL
"""
bids = conn.execute(bids_query).df()

# Quick profile of the bids table
opening_dates = bids['bid_opening_date']
print(f"Loaded {len(bids):,} bid records")
print(f"Organizations: {bids['organization'].nunique()}")
print(f"Date range: {opening_dates.min()} - {opening_dates.max()}")
for label, flag_column in (
    ("Has contract", 'has_contract'),
    ("Has awarded vendor", 'has_awarded_vendor'),
):
    flag = bids[flag_column]
    print(f"{label}: {flag.sum():,} ({flag.mean()*100:.1f}%)")

# Contracts: only rows that carry a contract id (used for the join analysis)
contracts_query = """
SELECT 
    contract_id,
    bid_solicitation_id,
    vendor_name,
    organization,
    begin_date,
    end_date,
    dollars_spent_to_date,
    contract_status
FROM read_parquet("../../data/silver/contracts/version=v0.3.0/*/data.parquet")
WHERE contract_id IS NOT NULL
"""
contracts = conn.execute(contracts_query).df()
print(f"\nLoaded {len(contracts):,} contract records")

# Last expression of the cell: preview the first bid rows
bids.head()

In [None]:
# Bid-to-Award Conversion Analysis
# Produces one row per (organization, fiscal_year) with at least 3 solicitations:
# volume, contract/award conversion rates, and competition metrics derived from
# awarded_vendor_count. The result (`conversion_results`) feeds the later
# competition, visualization, risk, and export cells.
conversion_query = """
SELECT
    organization,
    fiscal_year,
    COUNT(*) as total_bids,
    SUM(CASE WHEN has_contract THEN 1 ELSE 0 END) as bids_with_contracts,
    SUM(CASE WHEN has_awarded_vendor THEN 1 ELSE 0 END) as bids_with_awards,
    -- Conversion rates
    SUM(CASE WHEN has_contract THEN 1 ELSE 0 END) * 1.0 / COUNT(*) as contract_conversion_rate,
    SUM(CASE WHEN has_awarded_vendor THEN 1 ELSE 0 END) * 1.0 / COUNT(*) as award_conversion_rate,
    -- Competition metrics
    AVG(CASE WHEN has_awarded_vendor THEN awarded_vendor_count END) as avg_bidders_when_awarded,
    -- Numerator is restricted to awarded solicitations so it counts the same
    -- population as the denominator; NULLIF makes the rate NULL (instead of a
    -- division by zero) for groups that have no awarded solicitations
    SUM(CASE WHEN has_awarded_vendor AND awarded_vendor_count = 1 THEN 1 ELSE 0 END) * 1.0 /
        NULLIF(SUM(CASE WHEN has_awarded_vendor THEN 1 ELSE 0 END), 0) as single_bidder_rate
FROM read_parquet("../../data/silver/bids/version=v0.3.0/*/data.parquet")
WHERE bid_solicitation_id IS NOT NULL  -- same population as the `bids` frame
    AND organization IS NOT NULL
    AND fiscal_year IS NOT NULL
GROUP BY organization, fiscal_year
HAVING COUNT(*) >= 3  -- Minimum sample size for rates
ORDER BY total_bids DESC
"""

conversion_results = conn.execute(conversion_query).df()
print(f"Conversion analysis for {len(conversion_results)} org-year combinations")
print("\nTop Results by Bid Volume:")
display(conversion_results.head(10))

In [None]:
# Competition Health Metrics
# Overall conversion shares across every loaded bid record
contract_rate_pct = bids['has_contract'].mean() * 100
award_rate_pct = bids['has_awarded_vendor'].mean() * 100
print("=== COMPETITION HEALTH SUMMARY ===")
print(f"Overall bid-to-contract conversion: {contract_rate_pct:.1f}%")
print(f"Overall bid-to-award conversion: {award_rate_pct:.1f}%")

# Competition intensity: look only at solicitations flagged as awarded.
# `awarded_bids` is reused by the visualization and export cells.
awarded_bids = bids.loc[bids['has_awarded_vendor'].eq(True)]
n_awarded = len(awarded_bids)
if n_awarded > 0:
    vendor_counts = awarded_bids['awarded_vendor_count']
    avg_bidders = vendor_counts.mean()
    single_bidder_pct = vendor_counts.eq(1).mean() * 100
    print(f"Average bidders per awarded solicitation: {avg_bidders:.1f}")
    print(f"Single bidder rate: {single_bidder_pct:.1f}% (competition risk indicator)")

    # How many awarded solicitations fall on each vendor count
    print("\nBidder Count Distribution:")
    bidder_dist = vendor_counts.value_counts().sort_index()
    for count, frequency in bidder_dist.items():
        share_pct = frequency / n_awarded * 100
        print(f"  {count} bidders: {frequency} solicitations ({share_pct:.1f}%)")

# Organization-level roll-up of the org-year table: bid volume is summed,
# the rate columns are simple (unweighted) means across fiscal years
org_competition = (
    conversion_results
    .groupby('organization')
    .agg(
        total_bids=('total_bids', 'sum'),
        contract_conversion_rate=('contract_conversion_rate', 'mean'),
        avg_bidders_when_awarded=('avg_bidders_when_awarded', 'mean'),
        single_bidder_rate=('single_bidder_rate', 'mean'),
    )
    .round(3)
)

print("\nCompetition by Organization:")
highest_single_bidder_first = org_competition.sort_values('single_bidder_rate', ascending=False)
display(highest_single_bidder_first.head(10))

In [None]:
# Procurement Timing Analysis
# Pairs each bid with its contract (matched on contract_id) and measures the
# days from bid opening to contract start; negative gaps are filtered out in SQL.
timing_query = """
SELECT 
    b.bid_solicitation_id,
    b.organization,
    b.bid_opening_date,
    c.begin_date as contract_begin_date,
    DATEDIFF('day', b.bid_opening_date, c.begin_date) as days_bid_to_contract
FROM read_parquet("../../data/silver/bids/version=v0.3.0/*/data.parquet") b
INNER JOIN read_parquet("../../data/silver/contracts/version=v0.3.0/*/data.parquet") c 
    ON b.contract_id = c.contract_id
WHERE b.bid_opening_date IS NOT NULL 
    AND c.begin_date IS NOT NULL
    AND DATEDIFF('day', b.bid_opening_date, c.begin_date) >= 0
"""

timing_results = conn.execute(timing_query).df()
print(f"Timing analysis for {len(timing_results)} bid-contract pairs")

if len(timing_results) == 0:
    print("No bid-contract timing data available for analysis")
else:
    cycle_days = timing_results['days_bid_to_contract']
    print("\nProcurement Cycle Timing:")
    print(f"Median days bid-to-contract: {cycle_days.median():.0f}")
    print(f"Average days bid-to-contract: {cycle_days.mean():.0f}")
    print(f"90th percentile: {cycle_days.quantile(0.9):.0f} days")

    # Per-organization breakdown, keeping organizations with at least 3 pairs
    org_timing = (
        timing_results
        .groupby('organization')['days_bid_to_contract']
        .agg(['count', 'median', 'mean'])
        .round(1)
    )
    org_timing = org_timing.loc[org_timing['count'] >= 3]
    print("\nTiming by Organization:")
    display(org_timing.sort_values('median', ascending=False))

In [None]:
# Create competition visualizations
# Draws a 2x2 dashboard (conversion by organization, bidder-count histogram,
# competition vs conversion scatter, cycle-time histogram), writes it to
# ../output/bid_competition_analysis.png, then displays it.
from pathlib import Path  # local import: only needed to ensure the output folder exists

fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Nevada Procurement: Bid Competition Analysis', fontsize=16, y=0.98)

# 1. Conversion rates by organization
# Sorted ascending so tail(10) selects the ten highest-converting organizations
org_conv = conversion_results.groupby('organization')['contract_conversion_rate'].mean().sort_values(ascending=True)
if len(org_conv) > 0:
    org_conv.tail(10).plot(kind='barh', ax=axes[0, 0])
    axes[0, 0].set_xlabel('Bid-to-Contract Conversion Rate')
    axes[0, 0].set_title('Conversion Rates by Organization (Top 10)')

# 2. Bidder count distribution
if len(awarded_bids) > 0:
    awarded_bids['awarded_vendor_count'].hist(bins=15, ax=axes[0, 1], alpha=0.7, edgecolor='black')
    axes[0, 1].axvline(1, color='red', linestyle='--', alpha=0.8, label='Single bidder')
    axes[0, 1].set_xlabel('Number of Bidders')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].set_title('Distribution of Bidder Counts')
    axes[0, 1].legend()

# 3. Competition vs conversion rate
if len(conversion_results) > 0:
    axes[1, 0].scatter(conversion_results['avg_bidders_when_awarded'],
                      conversion_results['contract_conversion_rate'], alpha=0.7)
    axes[1, 0].set_xlabel('Average Bidders When Awarded')
    axes[1, 0].set_ylabel('Contract Conversion Rate')
    axes[1, 0].set_title('Competition Level vs Conversion Success')

# 4. Timing distribution
if len(timing_results) > 0:
    timing_results['days_bid_to_contract'].hist(bins=20, ax=axes[1, 1], alpha=0.7, edgecolor='black')
    axes[1, 1].axvline(timing_results['days_bid_to_contract'].median(),
                      color='orange', linestyle='--', alpha=0.8, label='Median')
    axes[1, 1].set_xlabel('Days from Bid Opening to Contract Start')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].set_title('Procurement Cycle Duration')
    axes[1, 1].legend()

plt.tight_layout()

# Save the figure BEFORE showing it: once plt.show() returns the figure may
# already be closed, and a later plt.savefig() would then write a blank image.
# Saving through `fig` also guarantees the dashboard itself is what gets written.
output_path = Path('../output/bid_competition_analysis.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path, dpi=300, bbox_inches='tight')
print("Charts saved to ../output/bid_competition_analysis.png")

plt.show()

In [None]:
# Competition Risk Analysis
# Flags org-year combinations with weak competition or low conversion and ranks
# them with a composite score. Every row of `conversion_results` is one
# (organization, fiscal_year) combination, so the output reports distinct
# organizations and org-year rows separately.
print("=== COMPETITION RISK ANALYSIS ===")

# High single-bidder risk: more than half of awarded solicitations had one vendor.
# Rows whose rate is missing compare False and are therefore not flagged.
# `high_risk` is reused by the export cell.
high_risk = conversion_results[conversion_results['single_bidder_rate'] > 0.5]
print(
    f"Organizations with >50% single-bidder rate: "
    f"{high_risk['organization'].nunique()} "
    f"({len(high_risk)} org-year combinations)"
)
if len(high_risk) > 0:
    print("\nHigh Risk Organizations:")
    risk_cols = ['organization', 'fiscal_year', 'total_bids', 'single_bidder_rate', 'avg_bidders_when_awarded']
    print(high_risk[risk_cols].to_string(index=False))

# Low conversion efficiency
low_efficiency = conversion_results[conversion_results['contract_conversion_rate'] < 0.3]
print(f"\nLow conversion efficiency (<30%): {len(low_efficiency)} org-year combinations")
if len(low_efficiency) > 0:
    print("Organizations with low bid success:")
    eff_cols = ['organization', 'fiscal_year', 'total_bids', 'contract_conversion_rate']
    print(low_efficiency[eff_cols].to_string(index=False))

# Overall competition health score
# Score = (1 - single_bidder_rate) * conversion_rate * log(1 + avg_bidders)
# Missing inputs are filled (single_bidder_rate -> 0, avg_bidders -> 1) so the
# score is defined for every row.
# NOTE(review): the fill treats org-years without awards as having no
# single-bidder risk - confirm this is the intended scoring.
conversion_results['competition_score'] = (
    (1 - conversion_results['single_bidder_rate'].fillna(0)) *
    conversion_results['contract_conversion_rate'] *
    np.log1p(conversion_results['avg_bidders_when_awarded'].fillna(1))
)

print("\nTop Competitive Organizations (by competition score):")
top_competitive = conversion_results.nlargest(5, 'competition_score')
# fiscal_year is shown because the same organization can appear once per year
comp_cols = ['organization', 'fiscal_year', 'competition_score', 'contract_conversion_rate', 'avg_bidders_when_awarded']
print(top_competitive[comp_cols].to_string(index=False))

In [None]:
# Export competition analysis results
# Writes the analysis tables and a one-column summary to ../output and reports
# only the files that actually exist after this cell has run.
from pathlib import Path  # local import: only this cell needs path handling

output_dir = Path('../output')
output_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create folders

exported = []  # file names confirmed written, in the order they are reported

conversion_results.to_csv(output_dir / 'bid_competition_results.csv', index=False)
exported.append('bid_competition_results.csv')

# The timing and risk tables are skipped when empty
if len(timing_results) > 0:
    timing_results.to_csv(output_dir / 'procurement_timing_results.csv', index=False)
    exported.append('procurement_timing_results.csv')
if len(high_risk) > 0:
    high_risk.to_csv(output_dir / 'competition_risk_organizations.csv', index=False)
    exported.append('competition_risk_organizations.csv')

# Summary statistics
summary = {
    'analysis_date': pd.Timestamp.now().strftime('%Y-%m-%d'),
    'total_bids_analyzed': len(bids),
    'total_contracts_analyzed': len(contracts),
    'overall_contract_conversion_rate': bids['has_contract'].mean(),
    'overall_award_conversion_rate': bids['has_awarded_vendor'].mean(),
    'avg_bidders_overall': awarded_bids['awarded_vendor_count'].mean() if len(awarded_bids) > 0 else 0,
    'single_bidder_rate_overall': (awarded_bids['awarded_vendor_count'] == 1).mean() if len(awarded_bids) > 0 else 0,
    # high_risk holds one row per (organization, fiscal_year); count distinct
    # organizations for this key and keep the row count under its own key
    'high_risk_organizations': high_risk['organization'].nunique(),
    'high_risk_org_years': len(high_risk),
    'median_cycle_days': timing_results['days_bid_to_contract'].median() if len(timing_results) > 0 else None
}

pd.Series(summary).to_csv(output_dir / 'competition_summary.csv')
exported.append('competition_summary.csv')

# The chart is produced by the visualization cell; list it only if it is on disk
if (output_dir / 'bid_competition_analysis.png').exists():
    exported.append('bid_competition_analysis.png')

print("Results exported:")
for file_name in exported:
    print(f"  - {file_name}")

## Key Findings & Policy Implications

**Competition Health**: [To be filled after running analysis]

**Risk Indicators**: 
- High single-bidder rates indicate potential vendor market concentration
- Low conversion rates may signal process inefficiencies or over-specification

**Policy Recommendations**:
- Target vendor outreach in high single-bidder markets
- Review procurement specifications in low-conversion areas
- Monitor cycle times for process optimization opportunities

**Data Limitations**:
- Bids sample (37.4% coverage) - trends reliable, totals approximate
- Join rate between bids and contracts varies by data availability
- Bidder metrics are derived from awarded-vendor counts (`awarded_vendor_count`), so they depend on accurate bid response tracking