# Nevada Procurement: Lifecycle & Timing Analysis

**Objective**: Analyze procurement lifecycle patterns, timing efficiency, and seasonal trends in Nevada government procurement.

**Data**: Silver-layer Nevada procurement data — contracts (version v0.3.0) and purchase orders (version v0.5.0)

**Key Metrics**:
- Contract lifecycle duration patterns
- Seasonal procurement trends
- Processing time efficiency
- Fiscal year timing patterns

**Coverage**: Full silver dataset for comprehensive cycle analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb
from datetime import datetime, timedelta
import warnings
# Suppress every warning for the rest of the session to keep cell output compact.
warnings.filterwarnings('ignore')

# Plot appearance: matplotlib's default style, seaborn's 'husl' palette,
# and a (12, 8) default figure size.
plt.style.use('default')
sns.set_palette('husl')
plt.rcParams.update({'figure.figsize': (12, 8)})

print("Setup complete")

In [None]:
# Read the silver-layer contract and purchase-order tables used for lifecycle analysis.
conn = duckdb.connect()

# Contracts: identifiers, parties, timing fields, spend to date and status.
contracts_query = """
SELECT 
    contract_id,
    vendor_name,
    organization,
    fiscal_year_begin,
    begin_date,
    end_date,
    contract_duration_days,
    dollars_spent_to_date,
    contract_status
FROM read_parquet("../../data/silver/contracts/version=v0.3.0/*/data.parquet")
WHERE contract_id IS NOT NULL
"""

contracts = conn.execute(contracts_query).df()
n_contracts = len(contracts)
print(f"Loaded {n_contracts:,} contract records")

# Purchase orders: identifiers, parties, fiscal year, sent date, amount and status.
pos_query = """
SELECT 
    po_id,
    vendor_name,
    organization,
    fiscal_year,
    sent_date,
    total_amount,
    status_category
FROM read_parquet("../../data/silver/purchase_orders/version=v0.5.0/*/data.parquet")
WHERE po_id IS NOT NULL
"""

pos = conn.execute(pos_query).df()
n_pos = len(pos)
print(f"Loaded {n_pos:,} purchase order records")

# Parse the date columns of both tables into pandas datetimes.
for frame, date_columns in (
    (contracts, ('begin_date', 'end_date')),
    (pos, ('sent_date',)),
):
    for column in date_columns:
        frame[column] = pd.to_datetime(frame[column])

# Report the span covered by each table's reference date.
contract_starts = contracts['begin_date']
po_sent = pos['sent_date']
print(f"\nDate ranges:")
print(f"Contracts: {contract_starts.min()} to {contract_starts.max()}")
print(f"POs: {po_sent.min()} to {po_sent.max()}")

# Last expression of the cell: preview the first contract rows.
contracts.head()

In [None]:
# Contract lifecycle: duration statistics, status mix, and currently active contracts.
print("=== CONTRACT LIFECYCLE PATTERNS ===")

# Keep only contracts that report a positive duration.
has_duration = contracts['contract_duration_days'] > 0
duration_data = contracts[has_duration]
if len(duration_data) > 0:
    durations = duration_data['contract_duration_days']
    mean_days = durations.mean()
    print(f"Contracts with duration data: {len(duration_data):,}")
    print(f"Average duration: {mean_days:.0f} days ({mean_days/365:.1f} years)")
    print(f"Median duration: {durations.median():.0f} days")
    print(f"90th percentile: {durations.quantile(0.9):.0f} days")

    # Per-organization duration stats, limited to organizations with at least 5 contracts.
    org_durations = (
        duration_data.groupby('organization')['contract_duration_days']
        .agg(['count', 'mean', 'median'])
        .round(0)
    )
    org_durations = org_durations[org_durations['count'] >= 5].sort_values('mean', ascending=False)
    print(f"\nDuration by Organization (top 10):")
    display(org_durations.head(10))

# Share of contracts in each status.
status_dist = contracts['contract_status'].value_counts()
total_contracts = len(contracts)
print(f"\nContract Status Distribution:")
for status, count in status_dist.items():
    print(f"  {status}: {count:,} ({count/total_contracts*100:.1f}%)")

# A contract counts as active when today falls between its begin and end dates (inclusive).
today = pd.Timestamp.now()
already_started = contracts['begin_date'].le(today)
not_yet_ended = contracts['end_date'].ge(today)
active_contracts = contracts[already_started & not_yet_ended]
print(f"\nCurrently active contracts: {len(active_contracts):,}")
if len(active_contracts) > 0:
    # The reported "value" is the sum of dollars_spent_to_date for active contracts.
    active_spend = active_contracts['dollars_spent_to_date'].sum()
    print(f"Active contract value: ${active_spend:,.2f}")

In [None]:
# Seasonal patterns: calendar features and month-level activity for both tables.
print("=== SEASONAL PROCUREMENT PATTERNS ===")

# Derive calendar month / quarter / year from each table's reference date
# (begin_date for contracts, sent_date for purchase orders).
for frame, date_column in ((contracts, 'begin_date'), (pos, 'sent_date')):
    stamps = frame[date_column].dt
    frame['month'] = stamps.month
    frame['quarter'] = stamps.quarter
    frame['year'] = stamps.year

# Contract count and spend to date per calendar month.
monthly_contracts = (
    contracts.groupby('month')
    .agg(
        contract_count=('contract_id', 'count'),
        total_spend=('dollars_spent_to_date', 'sum'),
    )
    .round(2)
)

# Purchase-order count and total amount per calendar month.
monthly_pos = (
    pos.groupby('month')
    .agg(
        po_count=('po_id', 'count'),
        total_amount=('total_amount', 'sum'),
    )
    .round(2)
)

print("Contract Activity by Month:")
display(monthly_contracts)

print("\nPurchase Order Activity by Month:")
display(monthly_pos)
# Fiscal year end patterns (Nevada FY ends June 30)
def get_fy_month(date, fy_start_month=7):
    """Return the 1-based position of *date*'s month within the fiscal year.

    The month in which the fiscal year starts maps to 1 and the month just
    before it maps to 12. With the default start of July this gives
    July -> 1, August -> 2, ..., June -> 12.

    Args:
        date: Any object exposing a calendar ``month`` attribute (1-12),
            such as ``datetime.datetime`` or ``pandas.Timestamp``.
        fy_start_month: Calendar month (1-12) in which the fiscal year
            begins. Defaults to 7 (July).

    Returns:
        The fiscal-year month number, from 1 to 12.

    Raises:
        ValueError: If ``fy_start_month`` is not between 1 and 12.

    NOTE(review): for a missing timestamp whose ``month`` is NaN
    (presumably pandas ``NaT`` -- confirm) the arithmetic yields NaN
    instead of raising.
    """
    if not 1 <= fy_start_month <= 12:
        raise ValueError(f"fy_start_month must be in 1..12, got {fy_start_month}")
    # Shift so the start month lands on 0, wrap around the year, then go 1-based.
    return ((date.month - fy_start_month) % 12) + 1

# Tag every record with its fiscal-year month (July=1 ... June=12).
contracts['fy_month'] = contracts['begin_date'].apply(get_fy_month)
pos['fy_month'] = pos['sent_date'].apply(get_fy_month)

# FY month 12 = June (end of fiscal year)
fy_end_contracts = contracts[contracts['fy_month'] == 12]
fy_end_pos = pos[pos['fy_month'] == 12]

# Guard the share calculations so an empty table reports 0.0% instead of
# raising ZeroDivisionError.
fy_end_contract_pct = len(fy_end_contracts) / len(contracts) * 100 if len(contracts) else 0.0
fy_end_po_pct = len(fy_end_pos) / len(pos) * 100 if len(pos) else 0.0

print(f"\nFiscal Year End Activity (June):")
print(f"Contracts: {len(fy_end_contracts):,} ({fy_end_contract_pct:.1f}%)")
print(f"POs: {len(fy_end_pos):,} ({fy_end_po_pct:.1f}%)")
print(f"June contract spend: ${fy_end_contracts['dollars_spent_to_date'].sum():,.2f}")
print(f"June PO total: ${fy_end_pos['total_amount'].sum():,.2f}")

In [None]:
# Procurement timing: year-over-year trends and the fiscal-year-end rush.
print("=== PROCUREMENT TIMING ANALYSIS ===")

# Year-over-year trends, keyed by the calendar year of the contract begin date.
yearly_trends = (
    contracts.groupby('year')
    .agg(
        contract_count=('contract_id', 'count'),
        total_spend=('dollars_spent_to_date', 'sum'),
        avg_duration=('contract_duration_days', 'mean'),
    )
    .round(2)
)

print("Year-over-Year Contract Trends:")
display(yearly_trends)

# End-of-fiscal-year rush analysis.
# Days from each contract's begin date to June 30 of (fiscal_year_begin + 1).
# NOTE(review): this assumes fiscal_year_begin is the calendar year in which the
# fiscal year starts; if it is the fiscal-year label (named after the ending
# year), the "+ 1" is off by one year -- confirm against the silver schema.
# Computed column-wise: a column containing nulls is float, and formatting
# 2024.0 directly into the date string would produce "2024.0-06-30".
fy_end_year = pd.to_numeric(contracts['fiscal_year_begin'], errors='coerce') + 1
fy_end_date = pd.to_datetime(
    fy_end_year.map(lambda year: f"{int(year)}-06-30" if pd.notna(year) else None),
    format='%Y-%m-%d',
    errors='coerce',
)
# Missing begin dates or fiscal years yield NaN, which the range filter below excludes.
contracts['days_to_fy_end'] = (fy_end_date - contracts['begin_date']).dt.days

# Contracts started in last 30 days of fiscal year
fy_rush = contracts[
    (contracts['days_to_fy_end'] >= 0) &
    (contracts['days_to_fy_end'] <= 30)
]

# Avoid ZeroDivisionError when no contracts were loaded.
fy_rush_pct = len(fy_rush) / len(contracts) * 100 if len(contracts) else 0.0

print(f"\nFiscal Year-End Rush (last 30 days):")
print(f"Contracts: {len(fy_rush):,} ({fy_rush_pct:.1f}%)")
print(f"Total value: ${fy_rush['dollars_spent_to_date'].sum():,.2f}")
print(f"Average contract size in rush: ${fy_rush['dollars_spent_to_date'].mean():,.2f}")

In [None]:
# Organizational patterns: contract volume, spend and duration per organization.
print("=== ORGANIZATIONAL PROCUREMENT PATTERNS ===")

# One row per organization, restricted to those with at least 10 contracts.
org_patterns = (
    contracts.groupby('organization')
    .agg(
        contract_count=('contract_id', 'count'),
        total_spend=('dollars_spent_to_date', 'sum'),
        avg_spend=('dollars_spent_to_date', 'mean'),
        avg_duration=('contract_duration_days', 'mean'),
    )
    .round(2)
    .reset_index()
)
org_patterns = org_patterns.loc[org_patterns['contract_count'] >= 10]

print(f"Organizations with 10+ contracts: {len(org_patterns)}")

# Ten organizations with the highest total spend to date.
print("\nTop Spending Organizations:")
top_spenders = org_patterns.nlargest(10, 'total_spend')
spender_cols = ['organization', 'contract_count', 'total_spend', 'avg_duration']
display(top_spenders[spender_cols])

# Five organizations with the longest mean contract duration.
long_duration_orgs = org_patterns.nlargest(5, 'avg_duration')
print(f"\nOrganizations with Longest Average Contract Durations:")
duration_cols = ['organization', 'avg_duration', 'contract_count']
display(long_duration_orgs[duration_cols])

In [None]:
# Vendor relationship cycles: multi-year vendors and repeat vendor-organization pairs.
print("=== VENDOR RELATIONSHIP CYCLES ===")

# Number of distinct contract begin years per vendor; 3 or more counts as long-term.
vendor_years = contracts.groupby('vendor_name')['year'].nunique().sort_values(ascending=False)
long_term_vendors = vendor_years[vendor_years >= 3]

print(f"Vendors with 3+ year relationships: {len(long_term_vendors)}")
if len(long_term_vendors) > 0:
    print(f"\nTop Long-term Vendor Relationships:")
    for vendor, years in long_term_vendors.head(10).items():
        vendor_contracts = contracts[contracts['vendor_name'] == vendor]
        total_value = vendor_contracts['dollars_spent_to_date'].sum()
        # Vendor names are truncated to 40 characters to keep the listing aligned.
        print(f"  {vendor[:40]}: {years} years, ${total_value:,.0f}")

# Contract renewal patterns: one row per (vendor, organization) pair.
vendor_org_pairs = (
    contracts.groupby(['vendor_name', 'organization'])
    .agg(
        contract_count=('contract_id', 'count'),
        first_contract=('begin_date', 'min'),
        last_contract=('begin_date', 'max'),
        total_value=('dollars_spent_to_date', 'sum'),
    )
    .round(2)
    .reset_index()
)

# Multi-contract relationships. Take an explicit copy: a column is added below,
# and assigning into a boolean-filtered slice triggers SettingWithCopyWarning.
repeat_relationships = vendor_org_pairs[vendor_org_pairs['contract_count'] > 1].copy()
print(f"\nRepeat vendor-organization relationships: {len(repeat_relationships)}")

if len(repeat_relationships) > 0:
    # Span between the earliest and latest contract begin date for the pair.
    repeat_relationships['relationship_span_days'] = (
        repeat_relationships['last_contract'] - repeat_relationships['first_contract']
    ).dt.days

    print(f"Average relationship span: {repeat_relationships['relationship_span_days'].mean():.0f} days")
    print(f"\nTop Repeat Relationships by Value:")
    top_relationships = repeat_relationships.nlargest(10, 'total_value')
    rel_cols = ['vendor_name', 'organization', 'contract_count', 'total_value', 'relationship_span_days']
    display(top_relationships[rel_cols])

In [None]:
# Create procurement cycle visualizations (2x2 grid) and save them to disk.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Nevada Procurement: Lifecycle & Timing Analysis', fontsize=16, y=0.98)

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
fy_month_names = ['Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']


def _month_labels(index, names):
    """Map 1-based month numbers to display names.

    The numbers are cast to int because the month index is float when any
    source date is missing, and a float cannot index a list.
    """
    return [names[int(number) - 1] for number in index]


# 1. Monthly procurement activity
# Relabel a copy so monthly_contracts keeps its numeric index and the cell can be re-run.
contract_counts = monthly_contracts['contract_count'].copy()
contract_counts.index = _month_labels(contract_counts.index, month_names)
contract_counts.plot(kind='bar', ax=axes[0, 0], alpha=0.7)
axes[0, 0].set_xlabel('Month')
axes[0, 0].set_ylabel('Contract Count')
axes[0, 0].set_title('Contract Activity by Month')
axes[0, 0].tick_params(axis='x', rotation=45)

# 2. Contract duration distribution
if len(duration_data) > 0:
    duration_years = duration_data['contract_duration_days'] / 365
    axes[0, 1].hist(duration_years, bins=20, alpha=0.7, edgecolor='black')
    axes[0, 1].axvline(duration_years.median(), color='red', linestyle='--', alpha=0.8, label='Median')
    axes[0, 1].set_xlabel('Contract Duration (Years)')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].set_title('Contract Duration Distribution')
    axes[0, 1].legend()

# 3. Fiscal year spending pattern
fy_spending = contracts.groupby('fy_month')['dollars_spent_to_date'].sum()
# Filter the series itself (not just the labels) so values and labels always
# have the same length.
fy_spending = fy_spending[(fy_spending.index >= 1) & (fy_spending.index <= 12)]
fy_spending.index = _month_labels(fy_spending.index, fy_month_names)
fy_spending.plot(kind='bar', ax=axes[1, 0], alpha=0.7, color='green')
axes[1, 0].set_xlabel('Fiscal Year Month')
axes[1, 0].set_ylabel('Total Spend ($)')
axes[1, 0].set_title('Spending by Fiscal Year Month')
axes[1, 0].tick_params(axis='x', rotation=45)

# 4. PO monthly distribution
# Same copy-then-relabel approach as panel 1.
po_counts = monthly_pos['po_count'].copy()
po_counts.index = _month_labels(po_counts.index, month_names)
po_counts.plot(kind='bar', ax=axes[1, 1], alpha=0.7, color='blue')
axes[1, 1].set_xlabel('Month')
axes[1, 1].set_ylabel('PO Count')
axes[1, 1].set_title('Purchase Order Activity by Month')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()

# Save BEFORE showing: once plt.show() returns, the current figure may already
# be closed, and plt.savefig would then write an empty image. Saving through
# the figure object also avoids depending on pyplot's "current figure".
fig.savefig('../output/procurement_cycles_analysis.png', dpi=300, bbox_inches='tight')
print("Charts saved to ../output/procurement_cycles_analysis.png")

plt.show()

In [None]:
# Export procurement cycle analysis results
exported_files = []

org_patterns.to_csv('../output/organizational_procurement_patterns.csv', index=False)
exported_files.append('organizational_procurement_patterns.csv')

# yearly_trends is indexed by year, so the index must be written; with
# index=False the exported rows would carry no year at all.
yearly_trends.to_csv('../output/yearly_procurement_trends.csv', index=True)
exported_files.append('yearly_procurement_trends.csv')

if len(repeat_relationships) > 0:
    repeat_relationships.to_csv('../output/vendor_relationship_cycles.csv', index=False)
    exported_files.append('vendor_relationship_cycles.csv')

# Cycle analysis summary
has_durations = len(duration_data) > 0
cycle_summary = {
    'analysis_date': pd.Timestamp.now().strftime('%Y-%m-%d'),
    'contracts_analyzed': len(contracts),
    'pos_analyzed': len(pos),
    'avg_contract_duration_days': duration_data['contract_duration_days'].mean() if has_durations else None,
    'median_contract_duration_days': duration_data['contract_duration_days'].median() if has_durations else None,
    'fy_end_rush_contracts': len(fy_rush),
    # Report 0.0 instead of raising ZeroDivisionError when no contracts were loaded.
    'fy_end_rush_percentage': len(fy_rush) / len(contracts) * 100 if len(contracts) else 0.0,
    'long_term_vendors': len(long_term_vendors),
    'repeat_relationships': len(repeat_relationships)
}

pd.Series(cycle_summary).to_csv('../output/procurement_cycles_summary.csv')
exported_files.append('procurement_cycles_summary.csv')

# The chart is written by the visualization cell; it is listed here as in the
# original summary.
exported_files.append('procurement_cycles_analysis.png')

# List only files that were actually written (the vendor file is conditional).
print("Procurement cycle analysis results exported:")
for file_name in exported_files:
    print(f"  - {file_name}")

## Key Findings & Process Insights

**Lifecycle Efficiency**: [To be filled after running analysis]

**Seasonal Patterns**:
- Fiscal year-end activity clustering indicates budget timing pressures
- Monthly procurement patterns show administrative cycles
- Quarterly trends reveal planning and execution rhythms

**Organizational Insights**:
- Duration patterns vary significantly by organization type
- Repeat vendor relationships show procurement consistency

**Process Optimization Opportunities**:
- Fiscal year-end clustering suggests need for better budget planning

**Policy Recommendations**:
- Implement quarterly procurement planning to reduce year-end rush
- Monitor long-term vendor relationships for competition health
- Establish cycle time benchmarks by procurement type