# Pre-Commit Configuration Evaluation

This notebook provides a comprehensive evaluation of the `.pre-commit-config.yaml` file, focusing on:
- 📊 **File Size Checks** - Preventing large file commits
- 🔐 **Secret Detection** - Multiple layers of security scanning
- 📓 **Notebook Stripouts** - Jupyter notebook cleaning
- ⚙️ **Configuration Analysis** - Best practices and recommendations

## Evaluation Criteria
- ✅ **Security**: Does it prevent sensitive data leaks?
- ✅ **Performance**: Are the hooks efficient and fast?
- ✅ **Maintainability**: Is the configuration easy to maintain?
- ✅ **Completeness**: Does it cover all necessary checks?

## 1. Load and Parse the YAML Configuration

First, let's load the pre-commit configuration file and examine its structure.

In [None]:
import yaml
import json
import os
from pathlib import Path
import requests
from datetime import datetime
import re

# Load the pre-commit configuration.
# NOTE: the path is relative to the current working directory, so the notebook
# is expected to run from a directory one level below the repository root.
config_path = Path("../.pre-commit-config.yaml")

# Bind an empty mapping up front so that later cells, which call
# `precommit_config.get(...)`, do not fail with NameError when loading fails.
precommit_config = {}

try:
    with open(config_path, 'r', encoding='utf-8') as file:
        loaded_config = yaml.safe_load(file)

    # An empty file parses to None and a top-level sequence parses to a list;
    # neither supports the `.get()` lookups used throughout this notebook.
    if not isinstance(loaded_config, dict):
        raise ValueError(
            f"expected a top-level mapping, got {type(loaded_config).__name__}"
        )
    precommit_config = loaded_config

    # `repos: null` would otherwise hand None to len()/enumerate().
    configured_repos = precommit_config.get('repos') or []

    print("✅ Successfully loaded .pre-commit-config.yaml")
    print(f"📄 Configuration has {len(configured_repos)} repository hooks")

    # Display basic structure: the last URL path segment serves as a short name
    # (a value without '/' such as "local" is returned unchanged by split).
    print("\n📋 Configuration Overview:")
    for i, repo in enumerate(configured_repos, 1):
        repo_name = repo['repo'].split('/')[-1]
        hook_count = len(repo.get('hooks') or [])
        print(f"  {i}. {repo_name} ({hook_count} hooks)")

except FileNotFoundError:
    print("❌ .pre-commit-config.yaml not found!")
except yaml.YAMLError as e:
    print(f"❌ YAML parsing error: {e}")
except Exception as e:
    # Top-level notebook boundary: report anything unexpected instead of
    # aborting the cell with a traceback.
    print(f"❌ Error loading configuration: {e}")

: 

## 2. Analyze File Size Check Configuration

Let's examine the `check-added-large-files` hook to evaluate its effectiveness in preventing large file commits.

In [None]:
# Find and analyze file size check configuration
file_size_analysis = {
    "found": False,
    "max_size_kb": None,
    "max_size_mb": None,
    "recommendation": "❌ Not configured"
}

# Limit that check-added-large-files applies when --maxkb is not passed
# (documented default of the hook; used below as the fallback threshold).
default_limit_kb = 500

for repo in precommit_config.get('repos') or []:
    for hook in repo.get('hooks') or []:
        if hook.get('id') != 'check-added-large-files':
            continue
        file_size_analysis["found"] = True

        # Extract the maxkb argument. Both "--maxkb=N" and the two-token
        # form "--maxkb", "N" are accepted.
        args = [str(arg) for arg in hook.get('args') or []]
        for pos, arg in enumerate(args):
            if arg.startswith('--maxkb='):
                raw_value = arg.split('=', 1)[1]
            elif arg == '--maxkb' and pos + 1 < len(args):
                raw_value = args[pos + 1]
            else:
                continue
            try:
                kb_value = int(raw_value)
            except ValueError:
                # A malformed value should not abort the whole analysis.
                print(f"⚠️  Ignoring non-integer --maxkb value: {raw_value!r}")
            else:
                file_size_analysis["max_size_kb"] = kb_value
                file_size_analysis["max_size_mb"] = round(kb_value / 1024, 2)
            break

print("📊 File Size Check Analysis")
print("=" * 40)

if file_size_analysis["found"]:
    print("✅ check-added-large-files hook is configured")

    # Compare against None explicitly so that an explicit "--maxkb=0" is
    # still treated as a configured limit.
    if file_size_analysis["max_size_kb"] is not None:
        size_kb = file_size_analysis["max_size_kb"]
        size_mb = file_size_analysis["max_size_mb"]
        print(f"📏 Maximum file size: {size_kb} KB ({size_mb} MB)")

        # Evaluate the size limit
        if size_kb <= 500:  # 500KB
            print("🟢 Excellent: Very strict size limit prevents almost all large files")
            file_size_analysis["recommendation"] = "🟢 Excellent configuration"
        elif size_kb <= 1000:  # 1MB
            print("🟡 Good: Reasonable size limit for most projects")
            file_size_analysis["recommendation"] = "🟡 Good configuration"
        elif size_kb <= 5000:  # 5MB
            print("🟠 Fair: Size limit may allow some large files")
            file_size_analysis["recommendation"] = "🟠 Consider reducing limit"
        else:
            print("🔴 Poor: Size limit is too high")
            file_size_analysis["recommendation"] = "🔴 Reduce size limit"
    else:
        print("⚠️  Default size limit (no --maxkb specified)")
        file_size_analysis["recommendation"] = "⚠️  Add explicit --maxkb argument"
else:
    print("❌ check-added-large-files hook is NOT configured")
    print("🚨 This is a critical security gap!")

print(f"\n💡 Recommendation: {file_size_analysis['recommendation']}")

# Test with current repository files
print(f"\n🔍 Current Repository File Analysis:")

# Resolve the threshold once. The dict key always exists, so a
# `.get(key, default)` lookup would return None rather than the default.
limit_kb = file_size_analysis["max_size_kb"]
if limit_kb is None:
    limit_kb = default_limit_kb

large_files = []
for root, dirs, files in os.walk(".."):
    # Prune the .git directory in place so os.walk never descends into it.
    # Matching the exact name keeps directories such as .github in the scan.
    dirs[:] = [d for d in dirs if d != '.git']
    for file in files:
        file_path = os.path.join(root, file)
        try:
            file_kb = os.path.getsize(file_path) / 1024
        except OSError:
            # Broken symlinks / unreadable entries are skipped on purpose.
            continue

        if file_kb > limit_kb:
            large_files.append({
                "path": os.path.relpath(file_path, ".."),
                "size_kb": round(file_kb, 2),
                "size_mb": round(file_kb / 1024, 2)
            })

if large_files:
    print(f"⚠️  Found {len(large_files)} files exceeding size limit:")
    for file_info in large_files[:5]:  # Show first 5
        print(f"  📄 {file_info['path']}: {file_info['size_kb']} KB ({file_info['size_mb']} MB)")
    if len(large_files) > 5:
        print(f"  ... and {len(large_files) - 5} more files")
else:
    print("✅ No files exceed the configured size limit")

## 3. Evaluate Secret Detection Setup

This section analyzes the multi-layered secret detection configuration, including detect-secrets and GitGuardian's ggshield.

In [None]:
# Analyze secret detection configuration
secret_detection = {
    "detect_private_key": False,
    "detect_aws_credentials": False,
    "detect_secrets": False,
    "ggshield": False,
    "baseline_file": None,
    "exclusions": [],
    "coverage_score": 0
}

print("🔐 Secret Detection Analysis")
print("=" * 40)

for repo in precommit_config.get('repos') or []:
    for hook in repo.get('hooks') or []:
        hook_id = hook.get('id')

        # Check for basic secret detection hooks
        if hook_id == 'detect-private-key':
            secret_detection["detect_private_key"] = True
            print("✅ detect-private-key: Detects SSH/TLS private keys")

        elif hook_id == 'detect-aws-credentials':
            secret_detection["detect_aws_credentials"] = True
            print("✅ detect-aws-credentials: Detects AWS credentials")

        elif hook_id == 'detect-secrets':
            secret_detection["detect_secrets"] = True
            print("✅ detect-secrets: Advanced secret scanning")

            # Check for baseline file. Both "--baseline FILE" (two tokens) and
            # "--baseline=FILE" are recognised; positions come from enumerate
            # so repeated argument values cannot confuse the lookup.
            args = [str(arg) for arg in hook.get('args') or []]
            baseline_file = None
            for pos, arg in enumerate(args):
                if arg.startswith('--baseline='):
                    baseline_file = arg.split('=', 1)[1]
                elif arg == '--baseline' and pos + 1 < len(args):
                    baseline_file = args[pos + 1]
                if baseline_file:
                    break

            if baseline_file:
                secret_detection["baseline_file"] = baseline_file
                print(f"   📄 Baseline file: {baseline_file}")

                # Check if baseline file exists (path is relative to the repo root)
                if (Path("..") / baseline_file).exists():
                    print("   ✅ Baseline file exists")
                else:
                    print("   ❌ Baseline file missing!")

            # Check exclusions
            exclude = hook.get('exclude')
            if exclude:
                secret_detection["exclusions"].append(exclude)
                print(f"   🚫 Exclusions: {exclude}")

        elif hook_id == 'ggshield':
            secret_detection["ggshield"] = True
            print("✅ ggshield: GitGuardian security scanning")

# Calculate coverage score: one point per active detection layer
coverage_components = [
    secret_detection["detect_private_key"],
    secret_detection["detect_aws_credentials"],
    secret_detection["detect_secrets"],
    secret_detection["ggshield"]
]
secret_detection["coverage_score"] = sum(coverage_components)

print(f"\n📊 Secret Detection Coverage Score: {secret_detection['coverage_score']}/4")

if secret_detection["coverage_score"] == 4:
    print("🟢 Excellent: Comprehensive multi-layered secret detection")
elif secret_detection["coverage_score"] == 3:
    print("🟡 Good: Strong secret detection with minor gaps")
elif secret_detection["coverage_score"] == 2:
    print("🟠 Fair: Basic secret detection, consider adding more layers")
else:
    print("🔴 Poor: Insufficient secret detection coverage")

# Test secret detection with our .env file
print(f"\n🧪 Testing Secret Detection with .env file:")
env_file_path = Path("../.env")
if env_file_path.exists():
    print("✅ .env file exists (contains fake secrets for testing)")

    # Check if .env would be caught by gitignore
    gitignore_path = Path("../.gitignore")
    if gitignore_path.exists():
        with open(gitignore_path, 'r', encoding='utf-8') as f:
            ignore_rules = [line.strip() for line in f]
        # Comments ('#') and negations ('!') do not ignore anything, so a
        # mention of ".env" on such a line must not count as a match.
        env_ignored = any(
            '.env' in rule
            for rule in ignore_rules
            if rule and not rule.startswith(('#', '!'))
        )
        if env_ignored:
            print("✅ .env file is properly ignored by .gitignore")
        else:
            print("❌ .env file is NOT in .gitignore!")

    # Sample some content to show what would be detected
    with open(env_file_path, 'r', encoding='utf-8') as f:
        env_content = f.read()

    # Count potential secrets (simple NAME=value heuristics, case-insensitive)
    api_key_patterns = [
        r'API_KEY\s*=\s*[^\s]+',
        r'SECRET\s*=\s*[^\s]+',
        r'TOKEN\s*=\s*[^\s]+',
        r'PASSWORD\s*=\s*[^\s]+'
    ]

    total_secrets = sum(
        len(re.findall(pattern, env_content, re.IGNORECASE))
        for pattern in api_key_patterns
    )

    print(f"🎯 Found {total_secrets} potential secrets in .env file")
    print("   These would be caught by secret detection hooks if not properly ignored")

else:
    print("❌ .env file not found for testing")

print(f"\n💡 Recommendations:")
if not secret_detection["detect_secrets"]:
    print("   🔸 Add detect-secrets for comprehensive scanning")
if not secret_detection["ggshield"]:
    print("   🔸 Add ggshield for GitGuardian integration")
if secret_detection["baseline_file"] and not (Path("..") / secret_detection["baseline_file"]).exists():
    print("   🔸 Create the missing baseline file")
if secret_detection["coverage_score"] == 4:
    print("   🟢 Secret detection configuration is excellent!")

## 4. Review Notebook Stripout Configuration

Examining the nbstripout hook that cleans Jupyter notebooks before committing.

In [None]:
# Analyze notebook stripout configuration
notebook_config = {
    "nbstripout_found": False,
    "repo_info": None,
    "version": None,
    "notebook_count": 0,
    "notebooks_with_outputs": []
}

print("📓 Notebook Stripout Analysis")
print("=" * 40)

# Check for nbstripout configuration. The search stops at the first repository
# that declares the hook, so it is reported exactly once.
for repo in precommit_config.get('repos') or []:
    if any(hook.get('id') == 'nbstripout' for hook in repo.get('hooks') or []):
        notebook_config["nbstripout_found"] = True
        notebook_config["repo_info"] = repo['repo']
        notebook_config["version"] = repo.get('rev', 'Unknown')
        print("✅ nbstripout hook is configured")
        print(f"   📦 Repository: {repo['repo']}")
        print(f"   🏷️  Version: {repo.get('rev', 'Not specified')}")
        break

if not notebook_config["nbstripout_found"]:
    print("❌ nbstripout hook is NOT configured")
    print("   🚨 Jupyter notebooks may contain outputs and metadata!")

# Scan for notebook files in the project
print(f"\n🔍 Scanning for Jupyter notebooks:")

# Directories that never hold project notebooks: git internals and Jupyter's
# autosave copies. Exact-name matching keeps e.g. .github in the scan.
skipped_dirs = {'.git', '.ipynb_checkpoints'}

notebook_files = []
for root, dirs, files in os.walk(".."):
    # In-place pruning stops os.walk from descending into skipped directories.
    dirs[:] = [d for d in dirs if d not in skipped_dirs]
    for file in files:
        if file.endswith('.ipynb'):
            notebook_path = os.path.join(root, file)
            notebook_files.append(os.path.relpath(notebook_path, ".."))

notebook_config["notebook_count"] = len(notebook_files)

if notebook_files:
    print(f"📊 Found {len(notebook_files)} Jupyter notebook(s):")
    for nb_path in notebook_files:
        print(f"   📄 {nb_path}")

        # Check if notebook has outputs
        try:
            with open(Path("..") / nb_path, 'r', encoding='utf-8') as f:
                nb_content = json.load(f)

            has_outputs = False
            output_count = 0

            for cell in nb_content.get('cells', []):
                if cell.get('outputs'):
                    has_outputs = True
                    output_count += len(cell['outputs'])
                # An execution counter alone also marks the notebook as not
                # stripped, even though it adds nothing to output_count.
                if cell.get('execution_count'):
                    has_outputs = True

            if has_outputs:
                notebook_config["notebooks_with_outputs"].append({
                    "path": nb_path,
                    "output_count": output_count
                })
                print(f"      ⚠️  Has {output_count} outputs/execution data")
            else:
                print(f"      ✅ Clean (no outputs)")

        except Exception as e:
            # Best effort: an unreadable or malformed notebook is reported and
            # the scan continues with the next file.
            print(f"      ❌ Error reading notebook: {e}")
else:
    print("   ℹ️  No Jupyter notebooks found")

# Evaluate configuration effectiveness
print(f"\n📊 Notebook Configuration Assessment:")

if notebook_config["nbstripout_found"]:
    if not notebook_config["notebooks_with_outputs"]:
        print("🟢 Excellent: nbstripout is configured and all notebooks are clean")
        recommendation = "🟢 Perfect configuration"
    else:
        print(f"🟡 Good: nbstripout is configured but {len(notebook_config['notebooks_with_outputs'])} notebook(s) have outputs")
        print("   💡 Run nbstripout manually on existing notebooks to clean them")
        recommendation = "🟡 Clean existing notebooks"
else:
    if notebook_config["notebook_count"] > 0:
        print("🔴 Critical: Notebooks found but nbstripout not configured!")
        recommendation = "🔴 Add nbstripout immediately"
    else:
        print("🟢 Good: No notebooks found, nbstripout not needed currently")
        recommendation = "🟢 Add nbstripout when notebooks are added"

print(f"\n💡 Recommendation: {recommendation}")

# Show benefits of nbstripout
print(f"\n🎯 Benefits of nbstripout:")
print("   ✅ Removes sensitive output data")
print("   ✅ Reduces repository size")
print("   ✅ Cleaner diffs and merges")
print("   ✅ Prevents execution metadata conflicts")
print("   ✅ Improves collaboration")

if notebook_config["notebooks_with_outputs"]:
    total_outputs = sum(nb["output_count"] for nb in notebook_config["notebooks_with_outputs"])
    print(f"\n⚠️  Current risk: {total_outputs} outputs across {len(notebook_config['notebooks_with_outputs'])} notebooks")
    print("   These could contain sensitive data or large binary content")

## 5. Check Hook Dependencies and Versions

This section checks that every hook repository pins a version, compares each pinned version against a table of known releases, and reviews the security, performance, and CI settings.

In [None]:
# Check hook versions and dependencies
version_analysis = {
    "repositories": [],
    "outdated_count": 0,
    "security_issues": [],
    "total_hooks": 0
}

print("🔍 Hook Version Analysis")
print("=" * 40)

# Known latest versions (as of 2025) - in real scenario, this would be fetched from APIs
# NOTE(review): this table is hard-coded and goes stale; verify before relying on it.
known_latest = {
    "pre-commit-hooks": "v4.6.0",
    "detect-secrets": "v1.4.0",
    "nbstripout": "0.7.1",
    "black": "24.4.2",
    "flake8": "7.0.0",
    "isort": "5.13.2",
    "mirrors-eslint": "v9.5.0",
    "mirrors-prettier": "v4.0.0-alpha.8",
    "ggshield": "v1.25.0"
}

# Special repository values that are not cloned from a URL and therefore
# take no `rev`.
unversioned_repos = ('local', 'meta')

for repo_config in precommit_config.get('repos') or []:
    repo_url = str(repo_config.get('repo', 'Unknown'))

    # Short name = last URL segment, ignoring a trailing slash or ".git"
    # suffix so that clone-style URLs still match the table above.
    repo_name = repo_url.rstrip('/').split('/')[-1]
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]

    # YAML may parse an unquoted rev such as 1.4 as a number; compare as text.
    current_version = str(repo_config.get('rev', 'Not specified'))

    if repo_url in unversioned_repos:
        # Not a pinning problem: these entries legitimately have no rev.
        current_version = "N/A"
        status = "ℹ️  Local/meta repository (no version required)"
        recommendation = "Good"
    elif current_version == 'Not specified':
        status = "⚠️  No version specified"
        recommendation = "Specify explicit version"
    else:
        latest_version = known_latest.get(repo_name, "Unknown")
        if latest_version != "Unknown":
            if current_version == latest_version:
                status = "✅ Up to date"
                recommendation = "Good"
            else:
                status = "🟡 Potentially outdated"
                recommendation = f"Consider updating to {latest_version}"
                version_analysis["outdated_count"] += 1
        else:
            status = "❓ Version unknown"
            recommendation = "Manual check required"

    hooks = repo_config.get('hooks') or []
    hook_count = len(hooks)
    version_analysis["total_hooks"] += hook_count

    version_analysis["repositories"].append({
        "name": repo_name,
        "url": repo_url,
        "current_version": current_version,
        "status": status,
        "recommendation": recommendation,
        "hook_count": hook_count,
        # Hook ids are kept so the checks below can look at the hooks
        # themselves rather than only at the repository name.
        "hook_ids": [str(hook.get('id', '')) for hook in hooks]
    })

    print(f"📦 {repo_name}")
    print(f"   🏷️  Version: {current_version}")
    print(f"   📊 Status: {status}")
    print(f"   🔧 Hooks: {hook_count}")
    if recommendation != "Good":
        print(f"   💡 {recommendation}")
    print()

# Summary
print(f"📊 Version Summary:")
print(f"   📦 Total repositories: {len(version_analysis['repositories'])}")
print(f"   🔧 Total hooks: {version_analysis['total_hooks']}")
print(f"   🟡 Potentially outdated: {version_analysis['outdated_count']}")

# Check for security considerations
print(f"\n🔒 Security Analysis:")

security_checks = [
    {
        "name": "Version pinning",
        "check": all(repo["current_version"] != "Not specified" for repo in version_analysis["repositories"]),
        "importance": "High",
        "description": "All repositories should have pinned versions"
    },
    {
        "name": "Recent versions",
        "check": version_analysis["outdated_count"] <= 1,
        "importance": "Medium",
        "description": "Most hooks should be reasonably current"
    },
    {
        "name": "Security-focused hooks",
        # Hooks such as detect-private-key live in the generic
        # pre-commit-hooks repository, so hook ids are inspected as well.
        "check": any(
            "detect" in label or "ggshield" in label
            for repo in version_analysis["repositories"]
            for label in [repo["name"], *repo["hook_ids"]]
        ),
        "importance": "High",
        "description": "Security scanning hooks should be present"
    }
]

for check in security_checks:
    status = "✅" if check["check"] else "❌"
    print(f"   {status} {check['name']} ({check['importance']} priority)")
    print(f"      {check['description']}")

# Performance considerations
print(f"\n⚡ Performance Considerations:")
heavy_hooks = ['ggshield', 'eslint', 'black', 'flake8']
# Substring match on the repository name (e.g. "mirrors-eslint") plus an
# exact match on hook ids; name equality alone would never find eslint.
configured_heavy = [
    heavy for heavy in heavy_hooks
    if any(
        heavy in repo["name"] or heavy in repo["hook_ids"]
        for repo in version_analysis["repositories"]
    )
]

if configured_heavy:
    print(f"   🐌 Heavy hooks detected: {', '.join(configured_heavy)}")
    print("   💡 Consider using 'stages: [manual]' for expensive hooks during development")
else:
    print("   ⚡ No particularly heavy hooks detected")

# CI configuration check
ci_config = precommit_config.get('ci', {})
if ci_config:
    print(f"\n🚀 CI Configuration:")
    print(f"   🔄 Auto-update: {'✅' if ci_config.get('autoupdate_schedule') else '❌'}")
    print(f"   🔧 Auto-fix PRs: {'✅' if ci_config.get('autofix_prs') else '❌'}")
    print(f"   📅 Update schedule: {ci_config.get('autoupdate_schedule', 'Not set')}")
else:
    print(f"\n❌ No CI configuration found")
    print("   💡 Consider adding CI configuration for automated updates")

## 6. Validate YAML Structure and Syntax

Performing structural validation and checking for proper pre-commit configuration syntax.

In [None]:
# Validate YAML structure and syntax
validation_results = {
    "yaml_valid": False,
    "structure_valid": False,
    "required_fields": [],
    "optional_fields": [],
    "issues": [],
    "score": 0
}

print("🔧 YAML Structure Validation")
print("=" * 40)

# 1. YAML Syntax Validation
try:
    # Re-load to ensure it's still valid
    with open(config_path, 'r', encoding='utf-8') as file:
        config_reloaded = yaml.safe_load(file)
    validation_results["yaml_valid"] = True
    print("✅ YAML syntax is valid")
except yaml.YAMLError as e:
    validation_results["issues"].append(f"YAML syntax error: {e}")
    print(f"❌ YAML syntax error: {e}")
except Exception as e:
    validation_results["issues"].append(f"File error: {e}")
    print(f"❌ File error: {e}")

# Special repository values that are not cloned from a URL and therefore
# take no `rev`; demanding one would report a false issue.
unversioned_repos = ('local', 'meta')

# 2. Pre-commit Structure Validation
if validation_results["yaml_valid"]:
    print(f"\n📋 Structure Validation:")

    # Check required top-level fields
    required_top_level = ['repos']
    optional_top_level = ['ci', 'default_language_version', 'default_stages', 'files', 'exclude']

    for field in required_top_level:
        if field in precommit_config:
            validation_results["required_fields"].append(field)
            print(f"   ✅ Required field '{field}' present")
        else:
            validation_results["issues"].append(f"Missing required field: {field}")
            print(f"   ❌ Missing required field: {field}")

    for field in optional_top_level:
        if field in precommit_config:
            validation_results["optional_fields"].append(field)
            print(f"   ℹ️  Optional field '{field}' present")

    # 3. Repository Structure Validation
    print(f"\n📦 Repository Configuration Validation:")

    if 'repos' in precommit_config and not isinstance(precommit_config['repos'], list):
        # Previously a non-list value was skipped without any report.
        validation_results["issues"].append("'repos' field must be a list")
        print("   ❌ 'repos' field must be a list")
    elif 'repos' in precommit_config:
        for i, repo in enumerate(precommit_config['repos']):
            print(f"\n   Repository {i+1}:")

            if not isinstance(repo, dict):
                issue = f"Repository {i+1} entry must be a mapping"
                validation_results["issues"].append(issue)
                print(f"      ❌ Invalid repository entry")
                continue

            # Check required repo fields
            if repo.get('repo') in unversioned_repos:
                required_repo_fields = ['repo', 'hooks']
            else:
                required_repo_fields = ['repo', 'rev', 'hooks']

            for field in required_repo_fields:
                if field in repo:
                    print(f"      ✅ {field}: {repo[field] if field != 'hooks' else f'{len(repo[field])} hooks'}")
                else:
                    issue = f"Repository {i+1} missing required field: {field}"
                    validation_results["issues"].append(issue)
                    print(f"      ❌ Missing {field}")

            # Validate hooks structure. A missing 'hooks' key was already
            # reported above, so only a present-but-wrong value is flagged here.
            if 'hooks' not in repo:
                continue
            if isinstance(repo['hooks'], list):
                for j, hook in enumerate(repo['hooks']):
                    if isinstance(hook, dict) and 'id' in hook:
                        print(f"      ✅ Hook: {hook['id']}")
                    else:
                        issue = f"Repository {i+1}, Hook {j+1} missing required 'id' field"
                        validation_results["issues"].append(issue)
                        print(f"      ❌ Hook {j+1} missing 'id'")
            else:
                issue = f"Repository {i+1} 'hooks' field must be a list"
                validation_results["issues"].append(issue)
                print(f"      ❌ Invalid hooks structure")

    # 4. CI Configuration Validation (if present)
    if 'ci' in precommit_config:
        print(f"\n🚀 CI Configuration Validation:")
        ci_config = precommit_config['ci']

        recommended_ci_fields = [
            'autofix_commit_msg', 'autofix_prs', 'autoupdate_branch',
            'autoupdate_commit_msg', 'autoupdate_schedule'
        ]

        for field in recommended_ci_fields:
            if field in ci_config:
                print(f"   ✅ {field}: {ci_config[field]}")
            else:
                print(f"   ℹ️  Optional CI field '{field}' not configured")

# 5. Best Practices Check
print(f"\n🎯 Best Practices Validation:")

# Normalised once so every check below iterates the same list.
repos_for_practices = precommit_config.get('repos') or []
if not isinstance(repos_for_practices, list):
    repos_for_practices = []

best_practices = [
    {
        "name": "Version pinning",
        # local/meta entries have no rev by design and count as pinned.
        "check": all(
            isinstance(repo, dict) and (
                repo.get('repo') in unversioned_repos
                or ('rev' in repo and repo['rev'] != 'HEAD')
            )
            for repo in repos_for_practices
        ),
        "description": "All repositories should have pinned versions (not HEAD)"
    },
    {
        "name": "Hook organization",
        "check": len(repos_for_practices) <= 10,
        "description": "Reasonable number of repositories (not overwhelming)"
    },
    {
        "name": "Security hooks present",
        # Textual search over the whole repo entry (URL and hook ids alike).
        "check": any('detect' in str(repo).lower() or 'secret' in str(repo).lower()
                    for repo in repos_for_practices),
        "description": "Security-related hooks should be configured"
    },
    {
        "name": "File processing hooks",
        "check": any('nbstripout' in str(repo) or 'trailing-whitespace' in str(repo)
                    for repo in repos_for_practices),
        "description": "File cleanup hooks should be present"
    }
]

passed_practices = 0
for practice in best_practices:
    status = "✅" if practice["check"] else "⚠️ "
    print(f"   {status} {practice['name']}")
    print(f"      {practice['description']}")
    if practice["check"]:
        passed_practices += 1

# Four equally weighted components, each in [0, 1], scaled to a 0-100 score.
validation_results["score"] = (
    (1 if validation_results["yaml_valid"] else 0) +
    (1 if validation_results["required_fields"] else 0) +
    (1 if not validation_results["issues"] else 0) +
    (passed_practices / len(best_practices))
) / 4 * 100

print(f"\n📊 Validation Score: {validation_results['score']:.1f}/100")

if validation_results["score"] >= 90:
    print("🟢 Excellent: Configuration follows best practices")
elif validation_results["score"] >= 75:
    print("🟡 Good: Minor improvements possible")
elif validation_results["score"] >= 50:
    print("🟠 Fair: Several issues to address")
else:
    print("🔴 Poor: Significant problems with configuration")

if validation_results["issues"]:
    print(f"\n⚠️  Issues to address:")
    for issue in validation_results["issues"]:
        print(f"   🔸 {issue}")

validation_results["structure_valid"] = not validation_results["issues"]

## 7. Generate Configuration Report

Creating a comprehensive report with recommendations, security assessment, and suggested improvements.

In [None]:
# Generate comprehensive configuration report
# Relies on the result dictionaries built by the earlier analysis cells:
# file_size_analysis, secret_detection, notebook_config, version_analysis
# and validation_results.
report = {
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "overall_score": 0,
    "categories": {},
    "recommendations": [],
    "security_rating": "",
    "summary": ""
}

print("📊 COMPREHENSIVE PRE-COMMIT CONFIGURATION REPORT")
print("=" * 60)
print(f"🕐 Generated: {report['timestamp']}")
print(f"📁 Configuration: .pre-commit-config.yaml")

# File size score. max_size_kb stays None when the hook is present without
# --maxkb, so it must be checked before any numeric comparison.
max_kb = file_size_analysis["max_size_kb"]
if not file_size_analysis["found"]:
    file_size_score = 0
elif max_kb is not None and max_kb <= 1000:
    file_size_score = 85
else:
    file_size_score = 40
file_size_limit_text = f"{max_kb} KB" if max_kb is not None else "Not set"

# Notebook score: full marks only when the hook exists and nothing needs cleaning.
dirty_notebooks = len(notebook_config["notebooks_with_outputs"])
if notebook_config["nbstripout_found"]:
    notebook_score = 100 if dirty_notebooks == 0 else 75
elif notebook_config["notebook_count"] == 0:
    notebook_score = 50
else:
    notebook_score = 0

# Calculate category scores
category_scores = {
    "File Size Control": {
        "score": file_size_score,
        "details": f"{'✅' if file_size_analysis['found'] else '❌'} File size limit: {file_size_limit_text}"
    },
    "Secret Detection": {
        "score": (secret_detection["coverage_score"] / 4) * 100,
        "details": f"🔐 {secret_detection['coverage_score']}/4 security layers active"
    },
    "Notebook Management": {
        "score": notebook_score,
        "details": f"📓 nbstripout: {'✅' if notebook_config['nbstripout_found'] else '❌'}, Clean notebooks: {notebook_config['notebook_count'] - dirty_notebooks}/{notebook_config['notebook_count']}"
    },
    "Version Management": {
        # 20 points off per potentially outdated repository, floored at 0.
        "score": max(0, 100 - (version_analysis["outdated_count"] * 20)),
        "details": f"📦 {len(version_analysis['repositories']) - version_analysis['outdated_count']}/{len(version_analysis['repositories'])} repos up-to-date"
    },
    "Configuration Quality": {
        "score": validation_results["score"],
        "details": f"🔧 Structure: {'✅' if validation_results['structure_valid'] else '❌'}, Issues: {len(validation_results['issues'])}"
    }
}

# Display category breakdown
print(f"\n📋 CATEGORY ANALYSIS:")
total_score = 0
for category, data in category_scores.items():
    score = data["score"]
    total_score += score

    if score >= 90:
        grade = "🟢 A"
    elif score >= 80:
        grade = "🟡 B"
    elif score >= 70:
        grade = "🟠 C"
    elif score >= 60:
        grade = "🔴 D"
    else:
        grade = "🚨 F"

    print(f"   {category:<20} {grade} ({score:3.0f}%) - {data['details']}")
    report["categories"][category] = {"score": score, "grade": grade, "details": data["details"]}

# Unweighted mean of the category scores
report["overall_score"] = total_score / len(category_scores)

# Overall Assessment
print(f"\n🎯 OVERALL ASSESSMENT:")
print(f"   📊 Overall Score: {report['overall_score']:.1f}/100")

if report["overall_score"] >= 90:
    overall_grade = "🟢 EXCELLENT"
    report["summary"] = "Outstanding configuration with comprehensive security and quality controls"
elif report["overall_score"] >= 80:
    overall_grade = "🟡 GOOD"
    report["summary"] = "Strong configuration with minor areas for improvement"
elif report["overall_score"] >= 70:
    overall_grade = "🟠 FAIR"
    report["summary"] = "Decent configuration but several important improvements needed"
elif report["overall_score"] >= 60:
    overall_grade = "🔴 POOR"
    report["summary"] = "Significant configuration issues that should be addressed"
else:
    overall_grade = "🚨 CRITICAL"
    report["summary"] = "Major configuration problems requiring immediate attention"

print(f"   🏆 Grade: {overall_grade}")
print(f"   📝 {report['summary']}")

# Security Rating: share of the four yes/no factors that hold
security_factors = [
    secret_detection["coverage_score"] >= 3,
    file_size_analysis["found"],
    '.env' in str(precommit_config),  # Check if env files are being handled
    validation_results["yaml_valid"]
]

security_score = sum(security_factors) / len(security_factors) * 100

if security_score >= 75:
    report["security_rating"] = "🔒 HIGH"
elif security_score >= 50:
    report["security_rating"] = "🟡 MEDIUM"
else:
    report["security_rating"] = "🚨 LOW"

print(f"\n🔒 SECURITY RATING: {report['security_rating']} ({security_score:.0f}%)")

# Generate Recommendations
print(f"\n💡 PRIORITY RECOMMENDATIONS:")

priority_recs = []

if not file_size_analysis["found"]:
    priority_recs.append("🔴 HIGH: Add check-added-large-files hook immediately")
elif max_kb is None:
    priority_recs.append("🟡 MEDIUM: Add explicit --maxkb argument to file size check")

if secret_detection["coverage_score"] < 3:
    priority_recs.append("🔴 HIGH: Enhance secret detection with more security layers")

if notebook_config["notebook_count"] > 0 and not notebook_config["nbstripout_found"]:
    priority_recs.append("🟠 MEDIUM: Add nbstripout for Jupyter notebook cleaning")

if dirty_notebooks > 0:
    priority_recs.append("🟡 LOW: Clean existing notebook outputs manually")

if version_analysis["outdated_count"] > 2:
    priority_recs.append("🟡 MEDIUM: Update outdated hook versions")

if not validation_results["structure_valid"]:
    priority_recs.append("🔴 HIGH: Fix YAML configuration issues")

if not priority_recs:
    priority_recs.append("🟢 Configuration is excellent - no critical issues!")

for i, rec in enumerate(priority_recs[:5], 1):  # Top 5 recommendations
    print(f"   {i}. {rec}")
    report["recommendations"].append(rec)

# Implementation Steps
print(f"\n🛠️  NEXT STEPS:")
print("   1. Address HIGH priority recommendations first")
print("   2. Test configuration with: `pre-commit run --all-files`")
print("   3. Install hooks: `pre-commit install`")
print("   4. Monitor performance and adjust as needed")
print("   5. Schedule regular configuration reviews")

# Save report summary
print(f"\n💾 Report complete! Key metrics:")
print(f"   • {len(precommit_config.get('repos', []))} repositories configured")
print(f"   • {version_analysis['total_hooks']} total hooks")
print(f"   • {secret_detection['coverage_score']}/4 security layers")
print(f"   • {len(priority_recs)} recommendations")

print(f"\n🎉 Pre-commit configuration evaluation complete!")
print("   Use this analysis to improve your repository's quality and security.")