# COTTON Setup and Requirements

## Installation Guide and Environment Configuration

This notebook provides comprehensive setup instructions for the COTTON framework implementation.

### What You'll Learn:
1. Dependency installation and management
2. Environment validation and configuration
3. API setup for external services
4. Hardware requirements and optimization
5. Quick start guide and troubleshooting

## 1. Requirements.txt - Core Dependencies

In [None]:
# Minimum-version pins for every package the COTTON notebooks rely on,
# grouped by purpose. The text is written verbatim to requirements.txt below.
requirements_txt = """
# Core Dependencies
torch>=2.0.0
transformers>=4.35.0
accelerate>=0.24.0
peft>=0.6.0
bitsandbytes>=0.41.0
datasets>=2.14.0

# Data Processing
pandas>=1.5.0
numpy>=1.24.0
scikit-learn>=1.3.0

# Evaluation Metrics
rouge-score>=0.1.2
nltk>=3.8.0
sacrebleu>=2.3.0

# LangChain & LangGraph
langchain>=0.1.0
langgraph>=0.0.30
langchain-openai>=0.0.5
langchain-anthropic>=0.1.0

# API Clients
openai>=1.0.0
anthropic>=0.8.0

# Visualization
matplotlib>=3.7.0
seaborn>=0.12.0
plotly>=5.15.0

# Utilities
tqdm>=4.65.0
python-dotenv>=1.0.0
requests>=2.31.0
jupyter>=1.0.0
ipywidgets>=8.0.0

# Optional: For enhanced features
faiss-cpu>=1.7.4
sentence-transformers>=2.2.0
"""

# Write the list to the working directory; strip() drops the blank lines that
# the triple-quoted literal carries at its start and end.
with open('requirements.txt', 'w') as requirements_file:
    requirements_file.write(requirements_txt.strip())

# Confirm the write and show the install command, one line per message.
for status_line in (
    "✅ Requirements.txt created successfully!",
    "\n📦 To install all dependencies, run:",
    "   pip install -r requirements.txt",
):
    print(status_line)

## 2. Automated Dependency Installation

In [None]:
import subprocess
import sys
import os

def install_package(package):
    """Install one requirement with the pip of the running interpreter.

    Args:
        package: A pip requirement specifier, e.g. ``"torch>=2.0.0"``.

    Returns:
        True when pip exits successfully, False when it exits with a
        non-zero status. The outcome is also printed.
    """
    # Go through ``sys.executable -m pip`` so the package lands in the same
    # environment that is running this notebook.
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to install {package}: {e}")
        return False

    print(f"✅ Successfully installed {package}")
    return True

def setup_environment():
    """Install every COTTON dependency and fetch the NLTK tokenizer data.

    Packages are installed one at a time through ``install_package`` so that
    a single failure does not stop the remaining installs. A summary of
    successes and failures is printed at the end.

    Returns:
        bool: True when every package installed successfully, False when at
        least one install failed. A failed NLTK data download is reported
        but does not affect the return value.
    """
    print("🚀 Setting up COTTON environment...")

    # Core packages
    core_packages = [
        "torch>=2.0.0",
        "transformers>=4.35.0",
        "accelerate>=0.24.0",
        "peft>=0.6.0",
        "bitsandbytes>=0.41.0",
        "datasets>=2.14.0"
    ]

    # LangChain packages
    langchain_packages = [
        "langchain>=0.1.0",
        "langgraph>=0.0.30",
        "langchain-openai>=0.0.5",
        "langchain-anthropic>=0.1.0"
    ]

    # Data science packages
    data_packages = [
        "pandas>=1.5.0",
        "numpy>=1.24.0",
        "scikit-learn>=1.3.0",
        "matplotlib>=3.7.0",
        "seaborn>=0.12.0"
    ]

    # Evaluation packages
    eval_packages = [
        "rouge-score>=0.1.2",
        "nltk>=3.8.0",
        "sacrebleu>=2.3.0"
    ]

    # API packages
    api_packages = [
        "openai>=1.0.0",
        "anthropic>=0.8.0"
    ]

    # Utility packages
    util_packages = [
        "tqdm>=4.65.0",
        "python-dotenv>=1.0.0",
        "requests>=2.31.0",
        "jupyter>=1.0.0"
    ]

    all_packages = (
        core_packages + langchain_packages + data_packages +
        eval_packages + api_packages + util_packages
    )

    # Keep going after a failure so the summary lists everything that broke.
    failed_packages = [
        package for package in all_packages if not install_package(package)
    ]

    print(f"\n📊 Installation Summary:")
    print(f"   ✅ Successfully installed: {len(all_packages) - len(failed_packages)}/{len(all_packages)}")

    if failed_packages:
        print(f"   ❌ Failed packages: {failed_packages}")
        print("   💡 Try installing failed packages manually")

    # Download NLTK data (best effort). nltk.download() signals most failures
    # through its boolean return value rather than an exception, so check it.
    try:
        import nltk
        if nltk.download('punkt', quiet=True):
            print("✅ NLTK data downloaded")
        else:
            print("⚠️  Could not download NLTK data")
    except Exception as e:
        # Exception (not a bare except) so Ctrl-C still interrupts the setup.
        print(f"⚠️  Could not download NLTK data: {e}")

    if failed_packages:
        print("\n⚠️  Environment setup finished with errors")
    else:
        print("\n🎉 Environment setup complete!")
    return len(failed_packages) == 0

# Uncomment the line below to run automatic installation
# setup_environment()

## 3. Configuration Templates

In [None]:
import os
from dataclasses import dataclass
from typing import Optional

@dataclass
class APIConfig:
    """API configuration for external services.

    Each credential left as ``None`` is read from the environment variable of
    the same name when the instance is created. Reading at instantiation
    (rather than at class definition) means keys loaded later, e.g. by
    ``load_dotenv()``, are picked up by every new ``APIConfig()``.
    """

    # OpenAI API (for multi-agent cleaning and evaluation)
    OPENAI_API_KEY: Optional[str] = None

    # Anthropic API (for Claude integration)
    ANTHROPIC_API_KEY: Optional[str] = None

    # Hugging Face API (for model downloads)
    HF_TOKEN: Optional[str] = None

    def __post_init__(self) -> None:
        """Fill every credential that was not passed explicitly from os.environ."""
        for name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "HF_TOKEN"):
            if getattr(self, name) is None:
                setattr(self, name, os.getenv(name))

    def validate(self) -> bool:
        """Print a warning for each missing credential.

        Returns:
            True when all three credentials are set (non-empty), False
            otherwise.
        """
        warnings = []

        if not self.OPENAI_API_KEY:
            warnings.append("⚠️  OpenAI API key not found - multi-agent cleaning will use fallback methods")

        if not self.ANTHROPIC_API_KEY:
            warnings.append("⚠️  Anthropic API key not found - Claude enhancement will be simulated")

        if not self.HF_TOKEN:
            warnings.append("⚠️  Hugging Face token not found - may have issues downloading gated models")

        for warning in warnings:
            print(warning)

        return len(warnings) == 0

def _detect_device() -> str:
    """Return "cuda" when ``nvidia-smi`` runs successfully, otherwise "cpu".

    The probe is silent (its output is discarded) and time-limited, so a
    missing or hanging driver tool neither clutters the notebook nor blocks it.
    """
    import subprocess

    try:
        probe = subprocess.run(
            ["nvidia-smi"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=15,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        # Tool not installed, not executable, or timed out: treat as no GPU.
        return "cpu"
    return "cuda" if probe.returncode == 0 else "cpu"


@dataclass
class ModelConfig:
    """Model configuration for COTTON training."""

    # Base model selection
    BASE_MODEL: str = "codellama/CodeLlama-7b-hf"

    # Alternative models (uncomment to use)
    # BASE_MODEL: str = "microsoft/CodeGPT-small-py"
    # BASE_MODEL: str = "Salesforce/codet5p-770m"

    # Hardware configuration. The device default is probed once, when the
    # class is defined; pass DEVICE explicitly to override it.
    DEVICE: str = _detect_device()
    MIXED_PRECISION: bool = True
    USE_QUANTIZATION: bool = True

    # Memory optimization
    GRADIENT_CHECKPOINTING: bool = True
    DATALOADER_NUM_WORKERS: int = 4

    def validate(self) -> bool:
        """Adjust settings that do not fit the selected device and print notes.

        On CPU, quantization is switched off (``USE_QUANTIZATION`` is set to
        False on this instance) and a slow-training warning is printed. On
        CUDA with quantization disabled, a GPU-memory hint is printed.

        Returns:
            Always True.
        """
        if self.DEVICE == "cpu":
            print("⚠️  Using CPU - training will be very slow")
            self.USE_QUANTIZATION = False

        if not self.USE_QUANTIZATION and self.DEVICE == "cuda":
            print("💡 Quantization disabled - ensure sufficient GPU memory")

        return True

# Build both configurations from their defaults and run their validators;
# each validator prints its own warnings.
api_config = APIConfig()
model_config = ModelConfig()

print("🔧 Configuration Validation:", "=" * 40, sep="\n")
api_config.validate()
model_config.validate()
print("✅ Configuration validation complete")

## 4. Environment Variables Template

In [None]:
# Template for the .env file: one KEY=value line per setting, with
# placeholders where the user has to supply a secret.
env_template = """
# COTTON Environment Variables
# Copy this to .env and fill in your values

# API Keys
OPENAI_API_KEY=your_openai_api_key_here
ANTHROPIC_API_KEY=your_anthropic_api_key_here
HF_TOKEN=your_huggingface_token_here

# Model Configuration  
BASE_MODEL=codellama/CodeLlama-7b-hf
DEVICE=cuda
USE_QUANTIZATION=true

# Data Paths
DATA_PATH=./data/codecot-9k
OUTPUT_PATH=./outputs
CACHE_DIR=./cache

# Training Configuration
BATCH_SIZE=1
LEARNING_RATE=1e-4
MAX_EPOCHS=20
EARLY_STOPPING=5

# Evaluation Configuration
EVAL_BATCH_SIZE=4
TEMPERATURE=0.1
MAX_NEW_TOKENS=256
"""

# Write the template next to the notebook; strip() removes the blank lines
# at the start and end of the literal.
with open('.env.template', 'w') as template_file:
    template_file.write(env_template.strip())

# Confirmation followed by the manual steps the user still has to do.
for instruction in (
    "✅ .env template created successfully!",
    "\n🔑 API Setup Instructions:",
    "   1. Copy .env.template to .env",
    "   2. Replace placeholder values with your actual API keys",
    "   3. Ensure .env is in your .gitignore to keep keys secure",
):
    print(instruction)

# Load a real .env if the user has already created one; python-dotenv is
# optional, so a missing install only produces a hint.
try:
    from dotenv import load_dotenv
    if not os.path.exists('.env'):
        print("\n💡 Create .env file from template to use API keys")
    else:
        load_dotenv()
        print("\n✅ Environment variables loaded from .env")
except ImportError:
    print("\n⚠️  python-dotenv not installed. Install with: pip install python-dotenv")

## 5. System Validation and Hardware Check

In [None]:
import sys
import platform
import psutil
import shutil

def validate_setup():
    """Check this machine against COTTON's requirements and print a report.

    Covers the Python version, total RAM, importability of the core packages,
    GPU availability and memory, free disk space in the working directory,
    and the API keys (via ``APIConfig.validate``, which only prints warnings).

    Returns:
        bool: True when no issue was recorded, False otherwise. Warnings
        (⚠️ entries such as "no GPU" or "low RAM") count as issues too.
    """

    print("🔍 Validating COTTON setup...")
    print("=" * 50)

    issues = []

    # System Information
    print(f"🖥️  System: {platform.system()} {platform.release()}")
    print(f"🐍 Python: {sys.version}")

    # Check Python version
    if sys.version_info < (3, 8):
        issues.append("❌ Python 3.8+ required")
    else:
        print("✅ Python version OK")

    # Check memory
    memory_gb = psutil.virtual_memory().total / (1024**3)
    print(f"🧠 RAM: {memory_gb:.1f} GB")
    if memory_gb < 8:
        issues.append("⚠️  Low RAM - recommend 16GB+ for training")

    # Check core dependencies
    required_packages = [
        "torch", "transformers", "peft", "datasets",
        "langchain", "langgraph", "pandas", "numpy"
    ]

    print("\n📦 Package Availability:")
    for package in required_packages:
        try:
            __import__(package)
            print(f"✅ {package} available")
        except ImportError:
            issues.append(f"❌ {package} not installed")
            print(f"❌ {package} not installed")

    # Check GPU availability
    print("\n🎮 GPU Information:")
    try:
        import torch
        if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                gpu_name = torch.cuda.get_device_name(i)
                gpu_memory = torch.cuda.get_device_properties(i).total_memory / (1024**3)
                print(f"✅ GPU {i}: {gpu_name} ({gpu_memory:.1f} GB)")

                # Checked per device: the memory test used to run once after
                # the loop and therefore only saw the last GPU.
                if gpu_memory < 8:
                    issues.append(f"⚠️  GPU {i} memory < 8GB - may need quantization")
        else:
            print("⚠️  No GPU available - using CPU (training will be slow)")
            issues.append("⚠️  No GPU detected")
    except Exception as e:
        issues.append(f"❌ Could not check GPU status: {e}")

    # Check disk space
    print("\n💾 Storage Information:")
    try:
        free_space = shutil.disk_usage(".").free // (1024**3)  # GB
        print(f"💾 Free space: {free_space} GB")
        if free_space < 20:
            issues.append("⚠️  Low disk space - recommend 50GB+ for models and data")
    except Exception as e:
        print(f"⚠️  Could not check disk space: {e}")

    # Check API keys (optional): missing keys are printed as warnings but are
    # not added to ``issues``.
    print("\n🔑 API Configuration:")
    api_config = APIConfig()
    api_config.validate()

    # Summary
    print("\n" + "=" * 50)
    if issues:
        print(f"❌ Setup Issues Found:")
        for issue in issues:
            print(f"   {issue}")
        print("\n💡 Please resolve these issues before proceeding")
        return False
    else:
        print("🎉 Setup validation complete - ready to run COTTON!")
        return True

# Run the validation now and keep its boolean outcome
# (True only when validate_setup() recorded no issues).
validation_result = validate_setup()

## 6. Quick Start Guide

In [None]:
def print_quick_start_guide():
    """Print the COTTON quick start guide and hand the text back.

    Returns:
        str: The exact guide text that was printed.
    """

    quick_start_text = """
    🚀 COTTON QUICK START GUIDE
    ============================
    
    1. 📦 INSTALLATION
       pip install -r requirements.txt
       # or run: python install_dependencies.py
    
    2. 🔑 API SETUP (Optional but recommended)
       - Copy .env.template to .env
       - Add your API keys:
         * OpenAI API key (for multi-agent cleaning)
         * Anthropic API key (for Claude integration)
         * Hugging Face token (for model access)
    
    3. 📊 DATA PREPARATION
       - Use provided sample data, or
       - Clone COTTON repository: https://github.com/NTDXYG/COTTON
       - Place CodeCoT-9k dataset in ./data/codecot-9k/
    
    4. 🏃 RUNNING THE NOTEBOOK
       jupyter notebook 1.6.2.1_COTTON_Implementation.ipynb
       
       OR run sections individually:
       
       # Load and explore data
       dataset = CodeCoTDataset()
       data = dataset.load_from_github()
       
       # Apply multi-agent cleaning
       cleaner = MultiAgentCleaner(api_key="your_openai_key")
       cleaned_data = cleaner.clean_dataset(data)
       
       # Setup and train COTTON model
       trainer = COTTONTrainer(config)
       model, tokenizer = trainer.setup_model()
       # trainer.train(train_data, val_data)  # Uncomment for actual training
       
       # Generate CoTs
       inference = COTTONInference(model, tokenizer, config)
       cots = inference.batch_generate_cots(problems)
       
       # Evaluate with LangChain
       evaluator = COTTONEvaluator(api_key="your_openai_key")
       results = evaluator.batch_evaluate(cots, references)
    
    5. 🔧 CUSTOMIZATION
       - Modify COTTONConfig for different models/settings
       - Adjust multi-agent prompts for your domain
       - Extend evaluation metrics
       - Add custom CoT templates
    
    6. 🚀 DEPLOYMENT
       - Export trained model: trainer.save_model("./cotton_final")
       - Use COTTONInference for production inference
       - Integrate with your development workflow
    
    📝 EXAMPLE USAGE:
    
    ```python
    # Simple CoT generation
    problem = "def find_max(numbers): ..."
    cot = inference.generate_cot(problem)
    print(f"Generated CoT: {cot}")
    
    # LangChain evaluation
    evaluation = evaluator.evaluate_cot_quality(cot)
    print(f"Quality scores: {evaluation}")
    
    # Claude enhancement
    enhanced = claude_enhancer.enhance_cot_with_claude(problem, cot)
    print(f"Enhanced CoT: {enhanced}")
    ```
    
    🆘 TROUBLESHOOTING:
    
    - CUDA out of memory? Reduce batch_size or enable quantization
    - API errors? Check your API keys and quotas
    - Model download issues? Verify HF_TOKEN and internet connection
    - Evaluation errors? Ensure all dependencies are installed
    
    📚 RESOURCES:
    
    - Original paper: https://arxiv.org/abs/2312.05562
    - GitHub repository: https://github.com/NTDXYG/COTTON
    - LangChain docs: https://python.langchain.com/
    - LangGraph docs: https://langchain-ai.github.io/langgraph/
    
    🎯 NEXT STEPS:
    
    1. Run the complete notebook end-to-end
    2. Experiment with different base models
    3. Try your own code generation problems
    4. Integrate with your existing workflow
    5. Contribute improvements back to the community!
    """

    # Show the guide, then return it so the caller can store or reuse it.
    print(quick_start_text)
    return quick_start_text

# Print the quick start guide and keep the returned guide text
quick_start_guide = print_quick_start_guide()

## 7. Hardware Recommendations and Optimization

In [None]:
def print_hardware_recommendations():
    """Print hardware recommendations, then rate the current machine.

    The first part is static guidance text. The second part inspects this
    machine's RAM and CPU core count (via psutil) and its first CUDA GPU
    (via torch) and prints a short rating for each. A missing torch install
    only skips the GPU rating; RAM and CPU are still assessed.

    Returns:
        None. Everything is printed.
    """

    recommendations = """
    🖥️  HARDWARE RECOMMENDATIONS
    ===============================
    
    📊 MINIMUM REQUIREMENTS:
    - CPU: 4+ cores, 2.5GHz+
    - RAM: 8GB (16GB recommended)
    - GPU: Optional, any CUDA-capable GPU
    - Storage: 20GB free space
    - Internet: For model downloads and API calls
    
    🎯 RECOMMENDED SETUPS:
    
    1. DEVELOPMENT & EXPERIMENTATION:
       - CPU: Intel i5/AMD Ryzen 5 or better
       - RAM: 16GB DDR4
       - GPU: RTX 3060/4060 (8GB VRAM) or RTX 3070/4070
       - Storage: 50GB SSD space
       
    2. TRAINING & PRODUCTION:
       - CPU: Intel i7/AMD Ryzen 7 or better
       - RAM: 32GB DDR4
       - GPU: RTX 3090/4090 (24GB VRAM) or A100/H100
       - Storage: 100GB+ NVMe SSD
       
    3. CLOUD ALTERNATIVES:
       - Google Colab Pro/Pro+ (T4/A100 access)
       - AWS EC2 g4dn/p3 instances
       - Azure NC series VMs
       - Vast.ai for cost-effective GPU rental
    
    ⚡ PERFORMANCE OPTIMIZATION TIPS:
    
    1. MEMORY OPTIMIZATION:
       - Enable 4-bit quantization (bitsandbytes)
       - Use gradient checkpointing
       - Reduce batch size if OOM errors
       - Clear GPU cache between runs
       
    2. TRAINING OPTIMIZATION:
       - Use LoRA for parameter-efficient fine-tuning
       - Enable mixed precision training
       - Use data parallelism for multi-GPU setups
       - Optimize dataloader workers
       
    3. INFERENCE OPTIMIZATION:
       - Use greedy decoding for deterministic results
       - Batch multiple requests when possible
       - Cache frequently used prompts
       - Consider model quantization for deployment
    
    💰 COST CONSIDERATIONS:
    
    1. LOCAL DEVELOPMENT:
       - One-time hardware investment
       - No ongoing API costs
       - Full control over data privacy
       
    2. CLOUD USAGE:
       - Pay-per-use model
       - Access to latest hardware
       - Ongoing operational costs
       
    3. HYBRID APPROACH:
       - Local development + cloud training
       - Best of both worlds
       - Cost-effective for most users
    """

    print(recommendations)

    # Current system assessment
    print("\n🔍 YOUR CURRENT SYSTEM ASSESSMENT:")
    print("=" * 40)

    try:
        import psutil

        # Memory
        ram_gb = psutil.virtual_memory().total / (1024**3)
        print(f"💾 RAM: {ram_gb:.1f}GB")

        if ram_gb >= 32:
            print("   ✅ Excellent for training")
        elif ram_gb >= 16:
            print("   ✅ Good for development")
        elif ram_gb >= 8:
            print("   ⚠️  Minimum - consider upgrade")
        else:
            print("   ❌ Insufficient for training")

        # GPU: torch is only needed here, so its absence must not prevent the
        # RAM and CPU ratings from being shown.
        try:
            import torch
        except ImportError:
            torch = None

        if torch is None:
            print("🎮 GPU: Could not check (PyTorch not installed)")
        elif torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            print(f"🎮 GPU: {gpu_name} ({gpu_memory:.1f}GB)")

            if gpu_memory >= 20:
                print("   ✅ Excellent for training large models")
            elif gpu_memory >= 12:
                print("   ✅ Good for training with quantization")
            elif gpu_memory >= 8:
                print("   ⚠️  Sufficient for inference, limited training")
            else:
                print("   ❌ May need significant optimization")
        else:
            print("🎮 GPU: None detected")
            print("   ❌ CPU-only training will be very slow")

        # CPU: psutil.cpu_count() returns None when the count is undetermined.
        cpu_count = psutil.cpu_count()
        if cpu_count is None:
            print("🔧 CPU: core count unavailable")
        else:
            print(f"🔧 CPU: {cpu_count} cores")

            if cpu_count >= 8:
                print("   ✅ Excellent for data processing")
            elif cpu_count >= 4:
                print("   ✅ Good for most tasks")
            else:
                print("   ⚠️  May bottleneck data loading")

    except Exception as e:
        # Best-effort report: any probe failure is shown instead of raised.
        print(f"❌ Could not assess system: {e}")

# Print the hardware recommendations followed by the assessment of this machine
print_hardware_recommendations()

## 8. Troubleshooting Common Issues

In [None]:
def print_troubleshooting_guide():
    """Print the troubleshooting guide for common COTTON setup problems.

    The guide is a fixed text listing common issues with solutions, debugging
    techniques, places to get help, and prevention tips.

    Returns:
        None. The guide is only printed.
    """

    guide_text = """
    🔧 TROUBLESHOOTING GUIDE
    =========================
    
    🚨 COMMON ISSUES AND SOLUTIONS:
    
    1. CUDA OUT OF MEMORY (OOM)
       Symptoms: RuntimeError: CUDA out of memory
       Solutions:
       ✅ Reduce batch size: config.BATCH_SIZE = 1
       ✅ Enable quantization: USE_QUANTIZATION = True
       ✅ Use gradient checkpointing: GRADIENT_CHECKPOINTING = True
       ✅ Clear GPU cache: torch.cuda.empty_cache()
       ✅ Use CPU offloading: device_map="auto"
    
    2. MODEL DOWNLOAD FAILURES
       Symptoms: HTTPError, Connection timeout
       Solutions:
       ✅ Check internet connection
       ✅ Verify HuggingFace token: HF_TOKEN in .env
       ✅ Try different mirror: export HF_ENDPOINT=https://hf-mirror.com
       ✅ Download manually and specify local path
       ✅ Use smaller model variant first
    
    3. API AUTHENTICATION ERRORS
       Symptoms: 401 Unauthorized, Invalid API key
       Solutions:
       ✅ Verify API keys in .env file
       ✅ Check API key permissions and quotas
       ✅ Ensure .env is loaded: load_dotenv()
       ✅ Test API keys independently
       ✅ Use fallback methods when API unavailable
    
    4. DEPENDENCY CONFLICTS
       Symptoms: ImportError, Version conflicts
       Solutions:
       ✅ Create fresh virtual environment
       ✅ Use exact versions from requirements.txt
       ✅ Update pip: pip install --upgrade pip
       ✅ Install packages individually to isolate issues
       ✅ Check Python version compatibility
    
    5. SLOW TRAINING/INFERENCE
       Symptoms: Very slow progress, high CPU usage
       Solutions:
       ✅ Verify GPU is being used: torch.cuda.is_available()
       ✅ Enable mixed precision: use_amp=True
       ✅ Optimize dataloader: num_workers=4, pin_memory=True
       ✅ Use compiled models: torch.compile() (PyTorch 2.0+)
       ✅ Profile code to identify bottlenecks
    
    6. EVALUATION ERRORS
       Symptoms: ROUGE/BLEU calculation failures
       Solutions:
       ✅ Download NLTK data: nltk.download('punkt')
       ✅ Handle empty strings in metrics
       ✅ Use fallback evaluation when metrics fail
       ✅ Check text encoding issues
       ✅ Validate input format for evaluation functions
    
    7. LANGCHAIN/LANGGRAPH ISSUES
       Symptoms: Import errors, workflow failures
       Solutions:
       ✅ Update to latest versions: pip install --upgrade langchain langgraph
       ✅ Check compatibility matrix
       ✅ Use mock LLMs for testing without API keys
       ✅ Simplify workflows to isolate issues
       ✅ Check langchain community examples
    
    🔍 DEBUGGING TECHNIQUES:
    
    1. ENABLE VERBOSE LOGGING:
       ```python
       import logging
       logging.basicConfig(level=logging.DEBUG)
       ```
    
    2. MEMORY MONITORING:
       ```python
       import torch
       print(f"GPU Memory: {torch.cuda.memory_allocated()/1e9:.2f}GB")
       ```
    
    3. GRADUAL TESTING:
       - Start with minimal examples
       - Add complexity step by step
       - Test each component independently
    
    4. ENVIRONMENT ISOLATION:
       - Use virtual environments
       - Document working configurations
       - Keep backup of working setups
    
    📞 GETTING HELP:
    
    1. Check GitHub Issues: https://github.com/NTDXYG/COTTON/issues
    2. LangChain Community: https://github.com/langchain-ai/langchain/discussions
    3. HuggingFace Forums: https://discuss.huggingface.co/
    4. Stack Overflow: Use tags [pytorch], [langchain], [transformers]
    5. Discord Communities: HuggingFace, LangChain
    
    🎯 PREVENTION TIPS:
    
    - Always use virtual environments
    - Pin dependency versions in requirements.txt
    - Test with small datasets first
    - Monitor system resources during execution
    - Keep backups of working configurations
    - Document successful setups for team sharing
    """

    # The literal is a plain (non-f) string, so the braces in the embedded
    # code samples are printed as written.
    print(guide_text)

# Print the troubleshooting guide (the function returns nothing)
print_troubleshooting_guide()

## 9. Final Setup Verification

In [None]:
def final_setup_check():
    """Run the final setup verification and print a readiness verdict.

    Six checks are performed: Python version, core dependencies, GPU access,
    API configuration, disk space and memory. Each entry in the result is
    True (passed), False (failed or could not be checked) or the string
    "partial" (usable with limitations: no GPU, or no API keys).

    The verdict is "ready" only when the two essential checks (Python version
    and core dependencies) pass and at least four checks pass overall;
    "partially ready" when the essentials pass and passed plus partial checks
    reach four; otherwise "incomplete".

    Returns:
        dict: Check name mapped to True, False or "partial".
    """

    print("🎯 FINAL SETUP VERIFICATION")
    print("=" * 50)

    checks = {
        "Python Version": False,
        "Core Dependencies": False,
        "GPU Access": False,
        "API Configuration": False,
        "Disk Space": False,
        "Memory": False
    }

    try:
        # Python version check
        if sys.version_info >= (3, 8):
            checks["Python Version"] = True
            print("✅ Python version compatible")
        else:
            print("❌ Python version too old")

        # Core dependencies check
        required = ["torch", "transformers", "langchain", "pandas", "numpy"]
        missing = []
        for pkg in required:
            try:
                __import__(pkg)
            except ImportError:
                missing.append(pkg)

        if not missing:
            checks["Core Dependencies"] = True
            print("✅ All core dependencies available")
        else:
            print(f"❌ Missing dependencies: {missing}")

        # GPU check
        try:
            import torch
            if torch.cuda.is_available():
                checks["GPU Access"] = True
                print("✅ GPU access confirmed")
            else:
                print("⚠️  No GPU access (CPU fallback available)")
                checks["GPU Access"] = "partial"
        except Exception:
            # Exception rather than a bare except, so Ctrl-C is not swallowed.
            print("❌ Cannot check GPU status")

        # API configuration check: count the keys present in the environment.
        api_keys_found = sum(
            1
            for key_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "HF_TOKEN")
            if os.getenv(key_name)
        )

        if api_keys_found >= 1:
            checks["API Configuration"] = True
            print(f"✅ {api_keys_found}/3 API keys configured")
        else:
            print("⚠️  No API keys found (fallback methods available)")
            checks["API Configuration"] = "partial"

        # Disk space check
        try:
            free_space = shutil.disk_usage(".").free // (1024**3)
            if free_space >= 20:
                checks["Disk Space"] = True
                print(f"✅ Sufficient disk space ({free_space}GB)")
            else:
                print(f"⚠️  Low disk space ({free_space}GB)")
        except Exception:
            print("❌ Cannot check disk space")

        # Memory check
        try:
            memory_gb = psutil.virtual_memory().total / (1024**3)
            if memory_gb >= 8:
                checks["Memory"] = True
                print(f"✅ Sufficient RAM ({memory_gb:.1f}GB)")
            else:
                print(f"⚠️  Low RAM ({memory_gb:.1f}GB)")
        except Exception:
            print("❌ Cannot check memory")

    except Exception as e:
        print(f"❌ Error during setup check: {e}")

    # Summary
    print("\n" + "=" * 50)
    passed_checks = sum(1 for v in checks.values() if v is True)
    partial_checks = sum(1 for v in checks.values() if v == "partial")
    total_checks = len(checks)

    print(f"📊 Setup Status: {passed_checks}/{total_checks} checks passed")
    if partial_checks > 0:
        print(f"⚠️  {partial_checks} checks partially satisfied")

    # Nothing runs without a supported interpreter and the core packages, so
    # those two must pass regardless of how many other checks do.
    essentials_ok = (
        checks["Python Version"] is True
        and checks["Core Dependencies"] is True
    )

    if essentials_ok and passed_checks >= 4:
        print("\n🎉 READY TO RUN COTTON!")
        print("   You can proceed with the main implementation notebook.")
        print("   Start with: 1.6.2.1_COTTON_Implementation.ipynb")
    elif essentials_ok and passed_checks + partial_checks >= 4:
        print("\n⚠️  PARTIALLY READY")
        print("   You can run COTTON with limitations.")
        print("   Consider addressing warnings for full functionality.")
    else:
        print("\n❌ SETUP INCOMPLETE")
        print("   Please resolve the failed checks before proceeding.")
        print("   Refer to the troubleshooting guide above.")

    return checks

# Run final verification
setup_status = final_setup_check()

# Save setup report
import json
from datetime import datetime

with open('setup_report.json', 'w', encoding='utf-8') as f:
    json.dump({
        # Local time at which the report was written, in the same
        # 'YYYY-MM-DD HH:MM:SS' layout as the former placeholder value.
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        # Values are True, False or "partial", all JSON-serialisable.
        'checks': setup_status,
        'python_version': f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
        'platform': platform.system()
    }, f, indent=2)

print("\n📁 Setup report saved to 'setup_report.json'")