In [1]:
import requests
import time
import math

def _rate_limit_wait_seconds(response):
    """Return the seconds to wait if *response* is a rate-limit rejection.

    Returns None when *response* is missing or is not a rate-limit response,
    so the caller can tell "retry later" apart from "give up".
    """
    if response is None or response.status_code not in (403, 429):
        return None
    # Matches both "API rate limit exceeded" (primary limit) and
    # "You have exceeded a secondary rate limit".
    if "rate limit" not in response.text.lower():
        return None

    # Header lookups on a requests response are case-insensitive.
    retry_after = response.headers.get("Retry-After")
    if retry_after is not None:
        try:
            return max(0, int(retry_after))
        except ValueError:
            pass  # Not a plain number of seconds; fall through to other hints.

    if response.headers.get("X-RateLimit-Remaining") == "0":
        reset_epoch = response.headers.get("X-RateLimit-Reset")
        if reset_epoch is not None:
            try:
                return max(0, math.ceil(int(reset_epoch) - time.time()))
            except ValueError:
                pass

    # No usable hint from the server: wait one minute.
    return 60


def estimate_python_repos_count(*, max_retries: int = 3, timeout: float = 10.0):
    """
    Report how many public GitHub repositories have Python as their primary language.

    Issues a single GitHub Search API query (``language:python``) and reads the
    ``total_count`` field of the response. No sampling is performed; the number
    is whatever GitHub's search index reports.

    Args:
        max_retries: Maximum number of retries after a rate-limit response.
            Negative values are treated as 0.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        int | None: The ``total_count`` reported by GitHub, or None if the
        request failed or the rate limit was still in effect after all retries.
    """
    api_url = "https://api.github.com/search/repositories"

    headers = {
        "Accept": "application/vnd.github.v3+json",
        # "Authorization": "token YOUR_GITHUB_TOKEN"
    }

    params = {
        "q": "language:python",
        "per_page": 1
    }

    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        # Reset per attempt so the except branch never sees a stale or
        # unbound response when requests.get() itself raises.
        response = None
        try:
            response = requests.get(
                api_url, headers=headers, params=params, timeout=timeout
            )
            response.raise_for_status()

            data = response.json()
            total_count = data.get("total_count", 0)

            print(f"Estimated total Python repositories on GitHub: {total_count:,}")

            return total_count

        except requests.exceptions.RequestException as e:
            print(f"Error accessing GitHub API: {e}")

            wait_time = _rate_limit_wait_seconds(response)
            # Give up on non-rate-limit errors or once the retry budget is spent.
            if wait_time is None or attempt == attempts - 1:
                return None

            print(f"Rate limit exceeded. Waiting {wait_time} seconds...")
            time.sleep(wait_time)

    return None
    
if __name__ == "__main__":
    # Script entry point: fetch the count and, on success, report it together
    # with the known caveats of the figure.
    print("Estimating the number of Python repositories on GitHub...")
    count = estimate_python_repos_count()

    if count is not None:
        report_lines = (
            f"There are approximately {count:,} repositories using Python as the primary language on GitHub.",
            "\nNote: This estimate is based on GitHub's search API and may not be exact due to:",
            "- API rate limits and sampling",
            "- Repositories that use Python but have another primary language",
            "- Private repositories not included in public search results",
        )
        for line in report_lines:
            print(line)

Estimating the number of Python repositories on GitHub...
Estimated total Python repositories on GitHub: 18,374,006
There are approximately 18,374,006 repositories using Python as the primary language on GitHub.

Note: This estimate is based on GitHub's search API and may not be exact due to:
- API rate limits and sampling
- Repositories that use Python but have another primary language
- Private repositories not included in public search results
