### Which Python Packages are Included?

In [1]:
import requests
from requests.exceptions import RequestException
import random
import os
import logging
import re
import ast
import time
import csv
import json
import tempfile
import subprocess
from datetime import datetime
from tqdm import tqdm
from datetime import datetime
from typing import Set, List, Tuple, Dict, Any, Optional

In [2]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("repo_analysis.log"),
        logging.StreamHandler()
    ]
)

In [3]:
# Constants
GITHUB_API_URL = "https://api.github.com"
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36"
]
GITHUB_TOKEN = ""
#os.environ.get("GITHUB_TOKEN")
DATA_FILE = "library_usage.csv"

### Utils

In [4]:
# Global variables to track rate limit status
remaining_requests = 3000  # Authenticated GitHub API limit (increased from 60)
rate_limit_reset_time = 0
last_rate_check = 0
RATE_LIMIT_WAIT = True  # Set to False to exit instead of waiting for rate limit reset
RETRY_DELAY = 1  # Reduced from 2

def get_headers() -> Dict[str, str]:
    """Get headers for API requests with random user agent and authentication if available."""
    headers = {
        "User-Agent": random.choice(USER_AGENTS),
        "Accept": "application/vnd.github.v3+json"
    }
    if GITHUB_TOKEN:
        headers["Authorization"] = f"token {GITHUB_TOKEN}"
    return headers

class RateLimitExceeded(Exception):
    """Exception raised when GitHub API rate limit is exceeded."""
    pass

def check_rate_limit() -> Tuple[int, int]:
    """
    Check GitHub API rate limit status.
    
    Returns:
        Tuple of (remaining requests, reset time in seconds)
    """
    global remaining_requests, rate_limit_reset_time, last_rate_check
    
    # Only check every 30 seconds to avoid wasting requests (increased from 10)
    current_time = time.time()
    if current_time - last_rate_check < 30:
        return remaining_requests, rate_limit_reset_time
    
    try:
        response = requests.get(f"{GITHUB_API_URL}/rate_limit", headers=get_headers())
        data = response.json()
        
        if 'resources' in data and 'core' in data['resources']:
            remaining_requests = data['resources']['core']['remaining']
            rate_limit_reset_time = data['resources']['core']['reset']
            last_rate_check = current_time
            
            logging.info(f"Rate limit status: {remaining_requests} requests remaining, " +
                        f"resets in {rate_limit_reset_time - current_time:.1f} seconds")
            
            return remaining_requests, rate_limit_reset_time
    except Exception as e:
        logging.warning(f"Failed to check rate limit: {e}")
    
    return remaining_requests, rate_limit_reset_time

def wait_for_rate_limit() -> bool:
    """
    Wait for rate limit to reset if needed.
    
    Returns:
        True if we waited and rate limit should be reset, False if program should exit
    """
    remaining, reset_time = check_rate_limit()
    
    # More aggressive approach - only wait when we have very few requests left
    if remaining > 1:  # Reduced buffer from 5 to 1
        return True
    
    wait_time = max(0, reset_time - time.time())
    
    if wait_time > 300 and not RATE_LIMIT_WAIT:  # More than 5 minutes wait
        logging.error(f"Rate limit exceeded. Would need to wait {wait_time/60:.1f} minutes. Exiting.")
        return False
    
    logging.warning(f"Rate limit almost reached. Waiting {wait_time:.1f} seconds for reset...")
    
    # Print countdown every 60 seconds (increased from 30)
    while wait_time > 0:
        time.sleep(min(60, wait_time))
        wait_time = max(0, reset_time - time.time())
        if wait_time > 0:
            logging.info(f"Still waiting for rate limit reset: {wait_time:.1f} seconds remaining")
    
    # Reset our tracking variables
    global remaining_requests, last_rate_check
    remaining_requests = 5000  # Default authenticated GitHub API limit (increased from 60)
    last_rate_check = 0
    
    logging.info("Rate limit should be reset now. Resuming operations.")
    return True

def make_github_request(url: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Make a request to GitHub API with rate limit handling but no retries.
    
    Args:
        url: The API URL to request
        params: Optional query parameters
        
    Returns:
        Parsed JSON response
        
    Raises:
        RateLimitExceeded: If rate limit is hit and waiting is not possible
        RequestException: For other request errors
    """
    global remaining_requests, rate_limit_reset_time
    
    # Only check rate limit if we're getting close to the limit
    if remaining_requests <= 100:  # Only check when getting low
        if not wait_for_rate_limit():
            raise RateLimitExceeded("Rate limit exceeded and waiting is disabled")
    
    try:
        response = requests.get(url, headers=get_headers(), params=params)
        
        # Update rate limit info from response headers
        if 'X-RateLimit-Remaining' in response.headers:
            remaining_requests = int(response.headers['X-RateLimit-Remaining'])
            rate_limit_reset_time = int(response.headers['X-RateLimit-Reset'])
        
        # Handle rate limiting - no retry, just report the error
        if response.status_code == 403 and remaining_requests == 0:
            logging.error("Rate limit exceeded")
            raise RateLimitExceeded("Rate limit exceeded")
        
        # Special handling for common error codes
        if response.status_code == 404:
            logging.warning(f"Resource not found: {url}")
        elif response.status_code == 451:
            logging.warning(f"Resource unavailable for legal reasons (451): {url}")
        
        response.raise_for_status()
        return response.json()
    
    except requests.exceptions.HTTPError as e:
        logging.warning(f"HTTP error for {url}: {e}")
        raise
    except json.JSONDecodeError as e:
        logging.warning(f"JSON decode error for {url}: {e}")
        raise
    except RequestException as e:
        logging.warning(f"Request error for {url}: {e}")
        raise

### Get Random Python Repos

In [5]:
def get_random_python_repos(n: int = 10, min_stars: int = 0, max_per_page: int = None) -> List[Tuple[str, str, str]]:
    """
    Get random Python repositories from GitHub using the repositories endpoint with random IDs.
    Uses the utility functions for rate limiting and request handling.
    
    Args:
        n: Number of repositories to fetch
        min_stars: Minimum number of stars for repositories
        max_per_page: Maximum number of repositories to consider from each page (None = all)
        
    Returns:
        List of tuples (repo_name, repo_url, last_updated)
    """
    results = []
    
    while len(results) < n:
        try:
            # Generate a random repository ID in a reasonable range
            random_id = random.randint(1, 700_000_000)
            
            # Fetch repositories starting from random ID
            url = f"{GITHUB_API_URL}/repositories?since={random_id}"
            
            try:
                response = make_github_request(url)
                repos = response
            except RateLimitExceeded:
                logging.warning("Rate limit exceeded while fetching random repositories. Waiting...")
                if not wait_for_rate_limit():
                    logging.error("Could not wait for rate limit reset. Exiting.")
                    break
                continue
            
            # Limit number of repos to consider from this page if specified
            if max_per_page:
                repos = repos[:max_per_page]
            
            # Process each repository
            for repo in repos:
                # Get repository name
                repo_name = repo.get("full_name")
                if not repo_name:
                    continue
                
                try:
                    # Check if it's a Python repository and meets star criteria
                    repo_url = f"{GITHUB_API_URL}/repos/{repo_name}"
                    repo_data = make_github_request(repo_url)
                    
                    # Check language and stars
                    if (repo_data.get("language") == "Python" and 
                            repo_data.get("stargazers_count", 0) >= min_stars):
                        
                        results.append((
                            repo_name,
                            repo_data.get("html_url", ""),
                            repo_data.get("updated_at", "")
                        ))
                        logging.info(f"Found: {repo_name} (Stars: {repo_data.get('stargazers_count')})")
                        
                        # Check if we have enough results
                        if len(results) >= n:
                            break
                
                except RateLimitExceeded:
                    logging.warning("Rate limit exceeded while fetching repo details. Waiting...")
                    if not wait_for_rate_limit():
                        logging.error("Could not wait for rate limit reset. Exiting.")
                        break
                except Exception as e:
                    logging.warning(f"Error processing repository {repo_name}: {e}")
                    continue
                
        except Exception as e:
            logging.error(f"Error in repository fetch: {e}")
            # Exponential backoff on errors
            time.sleep(10)
    
    logging.info(f"Successfully collected {len(results)} random Python repositories")
    return results[:n]

### Get Python Files and Analyze Files per Repo.

In [6]:
def get_default_branch(repo_name: str) -> Optional[str]:
    """
    Get the default branch (main or master) for a repository.
    Returns None if the branch cannot be determined.
    """
    try:
        if not wait_for_rate_limit():
            raise RateLimitExceeded("Rate limit exceeded while getting default branch")
            
        response = make_github_request(f"{GITHUB_API_URL}/repos/{repo_name}")
        default_branch = response.get("default_branch", "main")
        return default_branch
    except Exception as e:
        logging.warning(f"Couldn't determine default branch for {repo_name}: {e}")
        # Return None to indicate we couldn't get the branch
        return None

def get_python_files(repo_name: str, max_files: int = 10, seed: int = 42) -> List[str]:
    """
    Get a list of Python files in the repository.
    
    Args:
        repo_name: Full name of the repository (owner/repo)
        max_files: Maximum number of files to return
        seed: Random seed for reproducible file selection
        
    Returns:
        List of file paths
    """
    try:
        if not wait_for_rate_limit():
            raise RateLimitExceeded("Rate limit exceeded while getting Python files")
        
        default_branch = get_default_branch(repo_name)
        if default_branch is None:
            logging.warning(f"Skipping repo {repo_name} - couldn't determine default branch")
            return []
            
        url = f"{GITHUB_API_URL}/repos/{repo_name}/git/trees/{default_branch}?recursive=1"
        
        try:
            response = make_github_request(url)
            
            if 'tree' in response:
                python_files = [file['path'] for file in response['tree'] 
                        if file['path'].endswith('.py') and file['type'] == 'blob']
                
                # Set seed and shuffle to ensure reproducible file selection
                random.seed(seed)
                random.shuffle(python_files)
                python_files = python_files[:max_files]
                
                return python_files
        except RateLimitExceeded:
            logging.warning(f"Rate limit exceeded while getting files for {repo_name}")
            # Try fallback method
        
        # Fallback method: try to get files from the repo contents
        url = f"{GITHUB_API_URL}/repos/{repo_name}/contents"
        response = make_github_request(url)
        
        python_files = []
        for item in response:
            if item['type'] == 'file' and item['name'].endswith('.py'):
                python_files.append(item['path'])
            elif item['type'] == 'dir':
                # Try to look in top-level directories
                try:
                    dir_url = f"{GITHUB_API_URL}/repos/{repo_name}/contents/{item['path']}"
                    dir_response = make_github_request(dir_url)
                    
                    for dir_item in dir_response:
                        if dir_item['type'] == 'file' and dir_item['name'].endswith('.py'):
                            python_files.append(dir_item['path'])
                except Exception:
                    pass
                    
        # Set seed and shuffle files from fallback method as well
        random.seed(seed)
        random.shuffle(python_files)
        python_files = python_files[:max_files]
        
        return python_files
                
    except Exception as e:
        logging.error(f"Error getting Python files for {repo_name}: {e}")
        return []

def extract_imports(file_content: str) -> Set[str]:
    """
    Extract all imported module names from Python code using the ast module.
    
    Args:
        file_content: Python code content as a string
        
    Returns:
        Set of top-level module names that are imported
    """
    try:
        # Parse the code into an AST
        tree = ast.parse(file_content)
        
        imports = set()
        
        # Walk through the AST nodes
        for node in ast.walk(tree):
            # Handle "import module" or "import module.submodule" statements
            if isinstance(node, ast.Import):
                for name in node.names:
                    # Extract the top-level module name (before any dots)
                    module_name = name.name.split('.', 1)[0]
                    imports.add(module_name)
            
            # Handle "from module import name" statements
            elif isinstance(node, ast.ImportFrom):
                if node.module:  # In case of "from . import name"
                    # Extract the top-level module name (before any dots)
                    module_name = node.module.split('.', 1)[0]
                    imports.add(module_name)
        
        return imports
    
    except SyntaxError:
        # Fallback to regex for files with syntax errors
        fallback_imports = set()
        lines = file_content.split("\n")
        for line in lines:
            # Match import statements
            import_match = re.match(r'\s*import\s+([\w\s,]+)(?:\s+as\s+\w+)?', line)
            if import_match:
                modules = import_match.group(1).split(',')
                for module in modules:
                    module_name = module.strip().split('.', 1)[0].split()[0]
                    fallback_imports.add(module_name)
                    
            # Match from statements
            from_match = re.match(r'\s*from\s+(\w+)', line)
            if from_match:
                fallback_imports.add(from_match.group(1))
                
        return fallback_imports

def extract_imports(content: str) -> Set[str]:
    """
    Extract imported libraries from Python content using AST parsing
    and regex as a fallback for edge cases.
    """
    libraries = set()
    
    # AST parsing for reliable import extraction
    try:
        tree = ast.parse(content)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for name in node.names:
                    libraries.add(name.name.split('.')[0])
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    libraries.add(node.module.split('.')[0])
    except SyntaxError:
        # Fallback to regex for files with syntax errors
        import_pattern = r'^import\s+([\w\.]+)|^from\s+([\w\.]+)\s+import'
        for match in re.finditer(import_pattern, content, re.MULTILINE):
            lib = match.group(1) or match.group(2)
            if lib:
                libraries.add(lib.split('.')[0])
                
    return libraries

def analyze_repo(repo_info: Tuple[str, str, str]) -> List[Tuple[str, str, str, str, str]]:
    """
    Analyze a GitHub repository for Python library usage by cloning it once.
    
    Args:
        repo_info: Tuple of (repo_name, repo_url, last_updated)
        
    Returns:
        List of tuples (library_name, repo_name, file_path, fetch_date, last_updated)
    """
    repo_name, repo_url, last_updated = repo_info
    logging.info(f"Processing repo: {repo_name}")
    
    repo_results = []
    fetch_date = datetime.utcnow().isoformat()
    
    # Create a temporary directory for the cloned repo
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            # Clone the repository with minimal depth
            clone_cmd = f"git clone --depth 1 https://github.com/{repo_name}.git {temp_dir}"
            subprocess.run(clone_cmd, shell=True, check=True, capture_output=True)
            
            # Find Python files in the repository
            python_files = []
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    if file.endswith('.py'):
                        relative_path = os.path.relpath(os.path.join(root, file), temp_dir)
                        python_files.append(relative_path)
                
                # Limit the number of files to analyze if there are too many
                if len(python_files) >= 10:
                    python_files = python_files[:10]
                    break
            
            # Process each Python file
            for file_path in python_files:
                try:
                    full_path = os.path.join(temp_dir, file_path)
                    with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()
                    
                    imports = extract_imports(content)
                    
                    for library in imports:
                        repo_results.append((
                            library, 
                            repo_name, 
                            file_path, 
                            fetch_date, 
                            last_updated
                        ))
                except Exception as e:
                    logging.warning(f"Error processing file {file_path} in {repo_name}: {e}")
            
            return repo_results
        
        except Exception as e:
            logging.error(f"Failed to analyze repo {repo_name}: {e}")
            return []

def save_results(results: List[Tuple[str, str, str, str, str, bool]]) -> None:
    """Save analysis results to CSV file with proper error handling."""
    if not results:
        logging.warning("No results to save")
        return
    
    try:
        file_exists = os.path.exists(DATA_FILE)
        
        with open(DATA_FILE, "a", newline='') as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow([
                    "library_name", 
                    "repo", 
                    "file", 
                    "fetch_date", 
                    "last_updated" 
                ])
            writer.writerows(results)
        
        logging.info(f"Saved {len(results)} entries to {DATA_FILE}")
    
    except Exception as e:
        logging.error(f"Error saving results to CSV: {e}")
        
        # Emergency backup
        backup_file = f"backup_results_{int(time.time())}.json"
        with open(backup_file, "w") as f:
            json.dump(results, f)
        logging.info(f"Backup saved to {backup_file}")

In [7]:
def main():
    """Main function to orchestrate the GitHub repository analysis."""
    try:
        num_repos = 50  # Adjust as needed
        logging.info(f"Fetching {num_repos} repositories...")
        sampled_repos = get_random_python_repos(n=num_repos, min_stars=0)
        logging.info(f"Fetched {len(sampled_repos)} repositories for analysis")
        
        # Create/initialize the CSV file before processing
        file_exists = os.path.exists(DATA_FILE)
        if not file_exists:
            with open(DATA_FILE, "w", newline='') as f:
                writer = csv.writer(f)
                writer.writerow([
                    "library_name", 
                    "repo", 
                    "file", 
                    "fetch_date", 
                    "last_updated" 
                ])
            logging.info(f"Created new CSV file: {DATA_FILE}")
        
        total_imports = 0
        
        # Process repositories sequentially with progress bar
        for repo in tqdm(sampled_repos, desc="Processing Repositories", unit="repo"):
            try:
                repo_name = repo[0]
                tqdm.write(f"Processing {repo_name}...")
                
                repo_results = analyze_repo(repo)
                
                # Write results to CSV immediately after each repo
                if repo_results:
                    with open(DATA_FILE, "a", newline='') as f:
                        writer = csv.writer(f)
                        writer.writerows(repo_results)
                    
                    total_imports += len(repo_results)
                    tqdm.write(f"Completed {repo_name}: saved {len(repo_results)} library imports (total: {total_imports})")
                else:
                    tqdm.write(f"Completed {repo_name}: no library imports found")
                    
            except Exception as e:
                tqdm.write(f"Error processing {repo[0]}: {e}")
                error_file = f"error_{repo[0].replace('/', '_')}_{int(time.time())}.txt"
                with open(error_file, "w") as f:
                    f.write(f"Error processing {repo[0]}: {str(e)}\n")
                    f.write(traceback.format_exc())
        
        logging.info(f"Analysis complete. Analyzed {total_imports} library imports across {len(sampled_repos)} repositories")
        logging.info(f"Results saved to {DATA_FILE}")
        
    except Exception as e:
        logging.error(f"Fatal error in main process: {str(e)}")
        traceback.print_exc()
        sys.exit(1)
        
    return 0

if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)

2025-03-12 20:12:35,226 - INFO - Fetching 50 repositories...
2025-03-12 20:12:39,031 - INFO - Found: shafiurrahman/heroku-salary-prediction (Stars: 0)
2025-03-12 20:12:39,774 - INFO - Found: David-coder02/AirBnB_clone (Stars: 0)
2025-03-12 20:12:52,649 - INFO - Found: matt5797/rawsocket_sniffer (Stars: 0)
2025-03-12 20:13:07,092 - INFO - Found: Ocupe/Projectors (Stars: 210)
2025-03-12 20:13:18,382 - INFO - Found: Hhacel/Python-Pong (Stars: 0)
2025-03-12 20:13:19,129 - INFO - Found: rthorst/TwitterSentiment (Stars: 7)
2025-03-12 20:13:19,399 - INFO - Found: ctberthiaume/seaflowpy (Stars: 0)
2025-03-12 20:13:24,171 - INFO - Found: adm116/Scripts (Stars: 0)
2025-03-12 20:13:33,206 - INFO - Found: shayanshakiba/tgcf (Stars: 0)
2025-03-12 20:13:33,691 - INFO - Found: ghayward/the_Lord_always_delivers_functions (Stars: 1)
2025-03-12 20:13:37,115 - INFO - Found: ingridaburto/tienda1 (Stars: 0)
2025-03-12 20:13:39,143 - INFO - Found: mmiezianko/computational-intelligence-proj (Stars: 0)
2025-0

Processing shafiurrahman/heroku-salary-prediction...


Processing Repositories:   2%|▎                | 1/50 [00:01<01:26,  1.76s/repo]2025-03-12 20:16:40,754 - INFO - Processing repo: David-coder02/AirBnB_clone


Completed shafiurrahman/heroku-salary-prediction: saved 7 library imports (total: 7)
Processing David-coder02/AirBnB_clone...


Processing Repositories:   4%|▋                | 2/50 [00:02<01:04,  1.34s/repo]2025-03-12 20:16:41,762 - INFO - Processing repo: matt5797/rawsocket_sniffer


Completed David-coder02/AirBnB_clone: saved 12 library imports (total: 19)
Processing matt5797/rawsocket_sniffer...


Processing Repositories:   6%|█                | 3/50 [00:03<00:52,  1.11s/repo]2025-03-12 20:16:42,592 - INFO - Processing repo: Ocupe/Projectors


Completed matt5797/rawsocket_sniffer: saved 15 library imports (total: 34)
Processing Ocupe/Projectors...


Processing Repositories:   8%|█▎               | 4/50 [00:04<00:46,  1.00s/repo]2025-03-12 20:16:43,432 - INFO - Processing repo: Hhacel/Python-Pong


Completed Ocupe/Projectors: saved 25 library imports (total: 59)
Processing Hhacel/Python-Pong...


Processing Repositories:  10%|█▋               | 5/50 [00:05<00:40,  1.12repo/s]2025-03-12 20:16:44,124 - INFO - Processing repo: rthorst/TwitterSentiment


Completed Hhacel/Python-Pong: saved 9 library imports (total: 68)
Processing rthorst/TwitterSentiment...


Processing Repositories:  12%|██               | 6/50 [00:08<01:16,  1.74s/repo]2025-03-12 20:16:47,505 - INFO - Processing repo: ctberthiaume/seaflowpy


Completed rthorst/TwitterSentiment: saved 79 library imports (total: 147)
Processing ctberthiaume/seaflowpy...


Processing Repositories:  14%|██▍              | 7/50 [00:09<01:02,  1.46s/repo]2025-03-12 20:16:48,402 - INFO - Processing repo: adm116/Scripts


Completed ctberthiaume/seaflowpy: saved 44 library imports (total: 191)
Processing adm116/Scripts...


Processing Repositories:  16%|██▋              | 8/50 [00:10<00:51,  1.23s/repo]2025-03-12 20:16:49,142 - INFO - Processing repo: shayanshakiba/tgcf


Completed adm116/Scripts: saved 1 library imports (total: 192)
Processing shayanshakiba/tgcf...


Processing Repositories:  18%|███              | 9/50 [00:11<00:46,  1.13s/repo]2025-03-12 20:16:50,032 - INFO - Processing repo: ghayward/the_Lord_always_delivers_functions


Completed shayanshakiba/tgcf: saved 45 library imports (total: 237)
Processing ghayward/the_Lord_always_delivers_functions...


Processing Repositories:  20%|███▏            | 10/50 [00:11<00:39,  1.02repo/s]2025-03-12 20:16:50,704 - INFO - Processing repo: ingridaburto/tienda1


Completed ghayward/the_Lord_always_delivers_functions: no library imports found
Processing ingridaburto/tienda1...


Processing Repositories:  22%|███▌            | 11/50 [00:12<00:35,  1.08repo/s]2025-03-12 20:16:51,485 - INFO - Processing repo: mmiezianko/computational-intelligence-proj


Completed ingridaburto/tienda1: saved 15 library imports (total: 252)
Processing mmiezianko/computational-intelligence-proj...


Processing Repositories:  24%|███▊            | 12/50 [00:13<00:31,  1.20repo/s]2025-03-12 20:16:52,118 - INFO - Processing repo: santiagosimonsantos/UVa


Completed mmiezianko/computational-intelligence-proj: saved 58 library imports (total: 310)
Processing santiagosimonsantos/UVa...


Processing Repositories:  26%|████▏           | 13/50 [00:13<00:30,  1.23repo/s]2025-03-12 20:16:52,887 - INFO - Processing repo: Darkhunter9/EBSD_CNN_Public


Completed santiagosimonsantos/UVa: saved 7 library imports (total: 317)
Processing Darkhunter9/EBSD_CNN_Public...


Processing Repositories:  28%|████▍           | 14/50 [00:14<00:27,  1.30repo/s]2025-03-12 20:16:53,549 - INFO - Processing repo: iremharnak/community_characters


Completed Darkhunter9/EBSD_CNN_Public: saved 64 library imports (total: 381)
Processing iremharnak/community_characters...


Processing Repositories:  30%|████▊           | 15/50 [00:15<00:25,  1.35repo/s]2025-03-12 20:16:54,231 - INFO - Processing repo: Anderton25/Gerador-de-Senha


Completed iremharnak/community_characters: saved 13 library imports (total: 394)
Processing Anderton25/Gerador-de-Senha...


Processing Repositories:  32%|█████           | 16/50 [00:15<00:24,  1.40repo/s]2025-03-12 20:16:54,876 - INFO - Processing repo: Mitchellpkt/matrixprofile


Completed Anderton25/Gerador-de-Senha: saved 1 library imports (total: 395)
Processing Mitchellpkt/matrixprofile...


Processing Repositories:  34%|█████▍          | 17/50 [00:16<00:26,  1.26repo/s]2025-03-12 20:16:55,850 - INFO - Processing repo: FLAMINGxFURY/idtest


Completed Mitchellpkt/matrixprofile: saved 33 library imports (total: 428)
Processing FLAMINGxFURY/idtest...


Processing Repositories:  36%|█████▊          | 18/50 [00:17<00:23,  1.36repo/s]2025-03-12 20:16:56,455 - INFO - Processing repo: rkania3/rahul-web-app


Completed FLAMINGxFURY/idtest: saved 2 library imports (total: 430)
Processing rkania3/rahul-web-app...


Processing Repositories:  38%|██████          | 19/50 [00:18<00:21,  1.45repo/s]2025-03-12 20:16:57,038 - INFO - Processing repo: emltoja/wstep_do_informatyki_i_programowania


Completed rkania3/rahul-web-app: saved 3 library imports (total: 433)
Processing emltoja/wstep_do_informatyki_i_programowania...


Processing Repositories:  40%|██████▍         | 20/50 [00:18<00:20,  1.50repo/s]2025-03-12 20:16:57,651 - INFO - Processing repo: TrellixVulnTeam/tecweb_ac04_9BBK


Completed emltoja/wstep_do_informatyki_i_programowania: saved 16 library imports (total: 449)
Processing TrellixVulnTeam/tecweb_ac04_9BBK...


Processing Repositories:  42%|██████▋         | 21/50 [00:20<00:24,  1.16repo/s]2025-03-12 20:16:58,965 - INFO - Processing repo: chenke91/mysql-compare


Completed TrellixVulnTeam/tecweb_ac04_9BBK: saved 9 library imports (total: 458)
Processing chenke91/mysql-compare...


Processing Repositories:  44%|███████         | 22/50 [00:20<00:22,  1.26repo/s]2025-03-12 20:16:59,606 - INFO - Processing repo: sahernandezr/lab-data-vikings


Completed chenke91/mysql-compare: saved 3 library imports (total: 461)
Processing sahernandezr/lab-data-vikings...


Processing Repositories:  46%|███████▎        | 23/50 [00:21<00:20,  1.35repo/s]2025-03-12 20:17:00,225 - INFO - Processing repo: husniddin123/list_indexing_homework


Completed sahernandezr/lab-data-vikings: saved 12 library imports (total: 473)
Processing husniddin123/list_indexing_homework...


Processing Repositories:  48%|███████▋        | 24/50 [00:21<00:18,  1.41repo/s]2025-03-12 20:17:00,852 - INFO - Processing repo: artunandac/Little-Edd-Assistant


Completed husniddin123/list_indexing_homework: saved 1 library imports (total: 474)
Processing artunandac/Little-Edd-Assistant...


Processing Repositories:  50%|████████        | 25/50 [00:22<00:16,  1.52repo/s]2025-03-12 20:17:01,389 - INFO - Processing repo: wakunezu/eyecatch_generator


Completed artunandac/Little-Edd-Assistant: saved 9 library imports (total: 483)
Processing wakunezu/eyecatch_generator...


Processing Repositories:  52%|████████▎       | 26/50 [00:24<00:23,  1.03repo/s]2025-03-12 20:17:03,093 - INFO - Processing repo: mrprimle/EmoProject


Completed wakunezu/eyecatch_generator: saved 3 library imports (total: 486)
Processing mrprimle/EmoProject...


Processing Repositories:  54%|████████▋       | 27/50 [00:29<00:51,  2.24s/repo]2025-03-12 20:17:08,296 - INFO - Processing repo: leeshinyook/RectangleCropper


Completed mrprimle/EmoProject: saved 7 library imports (total: 493)
Processing leeshinyook/RectangleCropper...


Processing Repositories:  56%|████████▉       | 28/50 [00:30<00:42,  1.94s/repo]2025-03-12 20:17:09,531 - INFO - Processing repo: Aditya-Bhargav-dev/Ds-Algo


Completed leeshinyook/RectangleCropper: saved 5 library imports (total: 498)
Processing Aditya-Bhargav-dev/Ds-Algo...


Processing Repositories:  58%|█████████▎      | 29/50 [00:31<00:32,  1.54s/repo]2025-03-12 20:17:10,144 - INFO - Processing repo: tarekrahman3/Instagram_Hashtag_Analysis


Completed Aditya-Bhargav-dev/Ds-Algo: saved 4 library imports (total: 502)
Processing tarekrahman3/Instagram_Hashtag_Analysis...


Processing Repositories:  60%|█████████▌      | 30/50 [00:31<00:25,  1.28s/repo]2025-03-12 20:17:10,826 - INFO - Processing repo: PPatrickGU/Python-project-FlappyMM


Completed tarekrahman3/Instagram_Hashtag_Analysis: saved 9 library imports (total: 511)
Processing PPatrickGU/Python-project-FlappyMM...


Processing Repositories:  62%|█████████▉      | 31/50 [00:33<00:23,  1.24s/repo]2025-03-12 20:17:11,964 - INFO - Processing repo: 505177793/shiyanlou-code


Completed PPatrickGU/Python-project-FlappyMM: saved 3 library imports (total: 514)
Processing 505177793/shiyanlou-code...


Processing Repositories:  64%|██████████▏     | 32/50 [00:33<00:18,  1.05s/repo]2025-03-12 20:17:12,571 - INFO - Processing repo: NMO732/Projects


Completed 505177793/shiyanlou-code: no library imports found
Processing NMO732/Projects...


Processing Repositories:  66%|██████████▌     | 33/50 [00:34<00:15,  1.10repo/s]2025-03-12 20:17:13,160 - INFO - Processing repo: timolesterhuis/python-plantuml


Completed NMO732/Projects: saved 14 library imports (total: 528)
Processing timolesterhuis/python-plantuml...


Processing Repositories:  68%|██████████▉     | 34/50 [00:35<00:14,  1.14repo/s]2025-03-12 20:17:13,964 - INFO - Processing repo: findryDev/SQLitePython


Completed timolesterhuis/python-plantuml: saved 16 library imports (total: 544)
Processing findryDev/SQLitePython...


Processing Repositories:  70%|███████████▏    | 35/50 [00:35<00:12,  1.22repo/s]2025-03-12 20:17:14,643 - INFO - Processing repo: R4V88/Simple-filmweb-app-in-Python


Completed findryDev/SQLitePython: saved 3 library imports (total: 547)
Processing R4V88/Simple-filmweb-app-in-Python...


Processing Repositories:  72%|███████████▌    | 36/50 [00:39<00:25,  1.81s/repo]2025-03-12 20:17:18,758 - INFO - Processing repo: cloudfellows/stuxnet-worm


Completed R4V88/Simple-filmweb-app-in-Python: saved 16 library imports (total: 563)
Processing cloudfellows/stuxnet-worm...


Processing Repositories:  74%|███████████▊    | 37/50 [00:41<00:24,  1.87s/repo]2025-03-12 20:17:20,779 - INFO - Processing repo: Salehbigdeli/raytracer


Completed cloudfellows/stuxnet-worm: saved 40 library imports (total: 603)
Processing Salehbigdeli/raytracer...


Processing Repositories:  76%|████████████▏   | 38/50 [00:42<00:18,  1.53s/repo]2025-03-12 20:17:21,500 - INFO - Processing repo: oi111/test_1


Completed Salehbigdeli/raytracer: saved 3 library imports (total: 606)
Processing oi111/test_1...


Processing Repositories:  78%|████████████▍   | 39/50 [00:45<00:20,  1.84s/repo]2025-03-12 20:17:24,069 - INFO - Processing repo: Edt12/Platform-Fighting-game-WIP


Completed oi111/test_1: saved 28 library imports (total: 634)
Processing Edt12/Platform-Fighting-game-WIP...


Processing Repositories:  80%|████████████▊   | 40/50 [00:45<00:14,  1.49s/repo]2025-03-12 20:17:24,747 - INFO - Processing repo: aishAgarwal04/Python-Binary-Search-2


Completed Edt12/Platform-Fighting-game-WIP: saved 4 library imports (total: 638)
Processing aishAgarwal04/Python-Binary-Search-2...


Processing Repositories:  82%|█████████████   | 41/50 [00:46<00:11,  1.23s/repo]2025-03-12 20:17:25,361 - INFO - Processing repo: PawQualityProducts/HeifER


Completed aishAgarwal04/Python-Binary-Search-2: no library imports found
Processing PawQualityProducts/HeifER...


Processing Repositories:  84%|█████████████▍  | 42/50 [00:48<00:12,  1.62s/repo]2025-03-12 20:17:27,897 - INFO - Processing repo: PRATIK-BOTHRA/c142


Completed PawQualityProducts/HeifER: saved 3 library imports (total: 641)
Processing PRATIK-BOTHRA/c142...


Processing Repositories:  86%|█████████████▊  | 43/50 [00:49<00:09,  1.36s/repo]2025-03-12 20:17:28,644 - INFO - Processing repo: DavideRebuffo/compiti_vacanze


Completed PRATIK-BOTHRA/c142: saved 7 library imports (total: 648)
Processing DavideRebuffo/compiti_vacanze...


Processing Repositories:  88%|██████████████  | 44/50 [00:50<00:06,  1.17s/repo]2025-03-12 20:17:29,363 - INFO - Processing repo: sskyu1/djangogram


Completed DavideRebuffo/compiti_vacanze: saved 2 library imports (total: 650)
Processing sskyu1/djangogram...


Processing Repositories:  90%|██████████████▍ | 45/50 [00:51<00:05,  1.02s/repo]2025-03-12 20:17:30,039 - INFO - Processing repo: jsfenfen/irs_527


Completed sskyu1/djangogram: saved 18 library imports (total: 668)
Processing jsfenfen/irs_527...


Processing Repositories:  92%|██████████████▋ | 46/50 [00:51<00:03,  1.11repo/s]2025-03-12 20:17:30,659 - INFO - Processing repo: Furkan9268/turtlebot_create


Completed jsfenfen/irs_527: saved 9 library imports (total: 677)
Processing Furkan9268/turtlebot_create...


Processing Repositories:  94%|███████████████ | 47/50 [00:52<00:02,  1.15repo/s]2025-03-12 20:17:31,450 - INFO - Processing repo: dz0ny/gp.recipe.node


Completed Furkan9268/turtlebot_create: saved 49 library imports (total: 726)
Processing dz0ny/gp.recipe.node...


Processing Repositories:  96%|███████████████▎| 48/50 [00:53<00:01,  1.24repo/s]2025-03-12 20:17:32,114 - INFO - Processing repo: MeGaPk/makerbot-gen5-api


Completed dz0ny/gp.recipe.node: saved 39 library imports (total: 765)
Processing MeGaPk/makerbot-gen5-api...


Processing Repositories:  98%|███████████████▋| 49/50 [00:53<00:00,  1.32repo/s]2025-03-12 20:17:32,750 - INFO - Processing repo: leafiy/sublime-less2css


Completed MeGaPk/makerbot-gen5-api: saved 43 library imports (total: 808)
Processing leafiy/sublime-less2css...


Processing Repositories: 100%|████████████████| 50/50 [00:54<00:00,  1.09s/repo]
2025-03-12 20:17:33,544 - INFO - Analysis complete. Analyzed 824 library imports across 50 repositories
2025-03-12 20:17:33,544 - INFO - Results saved to library_usage.csv


Completed leafiy/sublime-less2css: saved 16 library imports (total: 824)


NameError: name 'sys' is not defined