In [1]:
import requests
import time
import csv
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import re
from datetime import datetime
from dotenv import load_dotenv
import os
from collections import Counter

Step 1 - Batch scrape GitHub URLs, save to `assets_with_github.csv` and `assets_without_github.csv`

In [2]:
def scrape_assets(max_pages=13):
    all_assets = []
    assets_with_github = []
    assets_without_github = []
    
    for page in range(max_pages):
        url = f"https://godotengine.org/asset-library/asset?max_results=500&page={page}&sort=name"
        print(f"Scraping page {page}...")
        
        try:
            response = requests.get(url)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'html.parser')
            asset_items = soup.find_all('li', class_='asset-item')
            
            if not asset_items:
                print(f"No more assets found on page {page}. Stopping.")
                break
                
            print(f"Found {len(asset_items)} assets on page {page}")
            
            for item in asset_items:
                asset_data = {}
                
                title_elem = item.find('h4')
                if title_elem:
                    asset_data['title'] = title_elem.get_text(strip=True)
                
                link_elem = item.find('a', class_='asset-header')
                if link_elem and link_elem.get('href'):
                    href = link_elem['href']
                    asset_id = href.split('/')[-1]
                    asset_data['asset_id'] = asset_id
                    asset_data['asset_url'] = f"https://godotengine.org{href}"
                
                primary_tag = item.find('span', class_='label-primary')
                if primary_tag:
                    asset_data['category'] = primary_tag.get_text(strip=True)
                
                license_tag = item.find('span', class_='label-default')
                if license_tag:
                    asset_data['license'] = license_tag.get_text(strip=True)
                
                footer = item.find('div', class_='asset-footer')
                if footer:
                    author_link = footer.find('a')
                    if author_link:
                        author_text = author_link.get('title', '')
                        if 'Search assets by' in author_text:
                            author = author_text.replace('Search assets by', '').strip(" '\"")
                            asset_data['author'] = author
                
                img_elem = item.find('img', class_='media-object')
                github_url = None
                if img_elem and img_elem.get('src'):
                    icon_src = img_elem['src']
                    asset_data['icon_url'] = icon_src
                    
                    if 'github.com' in icon_src or ('githubusercontent.com' in icon_src and 'user-images.githubusercontent.com' not in icon_src):
                        # Handle github.com and raw.githubusercontent.com (but not user-images.githubusercontent.com)
                        if 'githubusercontent.com' in icon_src:
                            # For raw.githubusercontent.com/user/repo/branch/file
                            github_match = re.search(r'githubusercontent\.com/([^/]+/[^/]+)', icon_src)
                        else:
                            # For github.com URLs
                            github_match = re.search(r'github\.com/([^/]+/[^/]+)', icon_src)
                            
                        if github_match:
                            repo_path = github_match.group(1)
                            repo_path = repo_path.split('/blob/')[0].split('/raw/')[0].split('?')[0]
                            github_url = f"https://github.com/{repo_path}"
                            asset_data['github_url'] = github_url
                            print(f"Success: {asset_data.get('title', 'Unknown')}")
                        else:
                            print(f"GitHub found in icon but regex failed for '{asset_data.get('title', 'Unknown')}': {icon_src}")
                    else:
                        print(f"No GitHub in icon for '{asset_data.get('title', 'Unknown')}': {icon_src}")
                else:
                    print(f"No icon found for '{asset_data.get('title', 'Unknown')}'")
                          
                if asset_data:
                    all_assets.append(asset_data)
                    if github_url:
                        assets_with_github.append(asset_data)
                    else:
                        assets_without_github.append(asset_data)
            
            time.sleep(5)
            
        except Exception as e:
            print(f"Error scraping page {page}: {e}")
            break
    
    return assets_with_github, assets_without_github

print("Starting Godot Asset Library scraping...")
assets_with_github, assets_without_github = scrape_assets()

print(f"\n=== SAVING {len(assets_with_github)} ASSETS WITH GITHUB ===")
with open('assets_with_github.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'category', 'license', 'author', 'asset_url', 'github_url'])
    for asset in assets_with_github:
        writer.writerow([
            asset.get('title', ''),
            asset.get('category', ''),
            asset.get('license', ''),
            asset.get('author', ''),
            asset.get('asset_url', ''),
            asset.get('github_url', '')
        ])

print(f"\n=== SAVING {len(assets_without_github)} ASSETS WITHOUT GITHUB ===")
with open('assets_without_github.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'category', 'license', 'author', 'asset_url'])
    for asset in assets_without_github:
        writer.writerow([
            asset.get('title', ''),
            asset.get('category', ''),
            asset.get('license', ''),
            asset.get('author', ''),
            asset.get('asset_url', '')
        ])

print("Step 1 complete")

Starting Godot Asset Library scraping...
Scraping page 0...
Found 500 assets on page 0
Success: Godot Atlas Cutter
Success: Packed Scene Instantiater
Success: Takin Game Template
Success: Godot 3D Flight Control Tutorial
Success: Thirdperson Controller (.NET)
Success: 3D Cross The Road Template
Success: 3d Maze Generator
Success: Alexandria - alexandria.netserver
Success: DungeonCrawler3D
Success: Follow Parent
Success: GdScript2All
Success: Godot Tiled Tools
Success: Godot-Spyglass
Success: GodotTouchCamera
No GitHub in icon for 'Kenney Prototype Tools':  	https://gitlab.com/MrMinimal/godot-kenney-prototype-tools/-/raw/master/icon.PNG
Success: Kenney's UI Audio
Success: localization_with_google_sheet
Success: RhythmNotifier - Sync Your Game to the Beat of the Music (Sound & Audio)
Success: TextEditor Integration
Success: TODO 4
Success: Virtual Joystick
Success: Visibility shortcut
Success: "Immersive" First Person Controller
Success: .mtl loader
No GitHub in icon for '.nw Level Impor

Step 2 - Find GitHub URLS not found in Step 1, save to `github_from_viewfiles.csv` and `non_github_assets_final.csv`

In [3]:
def extract_github_from_viewfiles():
    github_from_viewfiles = []
    non_github_assets = []
    
    with open('assets_without_github.csv', 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        assets = list(reader)
    
    print(f"Processing {len(assets)} assets without GitHub...")
    
    for i, asset in enumerate(assets):
        print(f"Processing {i+1}/{len(assets)}: {asset['title']}")
        
        try:
            response = requests.get(asset['asset_url'])
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'html.parser')
            
            view_files_btn = soup.find('a', class_='btn btn-default')
            if view_files_btn and view_files_btn.find('i', class_='glyphicon-folder-open'):
                view_files_url = view_files_btn.get('href', '')
                
                if 'github.com' in view_files_url:
                    repo_match = re.search(r'github\.com/([^/]+/[^/]+)', view_files_url)
                    if repo_match:
                        repo_path = repo_match.group(1)
                        repo_path = repo_path.split('/tree/')[0].split('/blob/')[0]
                        asset['github_url'] = f"https://github.com/{repo_path}"
                        github_from_viewfiles.append(asset)
                        print(f"  Success: {asset['github_url']}")
                    else:
                        print(f"  GitHub found but regex failed: {view_files_url}")
                        non_github_assets.append(asset)
                else:
                    print(f"  Non-GitHub view files: {view_files_url}")
                    non_github_assets.append(asset)
            else:
                print(f"  No View Files button found")
                non_github_assets.append(asset)
            
        except Exception as e:
            print(f"  Error processing {asset['title']}: {e}")
            non_github_assets.append(asset)
        
        time.sleep(3)
    
    return github_from_viewfiles, non_github_assets

github_from_viewfiles, non_github_assets = extract_github_from_viewfiles()

print(f"\n=== SAVING {len(github_from_viewfiles)} GITHUB REPOS FROM VIEW FILES ===")
with open('github_from_viewfiles.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'category', 'license', 'author', 'asset_url', 'github_url'])
    for asset in github_from_viewfiles:
        writer.writerow([
            asset.get('title', ''),
            asset.get('category', ''),
            asset.get('license', ''),
            asset.get('author', ''),
            asset.get('asset_url', ''),
            asset.get('github_url', '')
        ])

non_github_assets.sort(key=lambda x: x.get('title', '').lower())

print(f"\n=== SAVING {len(non_github_assets)} NON-GITHUB ASSETS (ALPHABETICAL) ===")
with open('non_github_assets_final.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'category', 'license', 'author', 'asset_url'])
    for asset in non_github_assets:
        writer.writerow([
            asset.get('title', ''),
            asset.get('category', ''),
            asset.get('license', ''),
            asset.get('author', ''),
            asset.get('asset_url', '')
        ])

print("Step 2 complete")

Processing 758 assets without GitHub...
Processing 1/758: Kenney Prototype Tools
  Non-GitHub view files: https://gitlab.com/MrMinimal/godot-kenney-prototype-tools
Processing 2/758: .nw Level Importer
  Non-GitHub view files: https://gitlab.com/xzirox/nw-level-importer
Processing 3/758: 2.5D Sprite Rotator
  Success: https://github.com/styr0x/2.5D-Sprite-Rotator--Godot-4-
Processing 4/758: 2.5D World Map with Selection
  Success: https://github.com/rafgro/godot25dmap
Processing 5/758: 2048 Demo
  Success: https://github.com/DeanNevan/Godot4-Beta7-Test1-2048
Processing 6/758: 2090s Language System
  Success: https://github.com/NeroNekro/Godot_Language_System
Processing 7/758: 2090s Storage System
  Success: https://github.com/NeroNekro/Godot_Storage_System
Processing 8/758: 2D Camera Controller Singleton / with Dev Tools
  Success: https://github.com/jessejburton/GodotCameraController2D
Processing 9/758: 2D Camera Limiter
  Success: https://github.com/markopolojorgensen/godot_2d_camera_

Step 3 - Create `raw_github_rankings.csv`

In [4]:
class GitHubAPI:
    def __init__(self):
        load_dotenv()
        self.token = os.getenv('GITHUB_TOKEN')
        if not self.token:
            raise ValueError("GITHUB_TOKEN environment variable not set")
        
        self.base_url = 'https://api.github.com'
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {self.token}',
            'Accept': 'application/vnd.github+json',
            'X-GitHub-Api-Version': '2022-11-28',
            'User-Agent': 'GodotAssetAnalyzer/1.0'
        })
    
    def get_repo_stats(self, repo_path):
        api_url = f"{self.base_url}/repos/{repo_path}"
        
        try:
            response = self.session.get(api_url)
            
            print(f"    API Response: {response.status_code}")
            if 'X-RateLimit-Remaining' in response.headers:
                print(f"    Rate limit remaining: {response.headers['X-RateLimit-Remaining']}")
            
            if response.status_code == 403:
                print(f"    403 Error response: {response.text[:200]}")
                return {'stars': 0, 'forks': 0, 'last_updated': '', 'api_success': False, 'rate_limited': True}
            
            if response.status_code == 404:
                print(f"    Repository not found: {repo_path}")
                return {'stars': 0, 'forks': 0, 'last_updated': '', 'api_success': False}
            
            response.raise_for_status()
            data = response.json()
            
            return {
                'stars': data.get('stargazers_count', 0),
                'forks': data.get('forks_count', 0),
                'last_updated': data.get('updated_at', ''),
                'api_success': True
            }
        except Exception as e:
            print(f"  API error for {repo_path}: {e}")
            return {'stars': 0, 'forks': 0, 'last_updated': '', 'api_success': False}


In [5]:
def process_github_csvs():
    github_api = GitHubAPI()
    
    try:
        with open('github_api_progress.csv', 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            final_data = list(reader)
            processed_urls = {repo['github_url'] for repo in final_data}
            print(f"Resuming from checkpoint: {len(final_data)} already processed")
    except FileNotFoundError:
        final_data = []
        processed_urls = set()
    
    all_repos = []
    
    csv_files = ['assets_with_github.csv', 'github_from_viewfiles.csv']
    
    for csv_file in csv_files:
        try:
            with open(csv_file, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                repos = list(reader)
                all_repos.extend(repos)
                print(f"Loaded {len(repos)} repos from {csv_file}")
        except FileNotFoundError:
            print(f"File {csv_file} not found, skipping")
    
    print(f"\nProcessing {len(all_repos)} total repositories...")
    
    for i, repo in enumerate(all_repos):
        github_url = repo.get('github_url', '')
        
        if github_url in processed_urls:
            print(f"Skipping {i+1}/{len(all_repos)}: {repo.get('title')} (already processed)")
            continue
            
        print(f"\nProcessing {i+1}/{len(all_repos)}: {repo.get('title', 'NO TITLE')}")
        
        github_repo = ''
        if github_url and 'github.com' in github_url:
            repo_match = re.search(r'github\.com/([^/]+/[^/]+)', github_url)
            if repo_match:
                github_repo = repo_match.group(1)
                github_repo = github_repo.split('/tree/')[0].split('/blob/')[0]
        
        if github_repo:
            print(f"  Making API call for {github_repo}")
            stats = github_api.get_repo_stats(github_repo)
            
            if stats.get('rate_limited'):
                print("Rate limit hit, stopping processing")
                break
            
            final_repo = {
                'title': repo.get('title', ''),
                'github_url': github_url,
                'github_repo': github_repo,
                'stars': stats['stars'],
                'forks': stats['forks'],
                'last_updated': stats['last_updated'],
                'category': repo.get('category', ''),
                'license': repo.get('license', ''),
                'author': repo.get('author', ''),
                'api_success': stats['api_success']
            }
            
            final_data.append(final_repo)
            processed_urls.add(github_url)
            print(f"  Stars: {stats['stars']}, Forks: {stats['forks']}, Success: {stats['api_success']}")
        else:
            print(f"  Could not extract github_repo from URL, skipping")
        
        if len(final_data) % 10 == 0:
            with open('github_api_progress.csv', 'w', newline='', encoding='utf-8') as f:
                if final_data:
                    writer = csv.DictWriter(f, fieldnames=final_data[0].keys())
                    writer.writeheader()
                    writer.writerows(final_data)
            print(f"  Checkpoint saved: {len(final_data)} repos processed")
        
        time.sleep(1.1)
    
    return final_data

final_repos = process_github_csvs()

print(f"\n=== SAVING {len(final_repos)} REPOS WITH RAW GITHUB DATA ===")
with open('raw_github_rankings.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'github_url', 'github_repo', 'stars', 'forks', 'last_updated', 'category', 'license', 'author', 'api_success'])
    
    for repo in final_repos:
        writer.writerow([
            repo['title'],
            repo['github_url'],
            repo['github_repo'],
            repo['stars'],
            repo['forks'],
            repo['last_updated'],
            repo['category'],
            repo['license'],
            repo['author'],
            repo['api_success']
        ])

print("Step 3 complete - raw data with duplicates preserved")

Loaded 3217 repos from assets_with_github.csv
Loaded 579 repos from github_from_viewfiles.csv

Processing 3796 total repositories...

Processing 1/3796: Godot Atlas Cutter
  Making API call for newold3/Auto-Slice-Atlas
    API Response: 200
    Rate limit remaining: 4999
  Stars: 8, Forks: 1, Success: True

Processing 2/3796: Packed Scene Instantiater
  Making API call for Lazy-Rabbit-2001/godot-addon-packed-scene-instantiater
    API Response: 200
    Rate limit remaining: 4998
  Stars: 1, Forks: 0, Success: True

Processing 3/3796: Takin Game Template
  Making API call for TinyTakinTeller/TakinGodotTemplate
    API Response: 200
    Rate limit remaining: 4997
  Stars: 306, Forks: 20, Success: True

Processing 4/3796: Godot 3D Flight Control Tutorial
  Making API call for UtMan88/Godot3DFlightControls
    API Response: 200
    Rate limit remaining: 4996
  Stars: 11, Forks: 0, Success: True

Processing 5/3796: Thirdperson Controller (.NET)
  Making API call for vaporvee/gd-net-thirdper

### Step 4 - Clean

### Step 5 - Plot

In [8]:
def create_plots():
    with open('raw_github_rankings.csv', 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        repos = list(reader)
    
    # Filter only successful API calls
    successful_repos = [repo for repo in repos if repo['api_success'] == 'True']
    
    stars = [int(repo['stars']) for repo in successful_repos]
    forks = [int(repo['forks']) for repo in successful_repos]
    categories = [repo['category'] for repo in successful_repos]
    titles = [repo['title'] for repo in successful_repos]
    
    last_updated = []
    for repo in successful_repos:
        try:
            if repo['last_updated']:
                date_obj = datetime.fromisoformat(repo['last_updated'].replace('Z', '+00:00'))
                last_updated.append(date_obj)
            else:
                last_updated.append(None)
        except:
            last_updated.append(None)
    
    # 1. Top repositories by stars
    plt.figure(figsize=(6, 8))
    sorted_repos = sorted(zip(titles, stars), key=lambda x: x[1], reverse=True)
    top_titles, top_stars = zip(*sorted_repos[:30])
    
    plt.barh(range(len(top_titles)), top_stars, color='#4CAF50')
    plt.yticks(range(len(top_titles)), [title[:30] + '...' if len(title) > 30 else title for title in top_titles])
    plt.xlabel('Stars (log scale)')
    plt.xscale('log')
    plt.title('Top 30 by Stars', fontsize=10, fontweight='bold')
    plt.gca().invert_yaxis()
    plt.grid(True, alpha=0.3, axis='x')
    
    for i, v in enumerate(top_stars):
        plt.text(v * 1.1, i, f'{v:,}', va='center', fontsize=8)
    
    plt.tight_layout()
    plt.savefig('top_repos_by_stars.png')
    plt.close()
        
    # 2. Category distribution
    plt.figure(figsize=(4, 4))
    category_counts = Counter(categories)
    top_categories = dict(category_counts.most_common(8))
    
    colors = plt.cm.Set3(np.linspace(0, 1, len(top_categories)))
    wedges, texts, autotexts = plt.pie(top_categories.values(), labels=top_categories.keys(), 
                                       autopct='%1.1f%%', colors=colors, startangle=90)
    plt.title('Distribution by Category', fontsize=10, fontweight='bold')
    
    for autotext in autotexts:
        autotext.set_fontsize(10)
        autotext.set_color('black')
    
    plt.tight_layout()
    plt.savefig('category_distribution.png')
    plt.close()
    
    # 3. Activity timeline (last updated dates) - 3 years, wide chart
    plt.figure(figsize=(8, 3))
    valid_dates = [date for date in last_updated if date is not None]
    if valid_dates:
        months = [date.strftime('%Y-%m') for date in valid_dates]
        month_counts = Counter(months)
        
        sorted_months = sorted(month_counts.items())
        recent_months = sorted_months[-36:]  # Last 36 months (3 years)
        
        if recent_months:
            month_labels, counts = zip(*recent_months)
            plt.bar(range(len(month_labels)), counts, color='#FF9800', alpha=0.7)
            plt.xticks(range(len(month_labels)), month_labels, rotation=45, ha='right')
            plt.ylabel('Repositories Updated')
            plt.title('Activity Timeline (Last 3 Years)', fontsize=10, fontweight='bold')
            plt.grid(True, alpha=0.3, axis='y')
    
    plt.tight_layout()
    plt.savefig('activity_timeline.png')
    plt.close()
    
    print(f"Analysis of {len(successful_repos)} repositories with GitHub data:")
    print(f"Total repositories processed: {len(repos)}")
    print(f"Successful API calls: {len(successful_repos)}")
    print(f"Failed API calls: {len(repos) - len(successful_repos)}")
    print(f"Total stars: {sum(stars):,}")
    print(f"Total forks: {sum(forks):,}")
    print(f"Average stars: {np.mean(stars):.1f}")
    print(f"Median stars: {np.median(stars):.1f}")
    
    if category_counts:
        print(f"Most popular category: {category_counts.most_common(1)[0][0]} ({category_counts.most_common(1)[0][1]} repos)")
    
    high_star_repos = [repo for repo in successful_repos if int(repo['stars']) > 100]
    print(f"Repositories with >100 stars: {len(high_star_repos)}")
    
    top_5_repos = sorted(successful_repos, key=lambda x: int(x['stars']), reverse=True)[:5]
    print("\nTop 5 repositories:")
    for i, repo in enumerate(top_5_repos, 1):
        print(f"{i}. {repo['title']} - {repo['stars']} stars ({repo['category']})")
    
    print("\nGenerated 4 plot files:")
    print("- top_repos_by_stars.png")
    print("- stars_vs_forks.png") 
    print("- category_distribution.png")
    print("- activity_timeline.png")

create_plots()

Analysis of 3165 repositories with GitHub data:
Total repositories processed: 3243
Successful API calls: 3165
Failed API calls: 78
Total stars: 179,132
Total forks: 16,134
Average stars: 56.6
Median stars: 10.0
Most popular category: Tools (1061 repos)
Repositories with >100 stars: 328

Top 5 repositories:
1. Godot Demo Projects - 6935 stars (Demos)
2. Material Maker - 4059 stars (3D Tools)
3. Terrain3D - 2990 stars (3D Tools)
4. Dialogue Manager - 2779 stars (Tools)
5. Phantom Camera - 2658 stars (Tools)

Generated 4 plot files:
- top_repos_by_stars.png
- stars_vs_forks.png
- category_distribution.png
- activity_timeline.png


### Step 6 - Create README.md

In [10]:
def generate_readme():
    # Read the data from CSV files
    with open('raw_github_rankings.csv', 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        repos = list(reader)
    
    # Filter only successful API calls
    successful_repos = [repo for repo in repos if repo['api_success'] == 'True']
    
    stars = [int(repo['stars']) for repo in successful_repos]
    forks = [int(repo['forks']) for repo in successful_repos]
    categories = [repo['category'] for repo in successful_repos]
    
    # Group by category
    category_groups = {}
    for repo in successful_repos:
        category = repo['category'] if repo['category'] else 'Other'
        if category not in category_groups:
            category_groups[category] = []
        category_groups[category].append(repo)
    
    # Sort each category by stars
    for category in category_groups:
        category_groups[category].sort(key=lambda x: int(x['stars']), reverse=True)
    
    # Sort categories by their top repo's star count
    sorted_categories = sorted(category_groups.items(), 
                             key=lambda x: int(x[1][0]['stars']) if x[1] else 0, 
                             reverse=True)
    
    # Load non-GitHub assets for additional table
    non_github_assets = []
    try:
        with open('non_github_assets_final.csv', 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            non_github_assets = list(reader)
    except FileNotFoundError:
        pass
    
    # Start building README
    markdown = """# Godot Stars!

Here we provide a leaderboard for assets in the Godot Asset Library.

## Overview

"""
    
    # Add statistics
    category_counts = Counter(categories)
    high_star_repos = [repo for repo in successful_repos if int(repo['stars']) > 100]
    
    markdown += f"""- Total repositories analyzed: {len(repos):,}
- Successful GitHub API calls: {len(successful_repos):,}
- Total stars across all repos: {sum(stars):,}
- Total forks across all repos: {sum(forks):,}
- Average stars per repository: {np.mean(stars):.1f}
- Median stars per repository: {np.median(stars):.1f}
- Repositories with >100 stars: {len(high_star_repos)}
- Categories represented: {len(category_counts)}

"""
    
    if category_counts:
        top_category = category_counts.most_common(1)[0]
        markdown += f"Most popular category: {top_category[0]} ({top_category[1]} repositories)\n\n"
    
    # Add visualizations section
    markdown += """## Repositories by Category

Repositories with 10+ stars, ranked by GitHub stars.

"""
    
    # Add category tables
    for category, category_repos in sorted_categories:
        repos_10_plus = [repo for repo in category_repos if int(repo['stars']) >= 10]
        if not repos_10_plus:
            continue
            
        markdown += f"### {category}\n\n"
        markdown += "| Repository | Stars | Forks | Last Updated | License |\n"
        markdown += "|------------|-------|-------|--------------|----------|\n"
        
        for repo in repos_10_plus:
            name = f"[{repo['title']}]({repo['github_url']})"
            stars = f"{int(repo['stars']):,}"
            forks = f"{int(repo['forks']):,}"
            license_text = repo.get('license', 'N/A')
            
            try:
                if repo['last_updated']:
                    date_obj = datetime.fromisoformat(repo['last_updated'].replace('Z', '+00:00'))
                    last_updated = date_obj.strftime('%Y-%m-%d')
                else:
                    last_updated = 'N/A'
            except:
                last_updated = 'N/A'
            
            markdown += f"| {name} | {stars} | {forks} | {last_updated} | {license_text} |\n"
        
    markdown += "\n"
    markdown += """## Visualizations

![Top Repositories by Stars](top_repos_by_stars.png)

![Category Distribution](category_distribution.png)

![Repository Activity Timeline](activity_timeline.png)"""

    # Add non-GitHub assets table if available
    markdown += "\n"
    if non_github_assets:
        markdown += f"""## Non-GitHub Assets

{len(non_github_assets)} assets without GitHub repositories, sorted alphabetically.

| Asset | Category | License | Author |
|-------|----------|---------|---------|
"""
        
        for asset in non_github_assets:#[:20]:  # Show first 20
            name = f"[{asset['title']}]({asset['asset_url']})"
            category = asset.get('category', 'N/A')
            license_text = asset.get('license', 'N/A')
            author = asset.get('author', 'N/A')
            markdown += f"| {name} | {category} | {license_text} | {author} |\n"
        
        #if len(non_github_assets) > 20:
            #markdown += f"\n...and {len(non_github_assets) - 20} more assets\n"
        
        markdown += "\n"
    
    markdown += f"\nGenerated on {datetime.now().strftime('%Y-%m-%d')}"

    # Write README
    with open('README.md', 'w', encoding='utf-8') as f:
        f.write(markdown)
    
    print(f"Generated README.md with:")
    print(f"- {len(sorted_categories)} categories")
    print(f"- {len(successful_repos)} repositories with GitHub data")
    if non_github_assets:
        print(f"- {len(non_github_assets)} non-GitHub assets")
    print(f"- 4 embedded visualization images")
    
    print(f"\nTop categories by leading repository:")
    for category, category_repos in sorted_categories[:5]:
        if category_repos:
            top_repo = category_repos[0]
            print(f"  {category}: {top_repo['title']} ({int(top_repo['stars']):,} stars)")

generate_readme()

Generated README.md with:
- 10 categories
- 3165 repositories with GitHub data
- 179 non-GitHub assets
- 4 embedded visualization images

Top categories by leading repository:
  Demos: Godot Demo Projects (6,935 stars)
  3D Tools: Material Maker (4,059 stars)
  Tools: Dialogue Manager (2,779 stars)
  2D Tools: RMSmartShape2D (1,503 stars)
  Templates: COGITO : Immersive Sim Template (1,357 stars)
