In [29]:
import csv
import re
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [24]:
def _github_repo_from_url(url):
    """Return ``owner/repo`` when ``url`` points into a GitHub repository, else None.

    Only hosts whose first two path segments are the owner and repository name
    are accepted (github.com, raw.githubusercontent.com, raw.github.com).
    Other GitHub-owned hosts such as avatars.githubusercontent.com or
    user-images.githubusercontent.com do not carry a repository in their path,
    so matching them would invent repositories like ``u/12345?v=4``.
    """
    match = re.match(
        r'(?:https?:)?//(?:www\.)?'
        r'(?:github\.com|raw\.githubusercontent\.com|raw\.github\.com)'
        r'/([^/?#]+)/([^/?#]+)',
        url,
        re.IGNORECASE,
    )
    if not match:
        return None
    return f"{match.group(1)}/{match.group(2)}"


def _parse_asset_item(item):
    """Extract the fields of one ``<li class="asset-item">`` element into a dict.

    Keys are only set when the corresponding element exists, except ``tags``
    which is always present (possibly as an empty list).
    """
    asset_data = {}

    title_elem = item.find('h4')
    if title_elem:
        asset_data['title'] = title_elem.get_text(strip=True)

    link_elem = item.find('a', class_='asset-header')
    if link_elem and link_elem.get('href'):
        href = link_elem['href']
        # The asset id is the last path segment of the relative link.
        asset_data['asset_id'] = href.split('/')[-1]
        asset_data['asset_url'] = f"https://godotengine.org{href}"

    img_elem = item.find('img', class_='media-object')
    if img_elem and img_elem.get('src'):
        icon_src = img_elem['src']
        asset_data['icon_url'] = icon_src

        # An icon served from the project's own repository reveals that repository.
        github_repo = _github_repo_from_url(icon_src)
        if github_repo:
            asset_data['github_repo'] = github_repo
            asset_data['github_url'] = f"https://github.com/{github_repo}"

    asset_data['tags'] = [
        span.get_text(strip=True) for span in item.find_all('span', class_='label')
    ]

    # Category, engine version and license are distinguished by their label class.
    for key, label_class in (
        ('category', 'label-primary'),
        ('godot_version', 'label-info'),
        ('license', 'label-default'),
    ):
        label = item.find('span', class_=label_class)
        if label:
            asset_data[key] = label.get_text(strip=True)

    footer = item.find('div', class_='asset-footer')
    author_link = footer.find('a') if footer else None
    if author_link:
        author_text = author_link.get('title', '')
        if 'Search assets by' in author_text:
            asset_data['author'] = author_text.replace('Search assets by', '').strip(" '\"")

    return asset_data


def scrape_assets(max_pages=1, delay=2, timeout=30):
    """Scrape listing pages of the Godot Asset Library.

    Args:
        max_pages: Number of listing pages to fetch, starting at page 0.
        delay: Seconds to wait between two page requests.
        timeout: Seconds to wait for the server before giving up on a page.

    Returns:
        A tuple ``(github_assets, non_github_assets)`` of lists of asset dicts.
        An asset goes to the first list when a GitHub repository could be
        derived from its icon URL, otherwise to the second.

    A page that fails to download or parse is reported on stdout and skipped;
    the remaining pages are still processed.
    """
    github_assets = []
    non_github_assets = []
    total_extracted = 0

    for page in range(max_pages):
        url = f"https://godotengine.org/asset-library/asset?max_results=40&page={page}&sort=name"
        print(f"Scraping page {page}...")

        try:
            # Without a timeout a stalled connection would hang the notebook forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            asset_items = soup.find_all('li', class_='asset-item')
            print(f"Found {len(asset_items)} assets on page {page}")

            for item in asset_items:
                asset_data = _parse_asset_item(item)
                total_extracted += 1
                if asset_data.get('github_url'):
                    github_assets.append(asset_data)
                else:
                    non_github_assets.append(asset_data)

            print(f"Extracted {total_extracted} total assets so far")

        except Exception as e:
            # Best effort: report the failure and carry on with the next page.
            print(f"Error scraping page {page}: {e}")

        # Be polite between requests, but do not wait after the final page.
        if page < max_pages - 1:
            time.sleep(delay)

    return github_assets, non_github_assets

# Step 1: scrape the listing, then persist the two groups of assets as CSV files.
print("Starting Godot Asset Library scraping...")
github_assets, non_github_assets = scrape_assets(max_pages=1)

# Assets whose GitHub repository was already found during scraping.
github_columns = ['title', 'github_url', 'category', 'license', 'author', 'asset_url']
print(f"\n=== SAVING {len(github_assets)} GITHUB REPOS ===")
with open('godot_github_repos.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(github_columns)
    # Missing fields are written as empty strings.
    writer.writerows(
        [asset.get(column, '') for column in github_columns]
        for asset in github_assets
    )

# Assets without a GitHub repository so far; they have no github_url column.
non_github_columns = ['title', 'category', 'license', 'author', 'asset_url']
print(f"\n=== SAVING {len(non_github_assets)} NON-GITHUB ASSETS ===")
with open('godot_non_github_assets.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(non_github_columns)
    writer.writerows(
        [asset.get(column, '') for column in non_github_columns]
        for asset in non_github_assets
    )

print("Step 1 complete")

Starting Godot Asset Library scraping...
Scraping page 0...
Found 40 assets on page 0
Extracted 40 total assets so far

=== SAVING 32 GITHUB REPOS ===

=== SAVING 8 NON-GITHUB ASSETS ===
Step 1 complete


In [28]:
def extract_github_from_download_urls(csv_path='godot_non_github_assets.csv', delay=2, timeout=30):
    """Look up the download link of each listed asset and keep the GitHub-hosted ones.

    Args:
        csv_path: CSV file with at least ``title`` and ``asset_url`` columns.
        delay: Seconds to wait between two asset page requests.
        timeout: Seconds to wait for the server before giving up on an asset page.

    Returns:
        The rows (dicts) whose download link points to github.com, each extended
        with ``github_repo`` (``owner/repo``) and ``github_url``.

    An asset whose page cannot be fetched or parsed is reported on stdout and
    skipped; the remaining assets are still processed.
    """
    github_from_download = []

    # newline='' lets the csv module handle embedded newlines in quoted fields.
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        assets = list(csv.DictReader(f))

    total = len(assets)
    print(f"Processing {total} non-GitHub assets...")

    for i, asset in enumerate(assets):
        print(f"Processing {i+1}/{total}: {asset['title']}")

        try:
            # Without a timeout a stalled connection would hang the notebook forever.
            response = requests.get(asset['asset_url'], timeout=timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # Find the download button specifically: a primary button holding the download icon.
            download_btn = soup.find('a', class_='btn btn-primary')
            if download_btn and download_btn.find('i', class_='glyphicon-download'):
                download_url = download_btn.get('href', '')

                # A successful match already implies that the URL contains github.com.
                repo_match = re.search(r'github\.com/([^/]+/[^/]+)', download_url)
                if repo_match:
                    asset['github_repo'] = repo_match.group(1)
                    asset['github_url'] = f"https://github.com/{repo_match.group(1)}"
                    github_from_download.append(asset)
                    print(f"  Found GitHub: {asset['github_url']}")
                else:
                    print(f"  Non-GitHub download: {download_url}")
            else:
                print("  No download button found")

        except Exception as e:
            # Best effort: report the failure and carry on with the next asset.
            print(f"  Error processing {asset['title']}: {e}")

        # Be polite between requests, but do not wait after the final asset.
        if i < total - 1:
            time.sleep(delay)

    return github_from_download

# Step 2: resolve download links of the remaining assets and store the GitHub hits.
github_downloads = extract_github_from_download_urls()

download_columns = ['title', 'github_url', 'category', 'license', 'author', 'asset_url']
print(f"\n=== SAVING {len(github_downloads)} GITHUB REPOS FROM DOWNLOADS ===")
with open('godot_github_from_downloads.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(download_columns)
    # Missing fields are written as empty strings.
    writer.writerows(
        [asset.get(column, '') for column in download_columns]
        for asset in github_downloads
    )

print("Step 2 complete")

Processing 8 non-GitHub assets...
Processing 1/8: Kenney Prototype Tools
  Non-GitHub download: https://gitlab.com/MrMinimal/godot-kenney-prototype-tools/-/archive/2bbb2494466d9a55602a8954f52a136cdbbe541f.zip
Processing 2/8: .nw Level Importer
  Non-GitHub download: https://gitlab.com/xzirox/nw-level-importer/-/archive/2a2eb1a3056fc6f21b476a8d803b9ba22e280198.zip
Processing 3/8: 2.5D Sprite Rotator
  Found GitHub: https://github.com/styr0x/2.5D-Sprite-Rotator--Godot-4-
Processing 4/8: 2.5D World Map with Selection
  Found GitHub: https://github.com/rafgro/godot25dmap
Processing 5/8: 2048 Demo
  Found GitHub: https://github.com/DeanNevan/Godot4-Beta7-Test1-2048
Processing 6/8: 2090s Language System
  Found GitHub: https://github.com/NeroNekro/Godot_Language_System
Processing 7/8: 2090s Storage System
  Found GitHub: https://github.com/NeroNekro/Godot_Storage_System
Processing 8/8: 2D Camera Controller Singleton / with Dev Tools
  Found GitHub: https://github.com/jessejburton/GodotCamera

In [37]:
def get_github_stats(repo_path, timeout=10):
    """Get stars, forks, and last updated for a GitHub repo.

    Args:
        repo_path: Repository in ``owner/repo`` form.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        A dict with keys ``stars``, ``forks``, ``last_updated`` and
        ``api_success``. When the lookup fails for any reason the failure is
        printed, the counters are 0, ``last_updated`` is ``''`` and
        ``api_success`` is False; no exception is raised.

    If the ``GITHUB_TOKEN`` environment variable is set, it is sent as a bearer
    token so that the higher authenticated rate limit applies.
    """
    import os  # local import: only needed for the optional token lookup

    api_url = f"https://api.github.com/repos/{repo_path}"

    headers = {'Accept': 'application/vnd.github+json'}
    token = os.environ.get('GITHUB_TOKEN')
    if token:
        headers['Authorization'] = f"Bearer {token}"

    try:
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.get(api_url, headers=headers, timeout=timeout)
        response.raise_for_status()

        data = response.json()

        return {
            'stars': data.get('stargazers_count', 0),
            'forks': data.get('forks_count', 0),
            'last_updated': data.get('updated_at', ''),
            'api_success': True
        }
    except Exception as e:
        # Best effort: callers check api_success instead of handling exceptions.
        print(f"  API error for {repo_path}: {e}")
        return {
            'stars': 0,
            'forks': 0,
            'last_updated': '',
            'api_success': False
        }

def process_github_csvs():
    """Merge the two GitHub CSV files, deduplicate them and attach GitHub statistics.

    Reads ``godot_github_repos.csv`` and ``godot_github_from_downloads.csv``
    (a missing file is reported and skipped), keeps the first row seen for each
    non-empty ``github_url``, and calls ``get_github_stats`` for every row whose
    URL yields an ``owner/repo`` path. Progress is printed along the way and the
    loop pauses one second after each row.

    Returns:
        A list of dicts with the keys ``title``, ``github_url``, ``stars``,
        ``forks``, ``last_updated``, ``category``, ``license``, ``author`` and
        ``api_success``.
    """
    # Load from both CSV files
    loaded_rows = []
    for source_name in ('godot_github_repos.csv', 'godot_github_from_downloads.csv'):
        try:
            with open(source_name, 'r', encoding='utf-8') as handle:
                rows = list(csv.DictReader(handle))
        except FileNotFoundError:
            print(f"File {source_name} not found, skipping")
            continue
        loaded_rows.extend(rows)
        print(f"Loaded {len(rows)} repos from {source_name}")

    print(f"\nProcessing {len(loaded_rows)} total repositories...")

    # Remove duplicates based on github_url; the first occurrence wins.
    first_row_by_url = {}
    for row in loaded_rows:
        url = row.get('github_url', '')
        if url:
            first_row_by_url.setdefault(url, row)

    unique_rows = list(first_row_by_url.values())
    total = len(unique_rows)
    print(f"After deduplication: {total} unique repositories")

    final_data = []

    for position, row in enumerate(unique_rows, start=1):
        print(f"\nProcessing {position}/{total}: {row.get('title', 'NO TITLE')}")

        github_url = row.get('github_url', '')
        print(f"  github_url: '{github_url}'")

        # The CSV files carry no github_repo column, so derive owner/repo from the URL.
        path_match = re.search(r'github\.com/([^/]+/[^/]+)', github_url)
        github_repo = path_match.group(1) if path_match else ''

        print(f"  extracted github_repo: '{github_repo}'")

        if not github_repo:
            print(f"  Could not extract github_repo from URL, skipping")
        else:
            print(f"  Making API call for {github_repo}")
            stats = get_github_stats(github_repo)

            final_data.append({
                'title': row.get('title', ''),
                'github_url': github_url,
                'stars': stats['stars'],
                'forks': stats['forks'],
                'last_updated': stats['last_updated'],
                'category': row.get('category', ''),
                'license': row.get('license', ''),
                'author': row.get('author', ''),
                'api_success': stats['api_success']
            })
            print(f"  Added to final_data. Success: {stats['api_success']}")

        # Pause after every row, whether or not an API call was made.
        time.sleep(1)

    return final_data

# Step 3: fetch GitHub statistics for every known repository and store the result.
# Process all repos
final_repos = process_github_csvs()

# Sort by stars descending
final_repos.sort(key=lambda x: x['stars'], reverse=True)

# Only repos whose API call succeeded are saved: failed lookups carry placeholder
# zeros, which would be indistinguishable from real zero-star repositories.
successful_repos = [repo for repo in final_repos if repo['api_success']]
failed_count = len(final_repos) - len(successful_repos)

# Save to final CSV; the banner reports the number of rows actually written.
print(f"\n=== SAVING {len(successful_repos)} REPOS WITH GITHUB STATS ===")
if failed_count:
    print(f"Skipping {failed_count} repos whose GitHub API lookup failed")

with open('godot_github_final.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'github_url', 'stars', 'forks', 'last_updated', 'category', 'license', 'author'])

    for repo in successful_repos:
        writer.writerow([
            repo['title'],
            repo['github_url'],
            repo['stars'],
            repo['forks'],
            repo['last_updated'],
            repo['category'],
            repo['license'],
            repo['author']
        ])

print("Step 3 complete - repos sorted by stars")

Loaded 32 repos from godot_github_repos.csv
Loaded 6 repos from godot_github_from_downloads.csv

Processing 38 total repositories...
After deduplication: 36 unique repositories

Processing 1/36: Godot Atlas Cutter
  github_url: 'https://github.com/newold3/Auto-Slice-Atlas'
  extracted github_repo: 'newold3/Auto-Slice-Atlas'
  Making API call for newold3/Auto-Slice-Atlas
  Added to final_data. Success: True

Processing 2/36: Packed Scene Instantiater
  github_url: 'https://github.com/Lazy-Rabbit-2001/godot-addon-packed-scene-instantiater'
  extracted github_repo: 'Lazy-Rabbit-2001/godot-addon-packed-scene-instantiater'
  Making API call for Lazy-Rabbit-2001/godot-addon-packed-scene-instantiater
  Added to final_data. Success: True

Processing 3/36: Takin Game Template
  github_url: 'https://github.com/TinyTakinTeller/TakinGodotTemplate'
  extracted github_repo: 'TinyTakinTeller/TakinGodotTemplate'
  Making API call for TinyTakinTeller/TakinGodotTemplate
  Added to final_data. Success: True

In [42]:
def _escape_markdown_cell(text):
    """Escape characters that would break a Markdown table cell or link text."""
    for char in ('|', '[', ']'):
        text = text.replace(char, '\\' + char)
    return text


def _format_update_date(iso_timestamp):
    """Turn an ISO 8601 timestamp into ``YYYY-MM-DD``; return ``N/A`` if it cannot be parsed."""
    try:
        # datetime.fromisoformat only accepts a trailing 'Z' from Python 3.11 on.
        return datetime.fromisoformat(iso_timestamp.replace('Z', '+00:00')).strftime('%Y-%m-%d')
    except (ValueError, AttributeError):
        return 'N/A'


def generate_markdown_table(csv_path='godot_github_final.csv', output_path='README.md'):
    """Write a Markdown report with the five most-starred repositories per category.

    Args:
        csv_path: CSV file with the columns ``title``, ``github_url``, ``stars``,
            ``forks``, ``last_updated`` and ``category``.
        output_path: Markdown file to (over)write.

    Rows with an empty category are grouped under ``Other``. Categories are
    ordered by the star count of their top repository. A short summary is
    printed to stdout. Returns None.
    """
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        repos = list(csv.DictReader(f))

    # Group by category
    categories = {}
    for repo in repos:
        categories.setdefault(repo['category'] or 'Other', []).append(repo)

    # Sort each category by stars
    for category_repos in categories.values():
        category_repos.sort(key=lambda r: int(r['stars']), reverse=True)

    # Sort categories by their top repo's star count
    sorted_categories = sorted(
        categories.items(),
        key=lambda item: int(item[1][0]['stars']),
        reverse=True,
    )

    lines = ["# Top Godot GitHub Repositories by Category", ""]

    for category, category_repos in sorted_categories:
        lines += [
            f"## {category}",
            "",
            "| Name | Stars | Forks | LastUpdated |",
            "|------|-------|-------|-------------|",
        ]

        for repo in category_repos[:5]:
            # Unescaped pipes or brackets in a title would corrupt the table row or the link.
            name = f"[{_escape_markdown_cell(repo['title'])}]({repo['github_url']})"
            stars = f"{int(repo['stars']):,}"
            forks = f"{int(repo['forks']):,}"
            last_updated = _format_update_date(repo['last_updated'])

            lines.append(f"| {name} | {stars} | {forks} | {last_updated} |")

        lines.append("")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines) + "\n")

    print(f"Generated {output_path} with {len(categories)} categories")

    print(f"Categories by top repo:")
    for category, category_repos in sorted_categories:
        top_repo = category_repos[0]
        print(f"{category}: {top_repo['title']} ({int(top_repo['stars']):,} stars)")

# Build README.md from godot_github_final.csv and print the per-category summary.
generate_markdown_table()

Generated README.md with 8 categories
Categories by top repo:
Demos: 2.5D Demo (6,933 stars)
Tools: Virtual Joystick (838 stars)
Templates: Takin Game Template (306 stars)
3D Tools: "Immersive" First Person Controller (43 stars)
Scripts: DungeonCrawler3D (24 stars)
2D Tools: 1$ gesture recogniser and recorder (9 stars)
Misc: Follow Parent (6 stars)
Materials: .mtl loader (3 stars)


In [40]:
import csv
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from collections import Counter

def _plot_top_repos(titles, stars):
    """Save a horizontal bar chart of the 15 most-starred repositories."""
    ranked = sorted(zip(titles, stars), key=lambda pair: pair[1], reverse=True)
    top_titles, top_stars = zip(*ranked[:15])
    positions = range(len(top_titles))

    plt.figure(figsize=(12, 8))
    plt.barh(positions, top_stars, color='#4CAF50')
    # Long titles are cut to 30 characters so the axis labels stay readable.
    plt.yticks(positions, [title[:30] + '...' if len(title) > 30 else title for title in top_titles])
    plt.xlabel('Stars')
    plt.title('Top 15 Repositories by Stars', fontsize=14, fontweight='bold')
    plt.gca().invert_yaxis()

    # Put the star count just right of each bar.
    label_offset = max(top_stars) * 0.01
    for i, v in enumerate(top_stars):
        plt.text(v + label_offset, i, f'{v:,}', va='center', fontsize=9)

    plt.tight_layout()
    plt.savefig('top_repos_by_stars.png', dpi=300, bbox_inches='tight')
    plt.close()


def _plot_stars_vs_forks(stars, forks):
    """Save a scatter plot of stars against forks, annotated with their correlation."""
    plt.figure(figsize=(10, 8))
    plt.scatter(stars, forks, alpha=0.6, color='#2196F3', s=50)
    plt.xlabel('Stars')
    plt.ylabel('Forks')
    plt.title('Stars vs Forks Relationship', fontsize=14, fontweight='bold')
    plt.grid(True, alpha=0.3)

    correlation = np.corrcoef(stars, forks)[0, 1]
    plt.text(0.05, 0.95, f'Correlation: {correlation:.3f}', transform=plt.gca().transAxes,
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.8), fontsize=12)

    plt.tight_layout()
    plt.savefig('stars_vs_forks.png', dpi=300, bbox_inches='tight')
    plt.close()


def _plot_category_distribution(category_counts):
    """Save a pie chart of the eight most common categories."""
    top_categories = dict(category_counts.most_common(8))

    plt.figure(figsize=(10, 8))
    colors = plt.cm.Set3(np.linspace(0, 1, len(top_categories)))
    wedges, texts, autotexts = plt.pie(top_categories.values(), labels=top_categories.keys(),
                                       autopct='%1.1f%%', colors=colors, startangle=90)
    plt.title('Repository Distribution by Category', fontsize=14, fontweight='bold')

    for autotext in autotexts:
        autotext.set_fontsize(10)
        autotext.set_color('black')

    plt.tight_layout()
    plt.savefig('category_distribution.png', dpi=300, bbox_inches='tight')
    plt.close()


def _plot_activity_timeline(last_updated):
    """Save a bar chart of repositories per last-update month (36 most recent months with data)."""
    plt.figure(figsize=(16, 6))
    valid_dates = [date for date in last_updated if date is not None]
    if valid_dates:
        month_counts = Counter(date.strftime('%Y-%m') for date in valid_dates)

        # 'YYYY-MM' keys sort chronologically; keep the last 36 of them.
        recent_months = sorted(month_counts.items())[-36:]

        month_labels, counts = zip(*recent_months)
        positions = range(len(month_labels))
        plt.bar(positions, counts, color='#FF9800', alpha=0.7)
        plt.xticks(positions, month_labels, rotation=45, ha='right')
        plt.ylabel('Repositories Updated')
        plt.title('Repository Activity Timeline (Last 3 Years)', fontsize=14, fontweight='bold')
        plt.grid(True, alpha=0.3, axis='y')

    # The (possibly empty) figure is saved even when no date could be parsed.
    plt.tight_layout()
    plt.savefig('activity_timeline.png', dpi=300, bbox_inches='tight')
    plt.close()


def create_plots():
    """Render four PNG charts from godot_github_final.csv and print summary statistics.

    Writes ``top_repos_by_stars.png``, ``stars_vs_forks.png``,
    ``category_distribution.png`` and ``activity_timeline.png`` to the current
    directory. When the CSV holds no rows, a message is printed and nothing is
    plotted. Returns None.
    """
    with open('godot_github_final.csv', 'r', newline='', encoding='utf-8') as f:
        repos = list(csv.DictReader(f))

    # Every chart and statistic below needs at least one repository.
    if not repos:
        print("No repositories found in godot_github_final.csv; skipping plots")
        return

    stars = [int(repo['stars']) for repo in repos]
    forks = [int(repo['forks']) for repo in repos]
    categories = [repo['category'] for repo in repos]
    titles = [repo['title'] for repo in repos]

    last_updated = []
    for repo in repos:
        try:
            date_obj = datetime.fromisoformat(repo['last_updated'].replace('Z', '+00:00'))
            last_updated.append(date_obj)
        except (ValueError, AttributeError):
            # Unparseable or missing timestamp: keep a placeholder, excluded from the timeline.
            last_updated.append(None)

    category_counts = Counter(categories)

    # 1. Top 15 repositories by stars
    _plot_top_repos(titles, stars)
    # 2. Stars vs Forks scatter plot
    _plot_stars_vs_forks(stars, forks)
    # 3. Category distribution
    _plot_category_distribution(category_counts)
    # 4. Activity timeline (last updated dates) - 3 years, wide chart
    _plot_activity_timeline(last_updated)

    top_category, top_category_count = category_counts.most_common(1)[0]

    print(f"Analysis of {len(repos)} repositories:")
    print(f"Total stars: {sum(stars):,}")
    print(f"Total forks: {sum(forks):,}")
    print(f"Average stars: {np.mean(stars):.1f}")
    print(f"Median stars: {np.median(stars):.1f}")
    print(f"Most popular category: {top_category} ({top_category_count} repos)")

    high_star_repos = [repo for repo in repos if int(repo['stars']) > 100]
    print(f"Repositories with >100 stars: {len(high_star_repos)}")
    print("Generated 4 plot files:")
    print("- top_repos_by_stars.png")
    print("- stars_vs_forks.png")
    print("- category_distribution.png")
    print("- activity_timeline.png")

# Render the four PNG charts from godot_github_final.csv and print summary statistics.
create_plots()

Analysis of 35 repositories:
Total stars: 8,687
Total forks: 1,999
Average stars: 248.2
Median stars: 8.0
Most popular category: Scripts (8 repos)
Repositories with >100 stars: 5
Generated 4 plot files:
- top_repos_by_stars.png
- stars_vs_forks.png
- category_distribution.png
- activity_timeline.png
