In [17]:
import glob
import os

projects = os.listdir("data/projects")
last_commits = glob.glob("data/projects_last_commit/*.json")

print(f"Analyzed projects: {len(projects)}")
print(f"Last commit extracted for projects: {len(last_commits)}")

Analyzed projects: 1082
Last commit extracted for projects: 1082


In [8]:
import shutil

for project in projects:
    last_commit_file = f"data/projects_last_commit/{project}_last_commit.json"
    if last_commit_file not in last_commits:
        shutil.rmtree(f"data/projects/{project}")
        
        

In [9]:
for last_commit in last_commits:
    file_name = last_commit.split("/")[-1]
    project_name = file_name.split("_last_commit.json")[0]
    if project_name not in projects:
        print(f"Last commit exists for non-existing project: {project_name}")
        os.remove(last_commit)

Last commit exists for non-existing project: goreplay
Last commit exists for non-existing project: desktop
Last commit exists for non-existing project: openvpn-install
Last commit exists for non-existing project: django-rest-framework
Last commit exists for non-existing project: electron
Last commit exists for non-existing project: NewPipe
Last commit exists for non-existing project: pandas
Last commit exists for non-existing project: gopeed
Last commit exists for non-existing project: micropython
Last commit exists for non-existing project: nuclei
Last commit exists for non-existing project: kitty
Last commit exists for non-existing project: node
Last commit exists for non-existing project: OpenVoice
Last commit exists for non-existing project: ollama
Last commit exists for non-existing project: filament
Last commit exists for non-existing project: react-native-vector-icons
Last commit exists for non-existing project: vueuse
Last commit exists for non-existing project: wasmer
Last com

In [None]:
for missing_project in missing_projects:
    name_parts = missing_project[1].split("_")
    print(f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url=https://github.com/{name_parts[0]}/{name_parts[1]} --name={name_parts[1]} --last_commit=True")

In [8]:
import os
import glob
import re
import json

fully_analyzed = []
partly_analyzed = []
not_analyzed = []

projects = os.listdir("data/projects")

for project in projects:
    json_files = glob.glob(f"data/projects/{project}/*.json")

    if len(json_files) == 1 and json_files[0].endswith(f"{project}.json"):
        fully_analyzed.append(project)

    if len(json_files) == 1 and json_files[0].endswith("last_commit.json"):
        not_analyzed.append(project)

    if len(json_files) == 0:
        not_analyzed.append(project)

    if len(json_files) > 1:
        if any(file.endswith(f"summary.json") for file in json_files):
            fully_analyzed.append(project)
        else:
            batch_files = [file for file in json_files if "batch" in file]
            batch_info = []                                                                                             
            for batch_file in batch_files:                                                                              
                # Extract batch number using regex                                                                      
                match = re.search(r'batch_(\d+)\.json$', batch_file)                                                    
                if match:                                                                                               
                    batch_num = int(match.group(1))                                                                     
                    batch_info.append((batch_num, batch_file))                                                         
                                                                                                                     
            # Sort by batch number and get the latest                                                                   
            if batch_info:                                                                                             
                batch_info.sort(key=lambda x: x[0], reverse=True)                                                      
                latest_batch_num, latest_batch_file = batch_info[0]                                                    
                print(f"Project: {project}, Latest batch: {latest_batch_num}, File: {latest_batch_file}")

                with open(latest_batch_file, 'r', encoding='utf-8') as f:
                    try:
                        data = json.load(f)
                        commit_data = data.get("commit_data", [])
                        latest_commit = data["commit_data"][-1]

                        # Check if latest commit is indeed the latest
                        if not latest_commit["is_latest_commit"]:
                            partly_analyzed.append(project)
                        
                        # Check if latest commit  contains data
                        if latest_commit["is_latest_commit"]:
                            if not latest_commit["network_data"]:
                                partly_analyzed.append(project)
                    except Exception as e:
                        not_analyzed.append(project)
                        print(f"Error reading {latest_batch_file}: {e}")
                        continue

Project: moby_moby, Latest batch: 546, File: data/projects/moby_moby/moby_moby_batch_546.json
Project: dbeaver_dbeaver, Latest batch: 278, File: data/projects/dbeaver_dbeaver/dbeaver_dbeaver_batch_278.json
Project: juspay_hyperswitch, Latest batch: 59, File: data/projects/juspay_hyperswitch/juspay_hyperswitch_batch_59.json
Project: ansible_ansible, Latest batch: 552, File: data/projects/ansible_ansible/ansible_ansible_batch_552.json
Project: ionic-team_ionic-framework, Latest batch: 143, File: data/projects/ionic-team_ionic-framework/ionic-team_ionic-framework_batch_143.json
Project: angular_angular, Latest batch: 349, File: data/projects/angular_angular/angular_angular_batch_349.json
Project: apache_superset, Latest batch: 179, File: data/projects/apache_superset/apache_superset_batch_179.json
Project: mrdoob_three.js, Latest batch: 461, File: data/projects/mrdoob_three.js/mrdoob_three.js_batch_461.json
Project: cypress-io_cypress, Latest batch: 223, File: data/projects/cypress-io_cyp

KeyboardInterrupt: 

Fully analyzed projects: 480
  marker
  Flowise
  consul
  windows
  youtube-music
  spring-framework
  vue-pure-admin
  crewAI
  magic-wormhole
  ant-design-vue
  czkawka
  rocksdb
  googletest
  CodeEdit
  LocalAI
  tensorflow
  amark_gun
  etherpad-lite
  streamlit
  spacedrive
  rustdesk
  toml
  freqtrade
  base_node
  go-redis
  podman
  localstack
  OI-wiki
  ruff
  bootstrap
  SmsForwarder
  jekyll
  beekeeper-studio
  Qwen3
  paperless-ngx
  scrcpy
  WxJava
  graphql-js
  IOPaint
  restic
  graphrag
  sweetalert2
  agno
  iina
  firecracker
  OpenManus
  netbox
  oven-sh_bun
  zap
  hackathon-starter
  supervision
  rocketmq
  netty
  FlClash
  cherry-studio
  koodo-reader
  harness
  MiniCPM-o
  asdf
  Sa-Token
  hello-algo
  n
  vagrant
  swiper
  plane
  actix-web
  unsloth
  motion
  druid
  handsontable_handsontable
  DeepSeek-V3
  Kingfisher
  grapesjs
  llama.cpp
  okhttp
  servo_servo
  deskflow
  colly
  invoke-ai_InvokeAI
  Mobile-Security-Framework-MobSF
  biome
  g

In [6]:
import os
import shutil

local_projects = os.listdir("data/projects")
remove_projects = ['ExplorerPatcher', 'ComfyUI', 'react-spring', 'lo', 'honojs_hono', 'FreeCAD_FreeCAD', 'haystack', 'gatsbyjs_gatsby', 'maui', 'jeecgboot_JeecgBoot', 'GSYVideoPlayer', 'XX-Net', 'slick', 'vanna', 'mlflow_mlflow', 'live', 'yfinance', 'nowinandroid', 'Qwen', 'AgentGPT', 'eslint', 'canal', 'fiber', 'alacritty', 'vectordotdev_vector', 'dgraph', 'headlessui', 'zustand', 'Aerial', 'vant', 'fd', 'skywalking', 'teable', 'zx', '3x-ui', 'mongodb_mongo', 'hello-algo', 'angular_angular', 'slate', 'btop', 'osquery', 'FiraCode', 'json-server', 'fasthttp', 'apollographql_apollo-client', 'modular', 'DoKit', 'chi', 'validator', 'brew', 'ember.js', 'invoke-ai_InvokeAI', 'babel', 'xxl-job', 'esbuild', 'glide', 'langchain', 'setup', 'TeamNewPipe_NewPipe', 'imgui', 'sherlock', 'carbon-lang', 'rustdesk_rustdesk', 'LazyVim', 'NationalSecurityAgency_ghidra', 'ente-io_ente', 'kong', 'ace', 'cilium_cilium', 'layui', 'ray-project_ray', 'dubbo', 'shellcheck', 'Rectangle', 'scikit-learn', 'yazi', 'anki', 'wasmerio_wasmer', 'rust-lang_rust', 'localsend', 'yolov5', 'etherpad-lite', 'nrwl_nx', 'electron-react-boilerplate', 'distroless', 'dbeaver_dbeaver', 'react-scan', 'text-generation-webui', 'trix', 'continuedev_continue', 'tldraw_tldraw', 'swr', 'photoprism_photoprism', 'cube', 'fmt', 'spring-projects_spring-boot', 'commander.js', 'firefly-iii', 'drizzle-orm', 'Apktool', 'yabai', 'ignite', 'Docker-OSX', 'yjs', 'MPAndroidChart', 'FastChat', 'electron_electron', 'docker-elk', 'CodeEdit', 'dioxus', 'fullcalendar', 'express', 'lede', 'recharts', 'biome', 'graphql_graphql-js', 'Bilibili-Evolved', '3b1b_manim', 'ripgrep', 'eladmin', 'fuel-core', 'insightface', 'MediaCrawler', 'bootstrap', 'laravel', 'vant-weapp', 'ingress-nginx', 'OI-wiki', 'leetcode', 'LLaMA-Factory', 'redisson_redisson', 'labring_FastGPT', 'config', 'PowerToys', 'paperless-ngx', 'timqian_chinese-independent-blogs', 'tabler', 'python-fire', 'immutable-js', 'jan', 'sweetalert2', 'automa', 'kotaemon', 'tailscale', 'atuinsh_atuin', 'dapr_dapr', 'code-server', 'ethereum_solidity', 'pyscript', 'frigate', 'cosmopolitan', 'typescript-go', 'iina', 'codemirror5', 'ToolJet_ToolJet', 'redoc', 'servo_servo', 'kubernetes_kubernetes', 'sway', 'apify_crawlee', 'Inquirer.js', 'PowerShell_PowerShell', 'spdlog', 'pi-hole', 'swoole-src', 'nocodb_nocodb', 'netbox', 'guava', 'metabase_metabase', 'Perplexica', 'localstack', 'Bend', 'Hyprland', 'fairseq', 'spring-framework', 'v', 'beego', 'storybookjs_storybook', 'CasaOS', 'graphrag', 'anime', 'meilisearch', 'homebrew-cask', 'Apollo-11', 'gum', 'docs', 'pytorch_geometric', 'minimind', 'vueuse_vueuse', 'fullPage.js', 'dayjs', 'redis', 'koodo-reader', 'screenshot-to-code', 'robertdavidgraham_masscan', 'vllm', 'excalidraw', 'caddy', 'typst', 'gkd', 'DeepSeek-V3', 'keepassxc', 'compose-samples', 'fyne', 'NervJS_taro', 'bat', 'vite', 'vim', 'opencv', 'ddia', 'lvgl', 'dockge', 'OpenManus', 'mimikatz', 'github1s', 'syncthing', 'd2', 'fingerprintjs_fingerprintjs', 'slint', 'beekeeper-studio', 'shardeum', 'tornado', 'flash-attention', 'mobx', 'HumanSignal_label-studio', 'faiss', 'marked', 'ghostty', 'penpot', 'gpt_academic', 'html5-boilerplate', 'gpt4free', 'cypress-io_cypress', 'chatwoot_chatwoot', 'agenticSeek', 'wezterm', 'highlight.js', 'zxing', 'twenty', 'fuels-rs', 'iTerm2-Color-Schemes', 'ws', 'Mobile-Security-Framework-MobSF', 'react-navigation', 'gpt4all', 'jellyfin', 'refinedev_refine', 'firecrawl', 'kickstart.nvim', 'srs', 'dolt', 'qlib', 'flutter_flutter', 'TryGhost_Ghost', 'monica', 'vue2-elm', 'harness', 'urfave_cli', 'Ice', 'toeverything_AFFiNE', 'termux-app', 'mongoose', 'conductor', 'baidu_amis', 'zulip_zulip', 'tdesktop', 'Dokploy_dokploy', 'gitlabhq_gitlabhq', 'jarun_nnn', 'Magisk', 'tauri', 'Chart.js', 'phaserjs_phaser', 'plotly_dash', 'hugo', 'GopeedLab_gopeed', 'gradio', 'draggable', 'supabase_supabase', 'calibre', 'ColossalAI', 'Anuken_Mindustry', 'relay', 'LunarVim', 'glance', 'ccxt_ccxt', 'GPT-SoVITS', 'mitmproxy', 'tensorflow', 'outline', 'node-red_node-red', 'WordPress', 'guidance', 'valkey', 'smolagents', 'swc-project_swc', 'cmder', 'yew', 'heroicons', 'docling', 'deno', 'vercel_next.js', 'apache_airflow', 'RevokeMsgPatcher', 'just', 'TabbyML_tabby', 'argoproj_argo-cd', 'Folo', 'open-interpreter', 'zen-browser_desktop', 'amark_gun', 'web-check', 'vnpy_vnpy', 'CorentinJ_Real-Time-Voice-Cloning', 'Kingfisher', 'yup', 'UTM', 'NativeScript_NativeScript', 'harbor', 'gethomepage_homepage', 'youtube-music', 'delve', 'fish-speech', 'pure', 'aseprite', 'NaiboWang_EasySpider', 'ruoyi-vue-pro', 'fastai', 'OmniParser', 'appwrite_appwrite', 'faas', 'fuels-ts', 'framework', 'Catch2', 'serverless', 'k3s', 'cline', 'mypy', 'apache_superset', 'chatgpt-on-wechat', 'sst', 'barryvdh_laravel-debugbar', 'folly', 'SwiftyJSON', 'pingora', 'sing-box', 'php-src', 'xbmc', 'terminal', 'Kong_insomnia', 'ml-agents', 'google_filament', 'deskreen', 'devdocs', 'v2rayNG', 'YesPlayMusic', 'dimsemenov_PhotoSwipe', 'IOPaint', 'pocketbase', 'neovim', 'bagisto', 'theia', 'opentofu', 'svelte', 'agno', 'graal', 'commitlint', 'uutils_coreutils', 'trufflehog', 'cadvisor', 'netty', 'lazygit', 'refined-github', 'ionic-team_ionic-framework', 'stats', 'winget-cli', 'daytona', 'intro.js', 'uv', 'KaTeX', 'flask', 'micro_go-micro', 'grafana_loki', 'emscripten', 'rocketmq', 'casbin', 'youtube-dl', 'certbot', 'matplotlib', 'serenity', 'driver.js', 'textual', 'airbytehq_airbyte', 'k6', 'AdGuardHome', 'OSX-KVM', 'reflex', 'shap', 'Jobs_Applier_AI_Agent_AIHawk', 'rich', 'axum', 'eruda', 'devise', 'MMKV', 'flatbuffers', 'ai-hedge-fund', 'mpv', 'preact', 'lapce_lapce', 'nushell', 'langgenius_dify', 'postiz-app', 'influxdb', 'qBittorrent', 'composio', 'julia', 'bilibili-API-collect', 'joi', 'angular-cli', 'weui', 'ish', 'hashcat', 'Sa-Token', 'monaco-editor', 'teambit_bit', 'Flowise', 'LLMs-from-scratch', 'oven-sh_bun', 'spacemacs', 'query', 'nginx_nginx', 'pytorch_pytorch', 'pathway', 'hosts', 'jq', 'subquery_subql', 'pdf.js', 'OpenAPI-Specification', 'CymChad_BaseRecyclerViewAdapterHelper', 'DefinitelyTyped_DefinitelyTyped', 'tinygrad', 'TDengine', 'frp', 'egui', 'focalboard', 'guzzle', 'kit', 'anything-llm', 'czkawka', 'create-t3-app', 'windows95', 'axios', 'druid', 'ChromeAppHeroes', 'facefusion', 'withastro_astro', 'libgdx', 'lobehub_lobe-chat', 'sanic', 'hasura_graphql-engine', 'diffusers', 'validatorjs_validator.js', 'withfig_autocomplete', 'gravitational_teleport', 'lerna', 'luigi', 'SteamTools', 'django', 'Rocket', 'puter', 'apollo', 'incubator-seata', 'codex', 'googletest', 'react', 'react-native', 'google_fonts', 'consul', 'taichi', 'badges_shields', 'strapi_strapi', 'plane', 'unionlabs_union', 'logseq', 'mocha', 'zeromicro_go-zero', 'gorm', 'ice', 'xterm.js', 'ladybird', 'grapesjs', 'podman', 'whisper.cpp', 'rails', 'AnotherRedisDesktopManager', 'decaporg_decap-cms', 'leptos', 'poetry', 'seaweedfs_seaweedfs', 'curl', 'celery', 'expo_expo', 'timescaledb', 'neon', 'koreader', 'nginx-proxy', 'OpenAPITools_openapi-generator', 'htmx', 'GoogleChrome_lighthouse', 'windows', 'requests', 'delta', 'ScreenToGif', 'lit_lit', 'element-plus', 'everyone-can-use-english', 'PostgREST_postgrest', 'bubbletea', 'terraform', 'colima', 'ansible_ansible', 'mrdoob_three.js', 'aider', 'go-gitea_gitea', 'puppeteer', 'Genesis', 'svgo', 'bokeh', 'halo', 'llvm_llvm-project', 'docusaurus', 'MetaGPT', 'ink', 'SmsForwarder', 'firecracker', 'LightRAG', 'NextChat', 'qwik', 'redis_go-redis', 'payloadcms_payload', 'jest', 'laurent22_joplin', 'qinglong', 'pulumi', 'myshell-ai_OpenVoice', 'Iosevka', 'acme.sh', 'nvm', 'miaosha', 'spotube', 'numpy_numpy', 'PHPMailer', 'appsmithorg_appsmith', 'postgres', 'reveal.js', 'blueprint', 'linera-protocol', 'ManimCommunity_manim', 'mdb-ui-kit', 'Ventoy', 'cheerio', 'mindsdb', 'telescope.nvim', 'apache_spark', 'videojs_video.js', 'PaddleOCR', 'stanford-oval_storm', 'CopilotKit_CopilotKit', 'microsoft_TypeScript', 'tiptap', 'nacos', 'matomo-org_matomo', 'locustio_locust', 'helm', 'fabricjs_fabric.js', 'etcd', 'playwright', 'sinaptik-ai_pandas-ai', 'you-get', 'bettercap', 'micropython_micropython', 'rollup', 'fzf', 'APIJSON', 'asdf', 'rembg', 'quivr', 'Karabiner-Elements', 'dashy', 'python-telegram-bot', 'zod', 'v2fly_v2ray-core', 'actix_actix-web', 'Budibase_budibase', 'react-admin', 'react-testing-library', 'Avalonia', 'QuickLook', 'microsoft_semantic-kernel', 'voice-changer', 'components', 'calcom_cal.com', 'jumpserver', 'directus_directus', 'zed-industries_zed', 'rapid7_metasploit-framework', 'jitsi_jitsi-meet', 'CyberChef', 'cloc', 'sqlmap', 'desktop_desktop', 'mem0', 'fabric', 'grpc_grpc', 'envoy', 'LibreChat', 'solid', 'react-redux', 'powerlevel10k', 'FFmpeg', 'autogen', 'agentic', 'crewAI', 'Ehviewer_CN_SXJ', 'xstate', 'clash-verge-rev', 'Eugeny_tabby', 'darkreader', 'vim-airline', 'handsontable_handsontable', 'SwiftLint', 'reduxjs_redux', 'dotnet_core', 'adonisjs_core', 'Files', 'vueComponent_ant-design-vue', 'nextauthjs_next-auth', 'openpilot', 'pyenv', 'necolas_react-native-web', 'xgboost', 'prometheus', 'gin', 'ungoogled-chromium', 'tokio', 'stb', 'typeorm', 'PrefectHQ_prefect', 'ip2region', 'signoz', 'binwiederhier_ntfy', 'phoenix', 'prisma', 'polars', 'BitNet', 'tailwindcss', 'TriliumNext_trilium', 'huggingface_open-r1', 'zoxide', 'milvus', 'mlc-llm', 'prettier', 'obsproject_obs-studio', 'hexo', 'remotion-dev_remotion', 'wekan_wekan', 'lx-music-desktop', 'HikariCP', 'crawl4ai', 'black', 'MoneyPrinterTurbo', 'vitessio_vitess', 'mifi_lossless-cut', 'chakra-ui', 'cpython', 'zig', 'sharp', 'Seal', 'antlr4', 'n', 'vue-admin-better', 'dgtlmoon_changedetection.io', 'void', 'n8n-io_n8n', 'dragonfly', 'godot', 'Infisical_infisical', 'gulp', 'oblador_react-native-vector-icons', 'WSL', 'uppy', 'HandBrake', 'pm2', 'ArchiveBox_ArchiveBox', 'dub', 'kivy', 'homebridge_homebridge', 'jotai', 'bruno', 'classnames', 'statsd', 'rrweb', 'radare2', 'jsdom', 'Scrapegraph-ai', 'slidev', 'pixijs', 'airbnb_lottie-ios', 'google-research_google-research', 'phpunit', 'wiki', 'Langchain-Chatchat', 'helix', 'peft', 'mantine', 'starship', 'retrofit', 'Proton', 'compose', 'docsify', 'yt-dlp', 'cobalt', 'netdata_netdata', 'vaultwarden', 'hoppscotch', 'wg-easy', 'parcel', 'go-kratos_kratos', 'getsentry_sentry', 'toml', 'flux', 'quasarframework_quasar', 'Tasmota', 'excelize', 'elastic_elasticsearch', 'gitbook', 'barry-ran_QtScrcpy', 'brave-browser', 'novuhq_novu', 'koa', 'mkdocs-material', 'PicGo', 'jhipster_generator-jhipster', 'SeleniumHQ_selenium', 'ant-design_ant-design-pro', 'mall', 'js-cookie', 'vulhub', 'croc', 'sequelize', 'yoga', 'DeepSpeed', 'fastapi', 'calculator', 'lux', 'pandoc', 'monolog', 'scrcpy', 'minio', 'semantic-release', 'bazel', 'bulletproof-react', 'webpack', 'cursor-free-vip', 'HanLP', 'git-extras', 'v2rayN', 'MagicMirror', 'dspy', 'spleeter', 'libuv', 'OpenBB-finance_OpenBB', 'Charts', 'spacedrive', 'huginn', 'unleashed-firmware', 'nodejs_node', 'kubernetes_minikube', 'duckdb', 'grpc-gateway', 'deskflow', 'diff-so-fancy', 'uni-app', 'apache_echarts', 'Sunshine', 'simple-icons', 'stretchr_testify', 'signalapp_Signal-Android', 'ruby', 'echo', 'unocss', 'ILSpy', 'bitcoin', 'WxJava', 'openzeppelin-contracts', 'kuboard-press', 'zellij-org_zellij', 'dotnet_roslyn', 'jgthms_bulma', 'facebook_hhvm', 'SDWebImage', 'compiler-explorer_compiler-explorer', 'faker', 'ai_nanoid', 'SmartTube', 'floating-ui', 'v8', 'motion', 'backstage_backstage', 'dotenv', 'tesseract', 'MiniCPM-o', 'AppFlowy-IO_AppFlowy', 'GSAP', 'mediapipe', 'trpc', 'mui_material-ui', 'detectron2', 'kafka', 'RustPython', 'bevy', 'immich-app_immich', 'cloudreve', 'gitleaks', 'arthas', 'winutil', 'kotlin', 'streamlit', 'rclone_rclone', 'whisper', 'tree-sitter_tree-sitter', 'moby_moby', 'juspay_hyperswitch', 'LocalAI', 'vue-vben-admin', 'sqlitebrowser', 'portainer', 'ImHex', 'linux-insides', 'khoj', 'wails', 'serve', 'frida', 'unilm', 'searxng_searxng', 'chatboxai_chatbox', 'FlClash', 'keycloak_keycloak', 'ant-design', 'nw.js', 'react-hook-form', 'redash', 'kestra', 'langflow-ai_langflow', 'vuetify', 'openai-python', 'ReactiveX_RxSwift', 'hiddify-app', 'graphql', 'headscale', 'spring-cloud-alibaba', 'gogs', 'Z-Siqi_Clash-for-Windows_Chinese', 'dotnet_aspnetcore', 'elixir', 'nuxt', 'ohmyzsh', 'openui', 'browser-use', 'TrafficMonitor', 'jekyll', 'marker', 'onlook', 'drawdb', 'daisyui', 'OpenClash', 'ChatTTS', 'spf13_viper', 'ionicons', 'slim', 'gorhill_uBlock', 'scrapy', 'RxJava', 'medusajs_medusa', 'pandas-dev_pandas', 'ruff', 'llama.cpp', 'uptime-kuma', 'vapor_vapor', 'hashicorp_vagrant', 'gradle', 'editor.js', 'explosion_spaCy', 'keras', 'trivy', 'mjml', 'llamafile', 'anoma', 'cobra', 'age', 'composer_composer', 'taipy', 'jax', 'magic-wormhole', 'compose-multiplatform', 'apache_flink', 'flameshot', 'remix', 'micro', 'appium', 'containerd', 'openssl', 'cherry-studio', 'jsoncrack.com', 'rancher', 'socket.io', 'hcengineering_platform', 'pipenv', 'SnapKit', 'meteor_meteor', 'generative-models', 'maybe', 'saleor', 'gson', 'servers', 'ShareX', 'iced-rs_iced', 'PostHog_posthog', 'faceswap', 'cli_cli', 'FreeTubeApp_FreeTube', 'shadPS4', 'gpt-researcher', 'DeepSpeech', 'Xray-core', 'egg', 'vercel_turborepo', 'QwenLM_Qwen3', 'zloirock_core-js', 'prophet', 'wechaty', 'heroui-inc_heroui', 'MinerU', 'diagrams', 'recommenders', 'transformers', 'act', 'allinurl_goaccess', 'swiftlang_swift', 'linux-command', 'bcc', 'mdBook', 'deepface', 'lexical', 'ava', 'framework7', 'NvChad', 'RSSHub', 'surya', 'apache_shardingsphere', 'react-three-fiber', 'SwitchHosts', 'supervision', 'raylib', 'hey', 'disruptor', 'copy_v86', 'zstd', 'open-webui_open-webui', 'Dapper', 'etcher', 'gitui', 'xyflow', 'livewire', 'fish-shell', 'Deep-Live-Cam', 'Tencent_ncnn', 'glow', 'dokku', 'vue-pure-admin', 'hackathon-starter', 'tmux', 'postcss', 'github-readme-stats', 'restic', 'pingcap_tidb', 'eleventy', 'umami-software_umami', 'vault', 'colly', 'fastlane_fastlane', 'the-swift-programming-language-in-chinese', 'unsloth', 'wagtail', 'chroma', 'jadx', 'react-router', 'ui', 'Byaidu_PDFMathTranslate', 'Chat2DB', 'dive', 'swiper', 'air', 'mlx', 'spicetify_cli', 'go', 'k9s', 'source-code-hunter', 'ha_xiaomi_home', 'lutzroeder_netron', 'onnx', 'backbone', '1Panel', 'upscayl', 'sops', 'filebrowser', 'BabylonJS_Babylon.js', 'grpc-go', 'dev-sidecar', 'tesseract.js', 'gin-vue-admin', 'Wox', 'fastify', 'wechat-app-mall', 'notepad-plus-plus_notepad-plus-plus', 'neoclide_coc.nvim', 'x64dbg', 'styled-components', 'jquery_jquery', 'filamentphp_filament', 'tabler-icons', 'memos', 'uikit_uikit', 'AdminLTE', 'zap', 'pnpm', 'pytorch-lightning', 'typesense', 'swagger-ui', 'freqtrade', 'rufus', 'SingleFile', 'surrealdb', 'inkonchain_node', 'siyuan', 'pydantic', 'traefik', 'ultralytics', 'base_node', 'react-bootstrap', 'philc_vimium', 'BerriAI_litellm', 'Sortable', 'openwrt', 'alpine', 'llm.c', 'Umi-OCR', 'glances', 'vuejs_core', 'Leaflet', 'VSCodium_vscodium', 'table', 'uWebSockets', 'oh-my-posh', 'rocksdb', 'mybatis-3', 'discord.js', 'fx', 'parse-server', 'openjdk_jdk', 'gleam', 'shadowsocks-android', 'GyulyVGC_sniffnet', 'projectdiscovery_nuclei', 'simdjson', 'actual', 'mdx', 'OCRmyPDF', 'OpenHands', 'Alamofire', 'okhttp', 'protobuf', 'setup-ipsec-vpn', 'hutool', 'encode_django-rest-framework', 'crystal', 'doomemacs', 'buger_goreplay', 'p5.js', 'flow', 'jenkinsci_jenkins', 'nlohmann_json', 'kovidgoyal_kitty', 'drawio-desktop', 'mermaid', 'Stirling-PDF', 'markitdown', 'webtorrent']

diff = set(local_projects) - set(remove_projects)
print(f"Projects to remove: {len(diff)}")
for project in diff:
    print(f"Removing project: {project}")

    if os.path.isdir(f"data/projects/{project}"):
        shutil.rmtree(f"data/projects/{project}")

Projects to remove: 78
Removing project: vapor
Removing project: rustdesk
Removing project: redisson
Removing project: goreplay
Removing project: composer
Removing project: dapr
Removing project: fabric.js
Removing project: PDFMathTranslate
Removing project: go-redis
Removing project: react-native-vector-icons
Removing project: micropython
Removing project: go-zero
Removing project: vueuse
Removing project: lucide-icons_lucide
Removing project: video.js
Removing project: postgrest
Removing project: NewPipe
Removing project: wasmer
Removing project: node
Removing project: ArchiveBox
Removing project: vagrant
Removing project: fingerprintjs
Removing project: tabby
Removing project: Clash-for-Windows_Chinese
Removing project: nginx
Removing project: FreeCAD
Removing project: autocomplete
Removing project: laravel-debugbar
Removing project: node-red
Removing project: Qwen3
Removing project: vscodium
Removing project: desktop
Removing project: numpy
Removing project: vitess
Removing project