In [21]:
# ZIA GitHub Enrichment (NO API Required): this scrip parses GitHub repo URLs from ../data/lab_github.csv, groups by owner/repo,
# then scrape each repo homepage to pull stars, watching, forks, last commit date, and README text.
#
# Works with ZIA URLs that often lack a scheme (e.g., "github.com/org/repo").
# Avoids false positives from URLs where "github.com/..." appears inside another site's query string.
#
# Outputs:
#   - repo_counts: distinct repos + proxy_hits
#   - repo_enrichment: repo_counts + scraped metadata
#   - repos_enriched: row-level subset of df that includes repo URLs + metadata

import re
import time
import pandas as pd
import requests
from urllib.parse import urlparse, unquote

# ----------------------------
# Config
# ----------------------------
CSV_PATH = "../data/lab_github.csv"
SLEEP_SECONDS = 0.6          # throttling to GitHub; reduce if tiny list, increase if big list
MAX_README_CHARS = 8000

ALLOWED_HOSTS = {
    "github.com", "www.github.com",
    "raw.githubusercontent.com",
    "codeload.github.com",
    "objects.githubusercontent.com",
}

# Show full text in DataFrame cells (can get huge)
pd.set_option("display.max_colwidth", None)

# Optional: show more rows/columns too
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 200)

# ----------------------------
# Helpers
# ----------------------------
def normalize_url(u: str) -> str:
    u = (u or "").strip()
    if not u:
        return ""
    if "://" in u:
        return u
    return "https://" + u

def to_int_compact(s: str):
    """Convert '243k'/'6.7k'/'50.6k'/'1,234' => int."""
    if not s:
        return None
    s = s.strip().lower().replace(",", "")
    m = re.match(r"^(\d+(?:\.\d+)?)([km])?$", s)
    if not m:
        return None
    num = float(m.group(1))
    suf = m.group(2)
    if suf == "k":
        num *= 1_000
    elif suf == "m":
        num *= 1_000_000
    return int(num)

def clean_ws(text: str):
    return re.sub(r"\s+", " ", text or "").strip()

def extract_owner_repo_strict(raw_url: str):
    """
    Extract owner/repo only if the URL's actual host is GitHub-related.
    Prevents false positives from google.com URLs that contain "github.com/..." in query parameters.
    Returns: (owner, repo, owner_repo, host) or (None,...)
    """
    u = normalize_url(raw_url)
    if not u:
        return (None, None, None, None)

    try:
        p = urlparse(u)
    except Exception:
        return (None, None, None, None)

    host = (p.netloc or "").lower()
    if host not in ALLOWED_HOSTS:
        return (None, None, None, None)

    path = unquote(p.path or "").strip("/")
    parts = [x for x in path.split("/") if x]
    if len(parts) < 2:
        return (None, None, None, host)

    owner, repo = parts[0].strip(), parts[1].strip()

    # Strip .git
    if repo.lower().endswith(".git"):
        repo = repo[:-4]

    # Trim punctuation
    repo = repo.rstrip(").,;")

    owner = owner.lower()
    repo = repo.lower()

    # Filter obvious non-repo routes for github.com
    non_repo = {"settings","pricing","topics","marketplace","collections","features","about","site","login","join","organizations","search","apps"}
    if host in {"github.com","www.github.com"} and owner in non_repo:
        return (None, None, None, host)

    # Basic repo/owner sanity: typical GitHub charset
    if not re.match(r"^[a-z0-9_.-]+$", owner) or not re.match(r"^[a-z0-9_.-]+$", repo):
        return (None, None, None, host)

    return (owner, repo, f"{owner}/{repo}", host)

def find_anchor_block(html: str, owner_repo: str, endpoint: str):
    """
    Return the full <a ...>...</a> block where href="/{owner_repo}/{endpoint}" (optionally with query params).
    """
    pattern = (
        rf'(<a\b[^>]*href="/{re.escape(owner_repo)}/{endpoint}(?:\?[^"]*)?"[^>]*>'
        rf'[\s\S]*?</a>)'
    )
    m = re.search(pattern, html, re.IGNORECASE)
    return m.group(1) if m else None

def extract_count_from_anchor(anchor_html: str):
    """
    Extract the metric count from inside an <a>...</a> block on GitHub.
    Supports layouts like:
      - <span class="Counter">243k</span>
      - <strong>6.7k</strong>
    Avoids pulling numbers from SVG icons and path data.
    """
    if not anchor_html:
        return None

    # Remove svg blocks to prevent matching numbers from svg path data
    cleaned = re.sub(r"<svg[\s\S]*?</svg>", " ", anchor_html, flags=re.IGNORECASE)

    # 1) Counter span
    m = re.search(r'class="[^"]*\bCounter\b[^"]*"[^>]*>\s*([^<]+?)\s*<', cleaned, re.IGNORECASE)
    if m:
        val = to_int_compact(m.group(1))
        if val is not None:
            return val

    # 2) <strong>243k</strong> / <strong>6.7k</strong>
    m = re.search(r"<strong[^>]*>\s*([^<\s]+)\s*</strong>", cleaned, re.IGNORECASE)
    if m:
        val = to_int_compact(m.group(1))
        if val is not None:
            return val

    # 3) aria-label fallback if present
    m = re.search(r'aria-label="([^"]+)"', cleaned, re.IGNORECASE)
    if m:
        mnum = re.search(r'(\d+(?:\.\d+)?[kKmM]?)', m.group(1))
        if mnum:
            val = to_int_compact(mnum.group(1))
            if val is not None:
                return val

    # 4) Last resort: number in visible text only (after stripping tags)
    # (still anchored to this <a> block, not the whole page)
    text_only = re.sub(r"<[^>]+>", " ", cleaned)
    text_only = clean_ws(text_only)
    m = re.search(r"\b(\d+(?:\.\d+)?[kKmM]?)\b", text_only)
    if m:
        val = to_int_compact(m.group(1))
        if val is not None:
            return val

    return None

def scrape_repo(owner_repo: str, session: requests.Session, sleep_s: float = SLEEP_SECONDS):
    """
    Scrape https://github.com/{owner_repo} for:
      - stars, watching, forks (from link anchors)
      - last_commit_date (relative-time datetime)
      - readme_text (markdown-body article)
    """
    url = f"https://github.com/{owner_repo}"
    out = {
        "owner_repo": owner_repo,
        "repo_url": url,
        "scrape_ok": False,
        "status_code": None,
        "stars": None,
        "watching": None,
        "forks": None,
        "last_commit_date": None,
        "readme_text": None,
    }

    try:
        r = session.get(url, timeout=30, allow_redirects=True)
        out["status_code"] = r.status_code
        html = r.text or ""
    except Exception:
        return out

    time.sleep(sleep_s)

    blocked_signals = ["Sign in to GitHub", "Verify", "rate limit", "Something went wrong"]
    if r.status_code != 200 or any(sig.lower() in html.lower() for sig in blocked_signals):
        return out

    out["scrape_ok"] = True

    # Counts: parse within the correct anchor only
    star_a  = find_anchor_block(html, owner_repo, "stargazers")
    watch_a = find_anchor_block(html, owner_repo, "watchers")
    fork_a  = find_anchor_block(html, owner_repo, "forks")

    out["stars"] = extract_count_from_anchor(star_a)
    out["watching"] = extract_count_from_anchor(watch_a)
    out["forks"] = extract_count_from_anchor(fork_a)

    # Commit date (best effort)
    m = re.search(r'Latest commit[\s\S]{0,9000}?<relative-time[^>]+datetime="([^"]+)"', html, re.IGNORECASE)
    if m:
        out["last_commit_date"] = m.group(1)
    else:
        m = re.search(r'<relative-time[^>]+datetime="([^"]+)"', html, re.IGNORECASE)
        if m:
            out["last_commit_date"] = m.group(1)

    # README text (best effort)
    readme_block = None
    m = re.search(
        r'id="readme"[\s\S]{0,25000}?<article[^>]*class="[^"]*markdown-body[^"]*"[^>]*>([\s\S]*?)</article>',
        html, re.IGNORECASE
    )
    if m:
        readme_block = m.group(1)
    else:
        m = re.search(
            r'<article[^>]*class="[^"]*markdown-body[^"]*"[^>]*>([\s\S]*?)</article>',
            html, re.IGNORECASE
        )
        if m:
            readme_block = m.group(1)

    if readme_block:
        readme_block = re.sub(r"<(script|style)[^>]*>[\s\S]*?</\1>", " ", readme_block, flags=re.IGNORECASE)
        text = re.sub(r"<[^>]+>", " ", readme_block)
        text = (text.replace("&amp;", "&")
                    .replace("&lt;", "<")
                    .replace("&gt;", ">")
                    .replace("&quot;", '"')
                    .replace("&#39;", "'"))
        text = clean_ws(text)
        out["readme_text"] = text[:MAX_README_CHARS] if text else None

    return out

# ----------------------------
# Main
# ----------------------------
df = pd.read_csv(CSV_PATH)
df.columns = [c.strip().lower() for c in df.columns]
df["url"] = df["url"].astype(str).fillna("").str.strip()

df[["gh_owner","gh_repo","owner_repo","gh_host"]] = df["url"].apply(lambda x: pd.Series(extract_owner_repo_strict(x)))

repos_df = df.dropna(subset=["owner_repo"]).copy()

print("Total rows:", len(df))
print("Rows with extracted owner/repo:", len(repos_df))
print("Distinct repos:", repos_df["owner_repo"].nunique())

repo_counts = (
    repos_df.groupby("owner_repo", as_index=False)
            .size()
            .rename(columns={"size":"proxy_hits"})
            .sort_values("proxy_hits", ascending=False)
)
display(repo_counts.head(30))

# Session headers
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
})

# Quick sanity test on facebook/react
test = scrape_repo("facebook/react", session=session, sleep_s=0.0)
print("Test scrape facebook/react:", test)

# Enrich all repos
distinct_repos = repo_counts["owner_repo"].astype(str).tolist()

enriched_rows = []
for idx, owner_repo in enumerate(distinct_repos, start=1):
    enriched_rows.append(scrape_repo(owner_repo, session=session, sleep_s=SLEEP_SECONDS))
    if idx % 25 == 0:
        print(f"Enriched {idx}/{len(distinct_repos)} repos...")

enriched = pd.DataFrame(enriched_rows)
repo_enrichment = repo_counts.merge(enriched, on="owner_repo", how="left")

display(repo_enrichment.sort_values(["proxy_hits","stars"], ascending=[False, False]).head(50))

# Optional: join back to row-level repo URL events
repos_enriched = repos_df.merge(repo_enrichment, on="owner_repo", how="left")
display(repos_enriched[["url","gh_host","owner_repo","proxy_hits","stars","watching","forks","last_commit_date"]].head(25))


Total rows: 8012
Rows with extracted owner/repo: 743
Distinct repos: 8


Unnamed: 0,owner_repo,proxy_hits
0,anthropics/skills,204
1,datatalksclub/data-engineering-zoomcamp,183
2,facebook/react,153
3,googlecloudplatform/terraformer,137
7,tensorflow/tensorflow,53
6,public-apis/public-apis,10
4,peass-ng/peass-ng,2
5,publi-apis/public-apis,1


Test scrape facebook/react: {'owner_repo': 'facebook/react', 'repo_url': 'https://github.com/facebook/react', 'scrape_ok': True, 'status_code': 200, 'stars': 243000, 'watching': 6700, 'forks': 50600, 'last_commit_date': '2026-01-26T18:29:43Z', 'readme_text': 'React ¬∑ React is a JavaScript library for building user interfaces. Declarative: React makes it painless to create interactive UIs. Design simple views for each state in your application, and React will efficiently update and render just the right components when your data changes. Declarative views make your code more predictable, simpler to understand, and easier to debug. Component-Based: Build encapsulated components that manage their own state, then compose them to make complex UIs. Since component logic is written in JavaScript instead of templates, you can easily pass rich data through your app and keep the state out of the DOM. Learn Once, Write Anywhere: We don\'t make assumptions about the rest of your technology stack,

Unnamed: 0,owner_repo,proxy_hits,repo_url,scrape_ok,status_code,stars,watching,forks,last_commit_date,readme_text
0,anthropics/skills,204,https://github.com/anthropics/skills,True,200,71200.0,530.0,7300.0,,"Note: This repository contains Anthropic's implementation of skills for Claude. For information about the Agent Skills standard, see agentskills.io . Skills Skills are folders of instructions, scripts, and resources that Claude loads dynamically to improve performance on specialized tasks. Skills teach Claude how to complete specific tasks in a repeatable way, whether that's creating documents with your company's brand guidelines, analyzing data using your organization's specific workflows, or automating personal tasks. For more information, check out: What are skills? Using skills in Claude How to create custom skills Equipping agents for the real world with Agent Skills About This Repository This repository contains skills that demonstrate what's possible with Claude's skills system. These skills range from creative applications (art, music, design) to technical tasks (testing web apps, MCP server generation) to enterprise workflows (communications, branding, etc.). Each skill is self-contained in its own folder with a SKILL.md file containing the instructions and metadata that Claude uses. Browse through these skills to get inspiration for your own skills or to understand different patterns and approaches. Many skills in this repo are open source (Apache 2.0). We've also included the document creation & editing skills that power Claude's document capabilities under the hood in the skills/docx , skills/pdf , skills/pptx , and skills/xlsx subfolders. These are source-available, not open source, but we wanted to share these with developers as a reference for more complex skills that are actively used in a production AI application. Disclaimer These skills are provided for demonstration and educational purposes only. While some of these capabilities may be available in Claude, the implementations and behaviors you receive from Claude may differ from what is shown in these skills. These skills are meant to illustrate patterns and possibilities. Always test skills thoroughly in your own environment before relying on them for critical tasks. Skill Sets ./skills : Skill examples for Creative & Design, Development & Technical, Enterprise & Communication, and Document Skills ./spec : The Agent Skills specification ./template : Skill template Try in Claude Code, Claude.ai, and the API Claude Code You can register this repository as a Claude Code Plugin marketplace by running the following command in Claude Code: /plugin marketplace add anthropics/skills Then, to install a specific set of skills: Select Browse and install plugins Select anthropic-agent-skills Select document-skills or example-skills Select Install now Alternatively, directly install either Plugin via: /plugin install document-skills@anthropic-agent-skills /plugin install example-skills@anthropic-agent-skills After installing the plugin, you can use the skill by just mentioning it. For instance, if you install the document-skills plugin from the marketplace, you can ask Claude Code to do something like: ""Use the PDF skill to extract the form fields from path/to/some-file.pdf "" Claude.ai These example skills are all already available to paid plans in Claude.ai. To use any skill from this repository or upload custom skills, follow the instructions in Using skills in Claude . Claude API You can use Anthropic's pre-built skills, and upload custom skills, via the Claude API. See the Skills API Quickstart for more. Creating a Basic Skill Skills are simple to create - just a folder with a SKILL.md file containing YAML frontmatter and instructions. You can use the template-skill in this repository as a starting point: --- name : my-skill-name description : A clear description of what this skill does and when to use it --- # My Skill Name [ Add your instructions here that Claude will follow when this skill is active ] ## Examples - Example usage 1 - Example usage 2 ## Guidelines - Guideline 1 - Guideline 2 The frontmatter requires only two fields: name - A unique identifier for your skill (lowercase, hyphens for spaces) description - A complete description of what the skill does and when to use it The markdown content below contains the instructions, examples, and guidelines that Claude will follow. For more details, see How to create custom skills . Partner Skills Skills are a great way to teach Claude how to get better at using specific pieces of software. As we see awesome example skills from partners, we may highlight some of them here: Notion - Notion Skills for Claude"
1,datatalksclub/data-engineering-zoomcamp,183,https://github.com/datatalksclub/data-engineering-zoomcamp,True,200,38500.0,564.0,7700.0,,"Data Engineering Zoomcamp: A Free 9-Week Course on Data Engineering Fundamentals Master the fundamentals of data engineering by building an end-to-end data pipeline from scratch. Gain hands-on experience with industry-standard tools and best practices. Join Slack ‚Ä¢ #course-data-engineering Channel ‚Ä¢ Telegram Announcements ‚Ä¢ Course Playlist ‚Ä¢ FAQ How to Enroll 2026 Cohort Start Date : 12 January 2026 Register Here : Sign up Self-Paced Learning All course materials are freely available for independent study. Follow these steps: Watch the course videos. Join the Slack community . Refer to the FAQ document for guidance. Syllabus Overview The course consists of structured modules, hands-on workshops, and a final project to reinforce your learning. Prerequisites To get the most out of this course, you should have: Basic coding experience Familiarity with SQL Experience with Python (helpful but not required) No prior data engineering experience is necessary. Modules Module 1: Containerization and Infrastructure as Code Introduction to GCP Docker and Docker Compose Running PostgreSQL with Docker Infrastructure setup with Terraform Homework Module 2: Workflow Orchestration Data Lakes and Workflow Orchestration Workflow orchestration with Kestra Homework Workshop 1: Data Ingestion API reading and pipeline scalability Data normalization and incremental loading Homework Module 3: Data Warehousing Introduction to BigQuery Partitioning, clustering, and best practices Machine learning in BigQuery Module 4: Analytics Engineering Analytics Engineering and Data Modeling dbt (data build tool) with DuckDB & BigQuery Testing, documentation, and deployment Module 5: Data Platforms Building end-to-end data pipelines with Bruin Data ingestion, transformation, and quality Deployment to cloud (BigQuery) Module 6: Batch Processing Introduction to Apache Spark DataFrames and SQL Internals of GroupBy and Joins Module 7: Streaming Introduction to Kafka Kafka Streams and KSQL Schema management with Avro Final Project Apply all concepts learned in a real-world scenario Peer review and feedback process Testimonials Thank you for what you do! The Data Engineering Zoomcamp gave me skills that helped me land my first tech job. ‚Äî Tim Claytor ( Source ) Three months might seem like a long time, but the growth and learning during this period are truly remarkable. It was a great experience with a lot of learning, connecting with like-minded people from all around the world, and having fun. I must admit, this was really hard. But the feeling of accomplishment and learning made it all worthwhile. And I would do it again! ‚Äî Nevenka Lukic ( Source ) One of the significant things I inferred from the Zoomcamp is to prioritize fundamentals and principles over ever-evolving tools and tech stacks. Hugely grateful to Alexey Grigorev for putting together this incredible course and offering it for free. ‚Äî Siddhartha Gogoi ( Source ) Such a fun deep dive into data engineering, cloud automation, and orchestration. I learned so much along the way. Big shoutout to Alexey Grigorev and the DataTalksClub team for the opportunity and guidance throughout the 3 months of the free course. ‚Äî Assitan NIARE ( Source ) If you‚Äôre serious about breaking into data engineering, start here. The repo‚Äôs structure, community, and hands-on focus make it unparalleled. ‚Äî Wady Osama ( Source ) Community & Support Getting Help on Slack Join the #course-data-engineering channel on DataTalks.Club Slack for discussions, troubleshooting, and networking. To keep discussions organized: Follow our guidelines when posting questions. Review the community guidelines . Meet the Instructors Alexey Grigorev Michael Shoemaker Will Russell Anna Geller Juan Manuel Perafan Arsalan Noorafkan Past instructors: Victoria Perez Mola Ankush Khanna Sejal Vaidya Irem Erturk Luis Oliveira Zach Wilson Sponsors & Supporters A special thanks to our course sponsors for making this initiative possible! Interested in supporting our community? Reach out to alexey@datatalks.club . About DataTalks.Club DataTalks.Club is a global online community of data enthusiasts. It's a place to discuss data, learn, share knowledge, ask and answer questions, and support each other. Website ‚Ä¢ Join Slack Community ‚Ä¢ Newsletter ‚Ä¢ Upcoming Events ‚Ä¢ YouTube ‚Ä¢ GitHub ‚Ä¢ LinkedIn ‚Ä¢ Twitter All the activity at DataTalks.Club mainly happens on Slack . We post updates there and discuss different aspects of data, career questions, and more. At DataTalksClub, we organize online events, community activities, and free courses. You can learn more about what we do at DataTalksClub Community Navigation ."
2,facebook/react,153,https://github.com/facebook/react,True,200,243000.0,6700.0,50600.0,2026-01-26T18:29:43Z,"React ¬∑ React is a JavaScript library for building user interfaces. Declarative: React makes it painless to create interactive UIs. Design simple views for each state in your application, and React will efficiently update and render just the right components when your data changes. Declarative views make your code more predictable, simpler to understand, and easier to debug. Component-Based: Build encapsulated components that manage their own state, then compose them to make complex UIs. Since component logic is written in JavaScript instead of templates, you can easily pass rich data through your app and keep the state out of the DOM. Learn Once, Write Anywhere: We don't make assumptions about the rest of your technology stack, so you can develop new features in React without rewriting existing code. React can also render on the server using Node and power mobile apps using React Native . Learn how to use React in your project . Installation React has been designed for gradual adoption from the start, and you can use as little or as much React as you need : Use Quick Start to get a taste of React. Add React to an Existing Project to use as little or as much React as you need. Create a New React App if you're looking for a powerful JavaScript toolchain. Documentation You can find the React documentation on the website . Check out the Getting Started page for a quick overview. The documentation is divided into several sections: Quick Start Tutorial Thinking in React Installation Describing the UI Adding Interactivity Managing State Advanced Guides API Reference Where to Get Support Contributing Guide You can improve it by sending pull requests to this repository . Examples We have several examples on the website . Here is the first one to get you started: import { createRoot } from 'react-dom/client' ; function HelloMessage ( { name } ) { return < div > Hello { name } </ div > ; } const root = createRoot ( document . getElementById ( 'container' ) ) ; root . render ( < HelloMessage name = ""Taylor"" /> ) ; This example will render ""Hello Taylor"" into a container on the page. You'll notice that we used an HTML-like syntax; we call it JSX . JSX is not required to use React, but it makes code more readable, and writing it feels like writing HTML. Contributing The main purpose of this repository is to continue evolving React core, making it faster and easier to use. Development of React happens in the open on GitHub, and we are grateful to the community for contributing bugfixes and improvements. Read below to learn how you can take part in improving React. Code of Conduct Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please read the full text so that you can understand what actions will and will not be tolerated. Contributing Guide Read our contributing guide to learn about our development process, how to propose bugfixes and improvements, and how to build and test your changes to React. Good First Issues To help you get your feet wet and get you familiar with our contribution process, we have a list of good first issues that contain bugs that have a relatively limited scope. This is a great place to get started. License React is MIT licensed ."
3,googlecloudplatform/terraformer,137,https://github.com/googlecloudplatform/terraformer,True,200,14500.0,158.0,1800.0,2025-03-26T23:37:41Z,"Terraformer A CLI tool that generates tf / json and tfstate files based on existing infrastructure (reverse Terraform). Disclaimer: This is not an official Google product Created by: Waze SRE Table of Contents Demo GCP Capabilities Installation Supported Providers Major Cloud Google Cloud AWS Azure AliCloud IBM Cloud Cloud DigitalOcean Equinix Metal Fastly Heroku LaunchDarkly Linode NS1 OpenStack TencentCloud Vultr Yandex Cloud Ionos Cloud Infrastructure Software Kubernetes OctopusDeploy RabbitMQ Network Cloudflare (broken, see #1761) Myrasec PAN-OS VCS Azure DevOps GitHub Gitlab Monitoring & System Management Datadog New Relic Mackerel PagerDuty Opsgenie Honeycomb.io Opal Community Keycloak Logz.io Commercetools Mikrotik Xen Orchestra GmailFilter Grafana Vault Identity Okta Auth0 AzureAD Contributing Developing Infrastructure Stargazers over time Demo GCP Capabilities Generate tf / json + tfstate files from existing infrastructure for all supported objects by resource. Remote state can be uploaded to a GCS bucket. Connect between resources with terraform_remote_state (local and bucket). Save tf / json files using a custom folder tree pattern. Import by resource name and type. Support terraform 0.13 (for terraform 0.11 use v0.7.9). Terraformer uses Terraform providers and is designed to easily support newly added resources. To upgrade resources with new fields, all you need to do is upgrade the relevant Terraform providers. Import current state to Terraform configuration from a provider Usage: import [provider] [flags] import [provider] [command] Available Commands: list List supported resources for a provider Flags: -b, --bucket string gs://terraform-state -c, --connect (default true) -–°, --compact (default false) -x, --excludes strings firewalls,networks -f, --filter strings compute_firewall=id1:id2:id4 -h, --help help for google -O, --output string output format hcl or json (default ""hcl"") -o, --path-output string (default ""generated"") -p, --path-pattern string {output}/{provider}/ (default ""{output}/{provider}/{service}/"") --projects strings -z, --regions strings europe-west1, (default [global]) -r, --resources strings firewall,networks or * for all services -s, --state string local or bucket (default ""local"") -v, --verbose verbose mode -n, --retry-number number of retries to perform if refresh fails -m, --retry-sleep-ms time in ms to sleep between retries Use "" import [provider] [command] --help"" for more information about a command. Permissions The tool requires read-only permissions to list service resources. Resources You can use --resources parameter to tell resources from what service you want to import. To import resources from all services, use --resources=""*"" . If you want to exclude certain services, you can combine the parameter with --excludes to exclude resources from services you don't want to import e.g. --resources=""*"" --excludes=""iam"" . Filtering Filters are a way to choose which resources terraformer imports. It's possible to filter resources by its identifiers or attributes. Multiple filtering values are separated by : . If an identifier contains this symbol, value should be wrapped in ' e.g. --filter=resource=id1:'project:dataset_id' . Identifier based filters will be executed before Terraformer will try to refresh remote state. Use Type when you need to filter only one of several types of resources. Multiple filters can be combined when importing different resource types. An example would be importing all AWS security groups from a specific AWS VPC: terraformer import aws -r sg,vpc --filter Type=sg;Name=vpc_id;Value=VPC_ID --filter Type=vpc;Name=id;Value=VPC_ID Notice how the Name is different for sg than it is for vpc . Migration state version For terraform >= 0.13, you can use replace-provider to migrate state from previous versions. Example usage: terraform state replace-provider -auto-approve ""registry.terraform.io/-/aws"" ""hashicorp/aws"" Resource ID Filtering is based on Terraform resource ID patterns. To find valid ID patterns for your resource, check the import part of the Terraform documentation . Example usage: terraformer import aws --resources=vpc,subnet --filter=vpc=myvpcid --regions=eu-west-1 Will only import the vpc with id myvpcid . This form of filters can help when it's necessary to select resources by its identifiers. Field name only It is possible to filter by specific field name only. It can be used e.g. when you want to retrieve resources only with a specific tag key. Example usage: terraformer import aws --resources=s3 --filter=""Name=tags.Abc"" --regions=eu-west-1 Will only import the s3 resources that have tag Abc . This form of filters can help when the field values are not important from filtering perspective. Field with dots It is possible to filter by a field that contains a dot. Example usage: terraformer import aws --resources=s3 --filter=""Name=tags.Abc.def"" --regions=eu-west-1 Will only import the s3 resources that have tag Abc.def . Planning The plan command generates a planfile that contains all the resources set to be imported. By modifying the planfile before running the import command, you can rename or filter the resources you'd like to import. The rest of subcommands and parameters are identical to the import command. $ terraformer plan google --resources=networks,firewall --projects=my-project --regions=europe-west1-d (snip) Saving planfile to generated/google/my-project/terraformer/plan.json After reviewing/customizing the planfile, begin the import by running import plan . $ terraformer import plan generated/google/my-project/terraformer/plan.json Resource structure Terraformer by default separates each resource into a file, which is put into a given service directory. The default path for resource files is {output}/{provider}/{service}/{resource}.tf and can vary for each provider. It's possible to adjust the generated structure by: Using --compact parameter to group resource files within a single service into one resources.tf file Adjusting the --path-pattern parameter and passing e.g. --path-pattern {output}/{provider}/ to generate resources for all services in one directory It's possible to combine --compact --path-pattern parameters together. Installation Both Terraformer and a Terraform provider plugin need to be installed. Terraformer From a package manager Homebrew users can use brew install terraformer . MacPorts users can use sudo port install terraformer . Chocolatey users can use choco install terraformer . From releases This installs all providers, set PROVIDER to one of google , aws or kubernetes if you only need one. Linux export PROVIDER=all curl -LO ""https://github.com/GoogleCloudPlatform/terraformer/releases/download/$(curl -s https://api.github.com/repos/GoogleCloudPlatform/terraformer/releases/latest | grep tag_name | cut -d '""' -f 4)/terraformer-${PROVIDER}-linux-amd64"" chmod +x terraformer-${PROVIDER}-linux-amd64 sudo mv terraformer-${PROVIDER}-linux-amd64 /usr/local/bin/terraformer MacOS export PROVIDER=all curl -LO ""https://github.com/GoogleCloudPlatform/terraformer/releases/download/$(curl -s https://api.github.com/repos/GoogleCloudPlatform/terraformer/releases/latest | grep tag_name | cut -d '""' -f 4)/terraformer-${PROVIDER}-darwin-amd64"" chmod +x terraformer-${PROVIDER}-darwin-amd64 sudo mv terraformer-${PROVIDER}-darwin-amd64 /usr/local/bin/terraformer Windows Install Terraform - https://www.terraform.io/downloads Download exe file for required provider from here - https://github.com/GoogleCloudPlatform/terraformer/releases Add the exe file path to path variable From source Run git clone <terraformer repo> && cd terraformer/ Run go mod download Run go build -v for all providers OR build with one provider go run build/main.go {google,aws,azure,kubernetes,etc} Terraform Providers Create a working folder and initialize the Terraform provider plugin. This folder will be where you run Terraformer commands. Run terraform init against a versions.tf file to install th"
4,tensorflow/tensorflow,53,https://github.com/tensorflow/tensorflow,True,200,194000.0,7400.0,75200.0,2025-08-13T17:45:14Z,"Documentation TensorFlow is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools , libraries , and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML-powered applications. TensorFlow was originally developed by researchers and engineers working within the Machine Intelligence team at Google Brain to conduct research in machine learning and neural networks. However, the framework is versatile enough to be used in other areas as well. TensorFlow provides stable Python and C++ APIs, as well as a non-guaranteed backward compatible API for other languages . Keep up-to-date with release announcements and security updates by subscribing to announce@tensorflow.org . See all the mailing lists . Install See the TensorFlow install guide for the pip package , to enable GPU support , use a Docker container , and build from source . To install the current release, which includes support for CUDA-enabled GPU cards (Ubuntu and Windows) : $ pip install tensorflow Other devices (DirectX and MacOS-metal) are supported using Device Plugins . A smaller CPU-only package is also available: $ pip install tensorflow-cpu To update TensorFlow to the latest version, add --upgrade flag to the above commands. Nightly binaries are available for testing using the tf-nightly and tf-nightly-cpu packages on PyPI. Try your first TensorFlow program $ python > >> import tensorflow as tf > >> tf . add ( 1 , 2 ). numpy () 3 > >> hello = tf . constant ( 'Hello, TensorFlow!' ) > >> hello . numpy () b'Hello, TensorFlow!' For more examples, see the TensorFlow Tutorials . Contribution guidelines If you want to contribute to TensorFlow, be sure to review the Contribution Guidelines . This project adheres to TensorFlow's Code of Conduct . By participating, you are expected to uphold this code. We use GitHub Issues for tracking requests and bugs, please see TensorFlow Forum for general questions and discussion, and please direct specific questions to Stack Overflow . The TensorFlow project strives to abide by generally accepted best practices in open-source software development. Patching guidelines Follow these steps to patch a specific version of TensorFlow, for example, to apply fixes to bugs or security vulnerabilities: Clone the TensorFlow repository and switch to the appropriate branch for your desired version‚Äîfor example, r2.8 for version 2.8. Apply the desired changes (i.e., cherry-pick them) and resolve any code conflicts. Run TensorFlow tests and ensure they pass. Build the TensorFlow pip package from source. Continuous build status You can find more community-supported platforms and configurations in the TensorFlow SIG Build Community Builds Table . Official Builds Build Type Status Artifacts Linux CPU PyPI Linux GPU PyPI Linux XLA TBA macOS PyPI Windows CPU PyPI Windows GPU PyPI Android Download Raspberry Pi 0 and 1 Py3 Raspberry Pi 2 and 3 Py3 Libtensorflow MacOS CPU Status Temporarily Unavailable Nightly Binary Official GCS Libtensorflow Linux CPU Status Temporarily Unavailable Nightly Binary Official GCS Libtensorflow Linux GPU Status Temporarily Unavailable Nightly Binary Official GCS Libtensorflow Windows CPU Status Temporarily Unavailable Nightly Binary Official GCS Libtensorflow Windows GPU Status Temporarily Unavailable Nightly Binary Official GCS Resources TensorFlow.org TensorFlow Tutorials TensorFlow Official Models TensorFlow Examples TensorFlow Codelabs TensorFlow Blog Learn ML with TensorFlow TensorFlow Twitter TensorFlow YouTube TensorFlow model optimization roadmap TensorFlow White Papers TensorBoard Visualization Toolkit TensorFlow Code Search Learn more about the TensorFlow Community and how to Contribute . Courses Coursera Udacity Edx License Apache License 2.0"
5,public-apis/public-apis,10,https://github.com/public-apis/public-apis,False,200,,,,,
6,peass-ng/peass-ng,2,https://github.com/peass-ng/peass-ng,True,200,19300.0,235.0,3300.0,2026-02-12T19:35:05Z,"PEASS-ng - Privilege Escalation Awesome Scripts SUITE new generation Basic Tutorial Here you will find privilege escalation tools for Windows and Linux/Unix* and MacOS . These tools search for possible local privilege escalation paths that you could exploit and print them to you with nice colors so you can recognize the misconfigurations easily. Check the Local Windows Privilege Escalation checklist from book.hacktricks.wiki WinPEAS - Windows local Privilege Escalation Awesome Script (C#.exe and .bat) Check the Local Linux Privilege Escalation checklist from book.hacktricks.wiki LinPEAS - Linux local Privilege Escalation Awesome Script (.sh) Quick Start Find the latest versions of all the scripts and binaries in the releases page . JSON, HTML & PDF output Check the parsers directory to transform PEASS outputs to JSON, HTML and PDF Join us! If you are a PEASS & Hacktricks enthusiast , you can get your hands now on our custom swag and show how much you like our projects! You can also, join the üí¨ Discord group or the telegram group to learn about the latest news in cybersecurity and meet other cybersecurity enthusiasts, or follow me on Twitter üê¶ @hacktricks_live . Let's improve PEASS together If you want to add something and have any cool idea related to this project, please let me know it in the telegram group https://t.me/peass or contribute reading the CONTRIBUTING.md file. Advisory All the scripts/binaries of the PEAS suite should be used for authorized penetration testing and/or educational purposes only. Any misuse of this software will not be the responsibility of the author or of any other collaborator. Use it at your own machines and/or with the owner's permission."
7,publi-apis/public-apis,1,https://github.com/publi-apis/public-apis,False,404,,,,,


Unnamed: 0,url,gh_host,owner_repo,proxy_hits,stars,watching,forks,last_commit_date
0,github.com/anthropics/skills,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
1,github.com/anthropics/skills/hovercards/citation/sidebar_partial?tree_name=main,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
2,github.com/anthropics/skills/used_by_list,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
3,github.com/anthropics/skills/refs?type=branch,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
4,github.com/anthropics/skills/tree-commit-info/main,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
5,github.com/anthropics/skills/branch-and-tag-count,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
6,github.com/anthropics/skills/latest-commit/main,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
7,github.com/anthropics/skills/archive/refs/heads/main.zip,github.com,anthropics/skills,204,71200.0,530.0,7300.0,
8,codeload.github.com/anthropics/skills/zip/refs/heads/main,codeload.github.com,anthropics/skills,204,71200.0,530.0,7300.0,
9,codeload.github.com/anthropics/skills/zip/refs/heads/main,codeload.github.com,anthropics/skills,204,71200.0,530.0,7300.0,
