## Data Extraction 

In [None]:
import requests, pandas as pd, time

# Collect the top-starred GitHub repositories for several topic queries via
# the repository search endpoint, flatten every hit into one row, and save
# the rows to a raw CSV file.
#
# NOTE: rows are not de-duplicated here; a repository that matches more than
# one query will presumably appear once per query.

SEARCH_URL = "https://api.github.com/search/repositories"
PAGES_PER_QUERY = 5        # pages 1..5 are requested for every query
RESULTS_PER_PAGE = 100
REQUEST_TIMEOUT = 30       # seconds; without it a stalled connection hangs forever
REQUEST_DELAY = 2          # seconds to pause between consecutive requests
MAX_ATTEMPTS = 3           # tries per page before the page is skipped
DEFAULT_BACKOFF = 60       # seconds to wait when the API gives no retry hint

queries = ["machine learning", "data science", "deep learning", "artificial intelligence"]


def _rate_limit_wait(response):
    """Return how many seconds to wait before retrying a rate-limited request.

    Prefers the ``Retry-After`` header, then ``X-RateLimit-Reset`` (an epoch
    timestamp) when ``X-RateLimit-Remaining`` is ``0``; otherwise falls back
    to ``DEFAULT_BACKOFF``.
    """
    retry_after = response.headers.get("Retry-After", "")
    if retry_after.isdigit():
        return int(retry_after)

    reset_at = response.headers.get("X-RateLimit-Reset", "")
    if response.headers.get("X-RateLimit-Remaining") == "0" and reset_at.isdigit():
        # +1 so we never retry a fraction of a second before the window resets.
        return max(int(reset_at) - int(time.time()), 0) + 1

    return DEFAULT_BACKOFF


def _fetch_items(query, page):
    """Return the ``items`` list of one search-result page.

    Retries up to ``MAX_ATTEMPTS`` times on network errors and on 403/429
    (rate-limit) responses. Returns an empty list, after printing a warning,
    when the page cannot be retrieved, so one bad page does not discard the
    rows already collected.
    """
    params = {
        "q": query,
        "sort": "stars",
        "order": "desc",
        "per_page": RESULTS_PER_PAGE,
        "page": page,
    }

    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = requests.get(SEARCH_URL, params=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Request failed for {query!r} page {page} (attempt {attempt}): {exc}")
            time.sleep(REQUEST_DELAY)
            continue

        if response.status_code in (403, 429):
            wait_seconds = _rate_limit_wait(response)
            print(
                f"Rate limited on {query!r} page {page} (attempt {attempt}); "
                f"waiting {wait_seconds}s"
            )
            time.sleep(wait_seconds)
            continue

        if response.status_code != 200:
            print(f"Skipping {query!r} page {page}: HTTP {response.status_code}")
            return []

        try:
            return response.json().get("items", [])
        except ValueError:
            # Body was not valid JSON.
            print(f"Skipping {query!r} page {page}: response was not valid JSON")
            return []

    print(f"Giving up on {query!r} page {page} after {MAX_ATTEMPTS} attempts")
    return []


def _repo_to_row(repo, snapshot_time):
    """Flatten one search-result item into a dict of dataset columns.

    ``snapshot_time`` is the timezone-aware timestamp that the repository
    age is measured against.
    """
    created_at = pd.to_datetime(repo["created_at"], utc=True)
    stars = repo["stargazers_count"]
    forks = repo["forks_count"]

    return {
        "repo_name": repo["name"],
        "owner": repo["owner"]["login"],
        "stars": stars,
        "forks": forks,
        "watchers": repo["watchers_count"],
        "open_issues": repo["open_issues_count"],
        "language": repo["language"],
        # The description may be null in the payload; count it as length 0.
        "description_length": len(repo["description"] or ""),
        "repo_age_days": (snapshot_time - created_at).days,
        # +1 in the denominator avoids division by zero for unstarred repos.
        "forks_per_star": forks / (stars + 1),
        "is_fork": int(repo["fork"]),
        "has_wiki": int(repo["has_wiki"]),
    }


repos = []
# One reference time for the whole run so every age uses the same "now".
snapshot_time = pd.Timestamp.now(tz="UTC")

for query in queries:
    for page in range(1, PAGES_PER_QUERY + 1):
        items = _fetch_items(query, page)
        repos.extend(_repo_to_row(repo, snapshot_time) for repo in items)
        time.sleep(REQUEST_DELAY)

df = pd.DataFrame(repos)
df.to_csv("github_raw_data00.csv", index=False)
print("Rows collected:", len(df))
