<a href="https://colab.research.google.com/github/tonykipkemboi/streamlit_pycon24_tutorial/blob/main/get_github_repo_stats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Script to download GitHub repo stats into 3 CSV files


---
- Commit activity stats
- Code frequency stats
- Contributor stats


## Install and import dependencies

In [None]:
%pip install --q requests pandas

In [None]:
import requests
import pandas as pd
import time

## Create functions to fetch data

In [None]:
def fetch_data(url, token, retries=5, delay=3):
  """GET ``url`` with a GitHub token and return the decoded JSON body.

  A 202 response is treated as "not ready yet": the function sleeps for
  ``delay`` seconds and tries again, up to ``retries`` attempts in total.

  Args:
    url: Full URL to request.
    token: GitHub token sent in the ``Authorization`` header.
    retries: Maximum number of requests to make before giving up.
    delay: Seconds to sleep after each 202 response.

  Returns:
    The parsed JSON payload of the first 200 response.

  Raises:
    Exception: On any status other than 200/202, or when every attempt
      returned 202.
  """
  headers = {'Authorization': f'token {token}'}
  for _ in range(retries):
    # headers must be passed by keyword: the second positional argument of
    # requests.get is `params`, which would put the token in the query string.
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
      return response.json()
    elif response.status_code == 202:
      # Wait for a few seconds while data is being processed
      time.sleep(delay)
    else:
      raise Exception(f"Failed to fetch data: {response.status_code}, {response.text}")
  raise Exception("Data not ready after maximum retries.")

In [None]:
def fetch_repo_stats(org, repo, token):
  """Download the three statistics datasets for ``org/repo``.

  Each dataset is requested through ``fetch_data`` from the repository's
  ``/stats/<name>`` endpoint, in the order listed below.

  Returns:
    A dict keyed by ``"commit_activity"``, ``"code_frequency"`` and
    ``"contributors"``, each holding the decoded response for that endpoint.
  """
  repo_url = f"https://api.github.com/repos/{org}/{repo}"
  stat_names = ("commit_activity", "code_frequency", "contributors")
  # The endpoint path segment and the result key are the same string.
  return {
      name: fetch_data(f"{repo_url}/stats/{name}", token)
      for name in stat_names
  }

## Save the downloaded data to CSV files

In [None]:
def save_stats_to_csv(stats, filename):
  """Write each dataset in ``stats`` to its own CSV file.

  Three files are produced, all prefixed with ``filename``:

  - ``<filename>_commit_activity.csv``
  - ``<filename>_code_frequency.csv``
  - ``<filename>_contributors.csv``

  Args:
    stats: Dict with the keys ``"commit_activity"``, ``"code_frequency"``
      and ``"contributors"`` (the shape returned by ``fetch_repo_stats``).
    filename: Path prefix shared by the three output files.
  """
  # Process commit activity
  commit_activity = pd.json_normalize(stats["commit_activity"])
  commit_activity.to_csv(f"{filename}_commit_activity.csv", index=False)

  # Process code frequency: each row is a [week, additions, deletions] triple
  code_frequency = pd.DataFrame(stats["code_frequency"], columns=["week", "additions", "deletions"])
  code_frequency.to_csv(f"{filename}_code_frequency.csv", index=False)

  # Process contributors: one output row per entry in each contributor's
  # "weeks" list, with that contributor's "author" value repeated on each row.
  contributors = pd.json_normalize(stats["contributors"], "weeks", "author")
  contributors.to_csv(f"{filename}_contributors.csv", index=False)

## Run the script

In [None]:
if __name__ == "__main__":
  # Repository to pull stats for: replace both with your own org and repo name.
  org, repo = "streamlit", "streamlit"
  # Replace with your GitHub token.
  token = "github_pat_**********"

  stats = fetch_repo_stats(org, repo, token)
  # Only write the CSV files when the fetch returned something.
  if stats:
    print("Data fetch complete. Saving to CSVs!")
    save_stats_to_csv(stats, f"{org}_{repo}_stats")
    print("Done!")