__cleanup_git_repos__
_First version:  January 1, 2026_

This script cleans up unused git repos from a subscription

In [8]:
import os
import requests
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta, timezone

# Constants

GITHUB_API = "https://api.github.com"

#TOKEN = os.getenv("GITHUB_TOKEN")
TOKEN = "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"
if not TOKEN:
    raise SystemExit("ERROR: Please set GITHUB_TOKEN environment variable.")

HEADERS = {
    "Authorization": f"Bearer {TOKEN}",
    "Accept": "application/vnd.github+json"
}

print("Environment ready.")


Environment ready.


In [None]:
# Get the authenticated user's username

def get_authenticated_user():
    r = requests.get(f"{GITHUB_API}/user", headers=HEADERS)
    r.raise_for_status()
    return r.json()["login"]

username = get_authenticated_user()
username


'mjbeattie'

In [15]:
# List repositories where the user is a collaborator

def list_collaborator_repos():
    repos = []
    page = 1

    while True:
        r = requests.get(
            f"{GITHUB_API}/user/repos",
            headers=HEADERS,
            params={"affiliation": "collaborator", "per_page": 100, "page": page}
        )
        r.raise_for_status()
        data = r.json()
        if not data:
            break

        repos.extend(data)
        page += 1

    return repos

collab_repos = list_collaborator_repos()
len(collab_repos)


18

In [5]:
# Create a DataFrame of collaborator repos and save to CSV

df = pd.DataFrame([{
    "full_name": r["full_name"],
    "html_url": r["html_url"],
    "pushed_at": r["pushed_at"],
    "updated_at": r["updated_at"],
    "owner": r["owner"]["login"]
} for r in collab_repos])

df.to_csv("collaborator_repos.csv", index=False)
df.head()


Unnamed: 0,full_name,html_url,pushed_at,updated_at,owner
0,Abhinov7402/Katragadda_OUDSA5900,https://github.com/Abhinov7402/Katragadda_OUDS...,2025-12-27T20:12:30Z,2025-12-27T20:12:34Z,Abhinov7402
1,adityakasturi8/Analysis-of-Disrupted-Transport...,https://github.com/adityakasturi8/Analysis-of-...,2022-07-30T21:20:29Z,2025-02-10T20:44:12Z,adityakasturi8
2,AjayDasAkunuri/Akunuri_OUDSA5900,https://github.com/AjayDasAkunuri/Akunuri_OUDS...,2025-11-08T01:37:12Z,2025-10-10T00:06:39Z,AjayDasAkunuri
3,Ajordan10/ProfessionalPractice23,https://github.com/Ajordan10/ProfessionalPract...,2023-07-17T23:06:19Z,2023-06-06T21:14:52Z,Ajordan10
4,ak3737/ASGHAR_OUDSA5900,https://github.com/ak3737/ASGHAR_OUDSA5900,2024-04-27T04:59:09Z,2024-04-27T04:59:12Z,ak3737


In [10]:
# Set a threshold to unsubscribe from repos
months = 6
cutoff = datetime.now(timezone.utc) - timedelta(days=30 * months)

df["pushed_at_dt"] = pd.to_datetime(df["pushed_at"], utc=True)

filtered_df = df[df["pushed_at_dt"] < cutoff]
filtered_df.head(), len(filtered_df)


(                                           full_name  \
 1  adityakasturi8/Analysis-of-Disrupted-Transport...   
 3                   Ajordan10/ProfessionalPractice23   
 4                            ak3737/ASGHAR_OUDSA5900   
 5                       aksharaArcot/Arcot_OUDSA5900   
 6                      amakhlouf1/Makhlouf_OUDSA5900   
 
                                             html_url             pushed_at  \
 1  https://github.com/adityakasturi8/Analysis-of-...  2022-07-30T21:20:29Z   
 3  https://github.com/Ajordan10/ProfessionalPract...  2023-07-17T23:06:19Z   
 4         https://github.com/ak3737/ASGHAR_OUDSA5900  2024-04-27T04:59:09Z   
 5    https://github.com/aksharaArcot/Arcot_OUDSA5900  2021-03-13T03:13:27Z   
 6   https://github.com/amakhlouf1/Makhlouf_OUDSA5900  2023-08-21T17:16:14Z   
 
              updated_at           owner              pushed_at_dt  
 1  2025-02-10T20:44:12Z  adityakasturi8 2022-07-30 21:20:29+00:00  
 3  2023-06-06T21:14:52Z       Ajordan10 2

In [11]:
# Unsubscribe from all watched repositories meeting filter criteria

def unsubscribe_from_all():
    print("Unsubscribing from watched repositories...")
    page = 1
    total_unsubscribed = 0

    while True:
        r = requests.get(
            f"{GITHUB_API}/user/subscriptions",
            headers=HEADERS,
            params={"per_page": 100, "page": page}
        )
        r.raise_for_status()
        subs = r.json()
        if not subs:
            break

        for repo in tqdm(subs, desc=f"Page {page}"):
            full = repo["full_name"]
            del_r = requests.delete(
                f"{GITHUB_API}/repos/{full}/subscription",
                headers=HEADERS
            )
            if del_r.status_code in (204, 202):
                total_unsubscribed += 1

        page += 1

    return total_unsubscribed

unsubscribed_count = unsubscribe_from_all()
unsubscribed_count


Unsubscribing from watched repositories...


Page 1: 100%|██████████| 17/17 [00:06<00:00,  2.80it/s]


17

In [12]:
# Remove self as collaborator from filtered repositories

def remove_self_as_collaborator(df, username, dry_run=True):
    removed = []
    failed = []

    for full in tqdm(df["full_name"], desc="Removing collaborator access"):
        if dry_run:
            removed.append(full)
            continue

        r = requests.delete(
            f"{GITHUB_API}/repos/{full}/collaborators/{username}",
            headers=HEADERS
        )

        if r.status_code in (204, 202):
            removed.append(full)
        else:
            failed.append((full, r.status_code, r.text))

    return removed, failed

removed, failed = remove_self_as_collaborator(filtered_df, username, dry_run=True)
len(removed), len(failed)


Removing collaborator access: 100%|██████████| 192/192 [00:00<?, ?it/s]


(192, 0)

In [13]:
# Uncomment to actually remove self as collaborator

removed, failed = remove_self_as_collaborator(filtered_df, username, dry_run=False)
removed, failed


Removing collaborator access: 100%|██████████| 192/192 [01:44<00:00,  1.84it/s]


(['adityakasturi8/Analysis-of-Disrupted-Transportation-Network-in-a-Multi-Hazard-Scenario',
  'Ajordan10/ProfessionalPractice23',
  'ak3737/ASGHAR_OUDSA5900',
  'aksharaArcot/Arcot_OUDSA5900',
  'amakhlouf1/Makhlouf_OUDSA5900',
  'anandamoyb/bhattacharya_OUDSA5900',
  'anozman/Nozka_OUDSA5900',
  'asvasan1974/Srinivasan_OUDSA5900',
  'AustinMould/Mould_OUDSA5900',
  'ayohamzat96/hamzat_OUDSA5900',
  'AzmaynInkishaf/INKISHAF_OUDSA5900',
  'b-khoshroo/Khoshroo_OUDSA5900',
  'balanagarajunarra-1/narra_OUDSA5900',
  'barbosaMatheus/RulPrediction',
  'bharawdwajrahul/Bharadwaj_OUDSA5900',
  'Bhavana-parupalli/PARUPALLI_OUDSA5900',
  'Bhavesh-Kilaru/NLP-Using-Federated-Learning',
  'bhavyareddykanuganti/kanuganti_OUDSA5900',
  'BigyaB/Bhattarai_OUDSA5900',
  'biniwollo/Fall24-DSA-5900',
  'bins0000/Binsaleh_OUDSA5900',
  'Biswas-N/recommendation-systems-for-yelp',
  'blevick7/Levicki_OUDSA5900',
  'bmkeafer2112/Keafer_OUDSA5900',
  'bwSimpkins/Simpkins_OUDSA5900',
  'caxton-muchono/Muchono_O

In [14]:
# Summary Report

report = {
    "total_collaborator_repos": len(df),
    "repos_filtered_for_removal": len(filtered_df),
    "unsubscribed_repos": unsubscribed_count,
    "would_remove_collaborator_access": len(removed),
    "failed_removals": len(failed)
}

report


{'total_collaborator_repos': 210,
 'repos_filtered_for_removal': 192,
 'unsubscribed_repos': 17,
 'would_remove_collaborator_access': 192,
 'failed_removals': 0}