In [1]:
import requests
import yaml
import os
from dotenv import load_dotenv

def fetch_gov_github_accounts(url, timeout=30):
    """Download a YAML document and return its parsed contents.

    Args:
        url: Address of the YAML file to download.
        timeout: Seconds to wait on the server before ``requests`` aborts
            the call. Defaults to 30.

    Returns:
        The object produced by ``yaml.safe_load`` for the response body, or
        ``None`` when the server answers with any status other than 200.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return yaml.safe_load(response.text)

def fetch_repository_details(username, token, timeout=30):
    """Collect a short summary of every repository listed for a GitHub account.

    Args:
        username: Account name used in the ``/users/{username}/repos`` URL.
        token: GitHub API token. When falsy (``None`` or ``""``) no
            ``Authorization`` header is sent, so the requests go out
            unauthenticated instead of carrying the literal text "token None".
        timeout: Seconds to wait on each HTTP request. Defaults to 30.

    Returns:
        A list with one dict per repository, with the keys ``name``,
        ``description``, ``stars``, ``language`` and ``readme`` (the first
        100 characters of the README, or a placeholder string). The list is
        empty when the account has no repositories. ``None`` is returned when
        any page of the repository listing answers with a status other
        than 200.

    Raises:
        Exceptions raised by ``requests.get`` are not caught here.
    """
    headers = {'Authorization': f'token {token}'} if token else {}
    repos_url = f"https://api.github.com/users/{username}/repos"

    # The listing endpoint is paginated. Ask for the largest documented page
    # size and keep requesting pages until a short (or empty) page arrives.
    per_page = 100
    page = 1
    repos_data = []
    while True:
        repos_response = requests.get(
            repos_url,
            headers=headers,
            params={'per_page': per_page, 'page': page},
            timeout=timeout,
        )
        if repos_response.status_code != 200:
            return None
        batch = repos_response.json()
        repos_data.extend(batch)
        if len(batch) < per_page:
            break
        page += 1

    full_repo_details = []
    for repo in repos_data:
        full_repo_details.append({
            'name': repo['name'],
            'description': repo['description'] or "No description",
            'stars': repo['stargazers_count'],
            'language': repo['language'] or "None specified",
            'readme': _fetch_readme_preview(username, repo['name'], headers, timeout),
        })
    return full_repo_details


def _fetch_readme_preview(username, repo_name, headers, timeout):
    """Return the first 100 characters of a repository README, or a placeholder."""
    readme_url = f"https://api.github.com/repos/{username}/{repo_name}/readme"
    readme_response = requests.get(readme_url, headers=headers, timeout=timeout)
    if readme_response.status_code != 200:
        return "README not available"

    # Guard against a missing/null download link instead of raising KeyError
    # or requesting the URL "None".
    download_url = readme_response.json().get('download_url')
    if not download_url:
        return "README not available"

    readme_content = requests.get(download_url, timeout=timeout).text
    return readme_content[:100]  # Truncate for brevity

def save_to_markdown(repos, filename):
    """Write repository summaries to ``filename`` as a Markdown table.

    Args:
        repos: Either an iterable of dicts with the keys ``name``,
            ``description``, ``stars``, ``language`` and ``readme``, or a
            DataFrame-like object (anything exposing ``columns`` and
            ``to_dict('records')``) holding those columns.
        filename: Path of the file to create or overwrite.

    The README column shows at most the first 50 characters followed by
    ``...``. The file is written as UTF-8.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn
    # it into a list of row dicts before looping.
    if hasattr(repos, 'columns') and hasattr(repos, 'to_dict'):
        repos = repos.to_dict('records')

    def as_cell(value):
        # Escape the column separator and fold every whitespace run
        # (including newlines) into one space so the value stays on one row.
        return ' '.join(str(value).replace('|', '\\|').split())

    # Explicit encoding: README text is often non-ASCII and the platform
    # default encoding may not be able to represent it.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('| Repository Name | Description | Stars | Language | README |\n')
        f.write('|-----------------|-------------|-------|----------|--------|\n')
        for repo in repos:
            readme_preview = str(repo['readme'])[:50]
            f.write(
                f"| {as_cell(repo['name'])} | {as_cell(repo['description'])} "
                f"| {as_cell(repo['stars'])} | {as_cell(repo['language'])} "
                f"| {as_cell(readme_preview)}... |\n"
            )


# Address of the YAML file listing government GitHub accounts
url = "https://raw.githubusercontent.com/github/government.github.com/gh-pages/_data/governments.yml"

# Pull the settings stored in the local .env file into the environment
load_dotenv('.env')

# GitHub API token; None when GITHUB_TOKEN is not defined
github_token = os.environ.get('GITHUB_TOKEN')

# Main execution: download and parse the account list
accounts = fetch_gov_github_accounts(url)

# Display the parsed result as the cell output
accounts


{'Argentina': ['argob',
  'cifasis',
  'gcba',
  'inti-cmnb',
  'municipalidad-de-vicente-lopez',
  'municipioriogrande'],
 'Australia': ['actesa',
  'actgov',
  'agnsw',
  'AtlasOfLivingAustralia',
  'ausdto',
  'australianantarcticdatacentre',
  'AustralianAntarcticDivision',
  'berowrarfb',
  'bom-radar',
  'city-of-melbourne',
  'commerce-wa-ols',
  'consumerdataright',
  'data61',
  'datagovau',
  'dbca-wa',
  'dpc-sdp',
  'dpipwe',
  'dssgovaus',
  'envris',
  'Fire-and-Rescue-NSW',
  'gccgisteam',
  'GeoscienceAustralia',
  'govau',
  'govcms',
  'gs-dawr',
  'healthgovau',
  'Healthway',
  'hiscom',
  'innovationgovau',
  'IPAustralia',
  'Landgate',
  'nla',
  'NSW-eTendering',
  'NSWPlanning',
  'pmcau',
  'PublicRecordOfficeVictoria',
  'qld-gov-au',
  'srnsw',
  'SunshineCoastCouncil',
  'treasury-aus',
  'victoriangovernment',
  'wagov',
  'wamuseum'],
 'Austria': ['datagvat'],
 'Belgium': ['belgianpolice',
  'CIRB',
  'Fedict',
  'inbo',
  'NationalBankBelgium',
  'onroer

In [3]:
import pandas as pd

# Gather repository summaries for every account of every country
all_repos = []
if accounts:
    for country, usernames in accounts.items():
        for username in usernames:
            repo_details = fetch_repository_details(username, github_token)
            # None is the failure signal; an empty list is a successful
            # fetch for an account that simply has no repositories.
            if repo_details is None:
                print(f"Failed to fetch data for {username}")
                continue
            all_repos.extend(repo_details)
            print(f"Data for {username} fetched and processed.")

# Create DataFrame and save to Markdown
if all_repos:
    repos_df = pd.DataFrame(all_repos)
    markdown_file = "all_government_repositories.md"
    # Hand over the list of row dicts: iterating a DataFrame yields column
    # labels, which would make save_to_markdown fail on repo['name'].
    save_to_markdown(all_repos, markdown_file)
    print(f"All data saved to {markdown_file}")
    # Optionally save to CSV or another format
    repos_df.to_csv("all_government_repositories.csv", index=False)
    print("Data also saved as a CSV file.")
else:
    print("No repository data collected.")

Data for argob fetched and processed.
Data for cifasis fetched and processed.
Data for gcba fetched and processed.
Data for inti-cmnb fetched and processed.
Data for municipalidad-de-vicente-lopez fetched and processed.
Data for municipioriogrande fetched and processed.
Failed to fetch data for actesa
Data for actgov fetched and processed.
Data for agnsw fetched and processed.
Data for AtlasOfLivingAustralia fetched and processed.
Data for ausdto fetched and processed.
Data for australianantarcticdatacentre fetched and processed.
Data for AustralianAntarcticDivision fetched and processed.
Data for berowrarfb fetched and processed.
Data for bom-radar fetched and processed.
Data for city-of-melbourne fetched and processed.
Data for commerce-wa-ols fetched and processed.
Data for consumerdataright fetched and processed.
Data for data61 fetched and processed.
Data for datagovau fetched and processed.
Data for dbca-wa fetched and processed.
Data for dpc-sdp fetched and processed.
Data for d