Skip to content
GitHub scraper and source code for a dashboard showing the Open Science MOOC's repository statistics and user activities
Branch: master
Clone or download
Latest commit 795c790 Jun 18, 2019
Permalink
Type Name Latest commit message Commit time
Failed to load latest commit information.
.gitignore Update .gitignore Jun 7, 2019
README.md Update README.md Jun 10, 2019
index.Rmd Update text Jun 18, 2019
index.html Update text Jun 18, 2019
mooc-logo.png Add MOOC logo Jun 7, 2019
osmooc-github.RData Update data Jun 18, 2019
style.css Add MOOC logo Jun 7, 2019

README.md

Open Science MOOC Dashboard

Data and source code for this dashboard on the Open Science MOOC's GitHub repository statistics and user activities.

How to collect GitHub data

Setup

# Install and load packages using pacman
# requireNamespace() only checks whether pacman is installed; unlike require()
# it does not attach the package or warn when it is missing. The package is
# attached explicitly by library() on the next line.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
library(pacman)

# p_load() comes from pacman and is given the packages used by the snippets below
p_load(httr, jsonlite, tidyverse)

Authentication

See, e.g., this article for instructions on how to register your own GitHub OAuth app; its name, key, and secret replace the "[INSERT HERE]" placeholders below.

# Set OAuth
# The endpoint lookup below is not stored; its value is only shown when the
# script is run with top-level auto-printing.
httr::oauth_endpoints("github")

# Describe the GitHub app (replace the placeholders with your own values)
gh_app <- httr::oauth_app(
  appname = "[INSERT HERE]",
  key = "[INSERT HERE]",
  secret = "[INSERT HERE]"
)

# Get credentials and config
# `gtoken` is the request config that get_data() passes to every API call.
github_token <- httr::oauth2.0_token(
  endpoint = httr::oauth_endpoints("github"),
  app = gh_app
)
gtoken <- httr::config(token = github_token)

Custom functions to retrieve data from GitHub

# Function to submit API request(s), parse JSON content, and convert to data frame
#
# The previous version always requested `page = 1`, so any endpoint with more
# than `per_page` items was silently truncated. This version keeps requesting
# the next page until a page comes back with fewer than `per_page` rows.
#
# Arguments:
#   url       - API endpoint to query.
#   per_page  - page size sent to the API (default 100, as before). Values
#               above the API's own maximum would make every page look
#               "short" and stop paging early.
#   max_pages - upper bound on the number of pages requested (default Inf).
#
# Returns whatever jsonlite::fromJSON() produces for the endpoint: a data frame
# for array responses (all pages combined), or the parsed first page unchanged
# when the response is not tabular.
#
# Uses the global request config `gtoken` for authentication and stops with an
# HTTP error (httr::stop_for_status) if any request fails.
get_data <- function(url, per_page = 100, max_pages = Inf) {

  pages <- list()
  page <- 1

  repeat {
    res <- httr::GET(
      url,
      query = list(state = "all", per_page = per_page, page = page),
      gtoken
    )
    httr::stop_for_status(res)

    # `as = "text"` is the documented way to get the raw body as a string
    parsed <- jsonlite::fromJSON(
      httr::content(res, as = "text", encoding = "UTF-8")
    )

    if (!is.data.frame(parsed)) {
      # Non-tabular first page (e.g. an empty result): hand it back untouched
      if (page == 1) {
        return(parsed)
      }
      # A non-tabular later page means there is nothing more to collect
      break
    }

    pages[[length(pages) + 1]] <- parsed

    # A short page is the last page
    if (nrow(parsed) < per_page || page >= max_pages) {
      break
    }
    page <- page + 1
  }

  # Single page: return it as-is so results for small endpoints are unchanged
  if (length(pages) == 1) {
    return(pages[[1]])
  }

  # rbind_pages() also handles the nested data frame columns fromJSON creates
  jsonlite::rbind_pages(pages)
}

# Function to query several API endpoints and stack the results
#
# Each element of `urls` is passed to get_data(); every result is coerced with
# as.data.frame() and the pieces are row-bound into one data frame. The
# "df_id" column identifies which element of `urls` a row came from (the
# element's name if `urls` is named, otherwise its position).
get_data_multiple <- function(urls) {

  responses <- purrr::map(urls, get_data)
  frames <- purrr::map(responses, as.data.frame)

  dplyr::bind_rows(frames, .id = "df_id")
}

Collect GitHub data on the Open Science MOOC

# Retrieve Open Science MOOC repos and keep those whose name contains
# "Module-" (intended to select modules 1-10 only)
repos_df <- get_data("https://api.github.com/orgs/OpenScienceMOOC/repos")
repos_df_mod <- dplyr::filter(
  repos_df,
  stringr::str_detect(name, "Module-")
)

# For each retained repo, query the per-repo API URLs listed in repos_df_mod

# Contributors
contributors_df <- get_data_multiple(repos_df_mod$contributors_url)

# Stargazers
stargazers_df <- get_data_multiple(repos_df_mod$stargazers_url)

# Subscribers
subscribers_df <- get_data_multiple(repos_df_mod$subscribers_url)

# Export data: all four objects are written to a single .RData file
save(
  repos_df_mod,
  contributors_df,
  stargazers_df,
  subscribers_df,
  file = "osmooc-github.RData"
)
You can’t perform that action at this time.