In [30]:
from urllib.parse import urlencode
import requests
from datetime import datetime

In [31]:
base_url = 'https://api.github.com/search/repositories?q='

#Construct query
query = {}
query['pushed'] = '2021-01-01' #repositories pushed at date
query['sort'] = 'stars-desc' #sort by number of stars in descending order
query_str = ''
for key in query:
    query_str += str(key) + ':' + query[key]
    query_str += '+'
    
#Construct params
params = {}
params['per_page'] = 5 #number of results per page
params['page'] = 2 #return result in page #

#Query URL
url = base_url + query_str + '&' + urlencode(params)

print(url)


https://api.github.com/search/repositories?q=pushed:2021-01-01+sort:stars-desc+&per_page=5&page=2


In [32]:
user = 'user' #fill in username
token = 'token' #fill in token
response = requests.get(url, auth = requests.auth.HTTPBasicAuth(user, token))
response.headers

{'Server': 'GitHub.com', 'Date': 'Wed, 18 May 2022 15:17:34 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Cache-Control': 'no-cache', 'Vary': 'Accept, Authorization, Cookie, X-GitHub-OTP, Accept-Encoding, Accept, X-Requested-With', 'X-OAuth-Scopes': 'admin:enterprise, admin:gpg_key, admin:org, admin:org_hook, admin:public_key, admin:repo_hook, delete:packages, delete_repo, gist, notifications, repo, user, workflow, write:discussion, write:packages', 'X-Accepted-OAuth-Scopes': '', 'github-authentication-token-expiration': '2022-06-16 20:53:42 UTC', 'X-GitHub-Media-Type': 'github.v3; format=json', 'Link': '<https://api.github.com/search/repositories?q=pushed%3A2021-01-01+sort%3Astars-desc+&per_page=5&page=1>; rel="prev", <https://api.github.com/search/repositories?q=pushed%3A2021-01-01+sort%3Astars-desc+&per_page=5&page=3>; rel="next", <https://api.github.com/search/repositories?q=pushed%3A2021-01-01+sort%3Astars-desc+&per_page=5&page=200>; rel

In [33]:
data = response.json()
data

{'total_count': 38739,
 'incomplete_results': False,
 'items': [{'id': 61160796,
   'node_id': 'MDEwOlJlcG9zaXRvcnk2MTE2MDc5Ng==',
   'name': 'eslint-plugin-fp',
   'full_name': 'jfmengels/eslint-plugin-fp',
   'private': False,
   'owner': {'login': 'jfmengels',
    'id': 3869412,
    'node_id': 'MDQ6VXNlcjM4Njk0MTI=',
    'avatar_url': 'https://avatars.githubusercontent.com/u/3869412?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/jfmengels',
    'html_url': 'https://github.com/jfmengels',
    'followers_url': 'https://api.github.com/users/jfmengels/followers',
    'following_url': 'https://api.github.com/users/jfmengels/following{/other_user}',
    'gists_url': 'https://api.github.com/users/jfmengels/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/jfmengels/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/jfmengels/subscriptions',
    'organizations_url': 'https://api.github.com/users/jfmengels/orgs',
    'repos

# Q1: Language extracting from results

In [34]:
repos = data['items']
language = {}
for repo in repos:
    if repo['language'] not in language.keys():
        language[repo['language']] = 1
    else:
        language[repo['language']] += 1
language

{'JavaScript': 1, 'Java': 1, None: 2, 'Kotlin': 1}

# Q2: Commits frequency counts

In [48]:
freq_repo = {}
for repo in repos:
    id = repo['id']
    commit_url = repo['commits_url'].split('{')[0] #extract url for commit list
    created_date = datetime.strptime(repo['created_at'], "%Y-%m-%dT%H:%M:%SZ")
    now = datetime.now()
    day_diff = (now - created_date).days #days since repo created
    commit_response = requests.get(commit_url, auth = requests.auth.HTTPBasicAuth(user, token)) #request commit list
    commit_freq = len(commit_response.json()) / day_diff # number of commits/days
    freq_repo[id] = commit_freq

print(freq_repo)

{61160796: 0.013869625520110958, 79226461: 0.015408320493066256, 28907793: 0.005208333333333333, 258547297: 0.03713527851458886, 71367962: 0.014727540500736377}
