In [1]:
from requests import get
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# User-supplied parameters: the account handle appended to the base URL below,
# and the two date-window bounds (kept as strings here; parsed in the setup cell).
username = "ryanorsinger"
start_date, end_date = "2021-07-15", "2021-07-30"

In [3]:
# Setup: turn both date-window strings into pandas Timestamps so they can be
# compared against a datetime column, then assemble the profile URL.
start_date, end_date = (pd.to_datetime(bound) for bound in (start_date, end_date))
base_url = "https://github.com/"
url = f"{base_url}{username}"

In [4]:
# Fetch the profile page.
# - requests applies no timeout unless one is given, so pass one explicitly
#   instead of risking a cell that hangs indefinitely.
# - raise_for_status() surfaces 4xx/5xx responses here, rather than letting an
#   error page be parsed downstream and silently produce no rows.
response = get(url, timeout=30)
response.raise_for_status()

In [5]:
# Parse the raw response bytes with Python's built-in HTML parser, then collect
# every element that carries the "ContributionCalendar-day" CSS class.
soup = BeautifulSoup(markup=response.content, features='html.parser')
all_commits = soup.select(".ContributionCalendar-day")

## Pretty Cool Takeaway:
- If you have a tag from BeautifulSoup, you can use `tag["attribute-name"]` to access the value of that attribute
- For example: `tag["data-date"]` or `commit["data-count"]`

In [6]:
# Build one record per calendar cell that carries BOTH attributes read below.
# The previous guard tested "data-date" twice and never checked "data-count",
# so a cell with a date but no count would raise KeyError on the lookup.
output = [
    {"date": commit["data-date"], "number_of_commits": commit["data-count"]}
    for commit in all_commits
    if commit.has_attr("data-date") and commit.has_attr("data-count")
]

# Name the columns explicitly so that an empty scrape still yields a frame with
# the expected columns instead of failing on the attribute access below.
df = pd.DataFrame(output, columns=["date", "number_of_commits"])

# Ensure proper data types (attribute values arrive as strings)
df = df.assign(
    date=pd.to_datetime(df["date"]),
    number_of_commits=df["number_of_commits"].astype(int),
)

# Peek at the dataframe
df.head()

Unnamed: 0,date,number_of_commits
0,2020-07-26,0
1,2020-07-27,3
2,2020-07-28,0
3,2020-07-29,6
4,2020-07-30,5


In [10]:
# Boolean masks for each side of the date window; both bounds are inclusive.
after_start = df["date"].ge(start_date)
before_end = df["date"].le(end_date)

# A row is kept only when it satisfies both bounds.
mask = after_start & before_end
df.loc[mask].head()

Unnamed: 0,date,number_of_commits
354,2021-07-15,2
355,2021-07-16,1
356,2021-07-17,0
357,2021-07-18,0
358,2021-07-19,2


In [8]:
# Restrict to the rows inside the date window and add up their commit counts.
subset = df.loc[mask]
total_commits = subset["number_of_commits"].sum()
f"{total_commits} total commits in this time frame"

'42 total commits in this time frame'

In [9]:
# Count the rows in the window whose commit count is exactly zero.
zero_commit_rows = subset.loc[subset["number_of_commits"].eq(0)]
number_of_days_with_zero_commits = len(zero_commit_rows)
f"{number_of_days_with_zero_commits} days with zero commits"

'5 days with zero commits'