In [1]:
from datetime import datetime, timedelta
from google.cloud import bigquery
import os
import pandas as pd

In [2]:
# Point the Google client libraries at the local service-account key file,
# but only when credentials have not already been configured in the
# environment (so an existing GOOGLE_APPLICATION_CREDENTIALS is respected).
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '../../gcp_credentials.json')

# BigQuery client used by every query cell below.
client = bigquery.Client()

# Get active projects / repos

In [3]:
# Repos of projects in the 'octant-03' collection whose last commit is on or
# after 2023-10-01.
# NOTE(review): the displayed row index (up to 369) is larger than the number
# of distinct (project, repo) pairs summed later in the notebook, so the join
# may be fanning out duplicate rows — confirm whether DISTINCT is wanted.
query = """
SELECT
    r.*
FROM `opensource-observer.oso.repos_by_project` r
JOIN `opensource-observer.oso.projects_by_collection` pc 
    ON r.project_id = pc.project_id
WHERE
    collection_slug = 'octant-03'
    AND last_commit_date >= '2023-10-01'
"""
repos = client.query(query).to_dataframe()
repos.tail(3)

Unnamed: 0,project_id,project_slug,project_name,repository_source,artifact_id,repo_is_fork,repo_fork_count,repo_star_count,first_commit_date,last_commit_date,repo_name_with_owner
367,iilyWmaoJCXv0vPfyPQnIGYXoWyTZDlP0oQLMNIVj54=,metagame-metafam,MetaGame,GITHUB,dMDefE9C6JLEDLpTwA9Ppsf8cvgYXvLRHqWCloKEC-U=,True,1,0,2024-03-28 00:00:00+00:00,2024-03-28 00:00:00+00:00,metafam/profile-frame
368,iilyWmaoJCXv0vPfyPQnIGYXoWyTZDlP0oQLMNIVj54=,metagame-metafam,MetaGame,GITHUB,8YzrsNvfD4s9NpSgEMYr64deqY9n6jREE5_SJ3Mos1U=,True,0,0,2024-01-23 00:00:00+00:00,2024-01-23 00:00:00+00:00,metafam/op-chievemints
369,Erx9J64anc8oSeN-wDKm0sojJf8ONrFVYbQ7GFnqSyc=,opensource-observer,Open Source Observer,GITHUB,tPgwwzg8t0YIYr5h3bv2TV5WKNVnKvBw19F-qEkxz-4=,True,0,0,2024-04-17 00:00:00+00:00,2024-04-19 00:00:00+00:00,opensource-observer/tap-airbyte-wrapper


In [4]:
# Number of distinct repositories (owner/name) returned by the repos query.
repos.loc[:, 'repo_name_with_owner'].nunique()

343

In [5]:
# Count the distinct projects each repo is listed under, then show only the
# repos that belong to more than one project.
slugs_by_repo = repos.groupby('repo_name_with_owner')['project_slug']
dups = slugs_by_repo.nunique()
dups.loc[dups.gt(1)]

repo_name_with_owner
ethereum/eips       2
ethereum/web3.py    2
Name: project_slug, dtype: int64

In [6]:
# Distinct project ids present in `repos`, kept as a plain Python list.
project_ids = [*repos['project_id'].unique()]
len(project_ids)

26

In [7]:
# Author's notes on projects that do not appear in the list below:
#   - no activity: 0x-boring-security, dao-drops-dorgtech
#   - not open source: ethdaily, refidao
sorted(repos['project_slug'].unique())

['1hive',
 'drips-network',
 'etheralpha',
 'ethereum-attestation-service',
 'ethereum-cat-herders',
 'ethstaker',
 'fundingthecommons',
 'gitcoin',
 'givepraise',
 'giveth',
 'glo-foundation',
 'growthepie',
 'hypercerts',
 'l2beat',
 'metagame-metafam',
 'metagov',
 'nicenode',
 'opensource-observer',
 'pairwise-general-magic',
 'protocol-guild',
 'revoke-cash',
 'rotki',
 'shutter-network',
 'tor-project',
 'web3',
 'web3py-ethereum']

# Overall developer / contributor metrics

In [8]:
# Contributors to 'octant-03' projects whose most recent contribution is on
# or after 2023-10-01 and who have more than one recorded event.
query = """
SELECT
    d.*
FROM `opensource-observer.oso.int_devs` d
JOIN `opensource-observer.oso.projects_by_collection` pc 
    ON d.project_id = pc.project_id
WHERE
    collection_slug = 'octant-03'
    AND date_last_contribution >= '2023-10-01'
    AND count_events > 1
"""
devs = client.query(query).to_dataframe()
devs.tail(3)

Unnamed: 0,from_id,repository_source,project_id,date_first_contribution,date_last_contribution,count_events
2124,eeKlGvhYoE3HepHrWb3u2ncUqnRfvx7JAF1h0oQJIH4=,GITHUB,_rSUGbgCLjCC0SxFrS6u6299eSa8Vde7C1ftbaFzBuo=,2022-06-13 00:00:00+00:00,2023-10-26 00:00:00+00:00,5.0
2125,0RdvBAabiLVkYv822oxvZ1AmoXu_SEfFbYt61i67Gtc=,GITHUB,_rSUGbgCLjCC0SxFrS6u6299eSa8Vde7C1ftbaFzBuo=,2023-12-13 00:00:00+00:00,2023-12-14 00:00:00+00:00,7.0
2126,g-d8ktMh7fjuFiHiJxb-b8WrfVovswGIuL77e-Mqtks=,GITHUB,-lP_Uvqt9rP8pWZQDW4VejaqpQDfLuc7mG5D59wRfbs=,2023-05-15 00:00:00+00:00,2024-04-09 00:00:00+00:00,708.0


In [9]:
# Contributors with at least 10 recorded events.
has_ten_events = devs['count_events'] >= 10
regular_devs = devs.loc[has_ten_events]

# Contributors whose first contribution is on or after 2023-10-01.
started_recently = devs['date_first_contribution'] >= '2023-10-01'
new_devs = devs.loc[started_recently]

# Print the distinct-contributor count of each cohort: regular first, then new.
for cohort in (regular_devs, new_devs):
    print(cohort['from_id'].nunique())

651
881


# Fetch code metrics by project and create a master dataframe

In [10]:
# Per-project code metrics (6-month activity counts plus contributor and
# developer figures) for every project in the 'octant-03' collection.
query = """

SELECT
    c.project_name,

    c.commits_6_months,
    c.issues_closed_6_months,
    c.issues_opened_6_months,
    c.pull_requests_opened_6_months,
    c.pull_requests_merged_6_months,
    
    c.contributors,
    c.new_contributors_6_months,
    c.avg_fulltime_devs_6_months,
    c.avg_active_devs_6_months
    
FROM `opensource-observer.oso.code_metrics_by_project` c
JOIN `opensource-observer.oso.projects_by_collection` pc 
    ON c.project_id = pc.project_id
WHERE collection_slug = 'octant-03'

"""
metrics = client.query(query).to_dataframe()
metrics.tail(3)

Unnamed: 0,project_name,commits_6_months,issues_closed_6_months,issues_opened_6_months,pull_requests_opened_6_months,pull_requests_merged_6_months,contributors,new_contributors_6_months,avg_fulltime_devs_6_months,avg_active_devs_6_months
26,Hypercerts,362.0,197.0,132.0,192.0,175.0,25,9,0.333333,2.666667
27,NiceNode,255.0,18.0,35.0,161.0,121.0,23,11,0.833333,1.333333
28,Drips,382.0,137.0,148.0,173.0,160.0,38,13,1.0,3.833333


In [79]:
# Display names for the metric columns. They are assigned positionally, so the
# order must match the columns selected by the code-metrics query (everything
# after project_name, which becomes the index).
col_names = [
    'Commit Code - All Repos, Last 6 Months',
    'Issue Closed - All Repos, Last 6 Months',
    'Issue Created - All Repos, Last 6 Months',
    'Pull Request Created - All Repos, Last 6 Months',
    'Pull Request Merged - All Repos, Last 6 Months',

    'Total Contributors - All Time',
    'New Contributors - Last 6 Months',
    'Full-time Developers - Avg Last 6 Months',
    'Total Developers - Avg Last 6 Months'
]
# The two averaged columns are rounded to one decimal; the rest become ints.
avg_cols = ['Full-time Developers - Avg Last 6 Months', 'Total Developers - Avg Last 6 Months']

# One row per project, indexed by project name, with the display column names.
code_metrics = metrics.drop_duplicates().set_index('project_name')
code_metrics.columns = col_names

# Remove the Ethereum Cat Herders and web3.py figures from Protocol Guild —
# presumably to undo double counting of the repos these projects share (see
# the duplicate-repo check earlier in the notebook); confirm with the author.
code_metrics.loc['Protocol Guild'] = (
    code_metrics.loc['Protocol Guild']
    - code_metrics.loc['Ethereum Cat Herders']
    - code_metrics.loc['web3.py']
)

# Keep only projects that actually have a commit count.
has_commits = code_metrics['Commit Code - All Repos, Last 6 Months'].notna()
code_metrics = code_metrics[has_commits]

# Per-project repo statistics computed from the active repos.
repo_stats = repos.groupby('project_name').agg(**{
    'Active Repos - Last 6 Months': ('repo_name_with_owner', 'nunique'),
    'Fork Count - Max of Active Repos': ('repo_fork_count', 'max'),
    'Star Count - Max of Active Repos': ('repo_star_count', 'max'),
})

# Left-join the code metrics onto the repo stats, apply display names for a few
# projects, order rows case-insensitively by project name, and zero-fill gaps.
df = (
    repo_stats
    .join(code_metrics)
    .rename_axis('Project')
    .rename(index={
        'The Metagovernance Project': 'MetaGov',
        'Ether Alpha': 'StateOfEth (by Ether Alpha)',
        'Shutter Network': 'Shielded Voting (by Shutter Network)'
    })
    .assign(name=lambda frame: frame.index.str.lower())
    .sort_values(by='name')
    .drop(columns='name')
    .fillna(0)
)

# Final presentation dtypes for the metric columns.
for col in avg_cols:
    df[col] = df[col].apply(lambda value: round(value, 1))
df = df.astype({col: int for col in col_names if col not in avg_cols})

df

Unnamed: 0_level_0,Active Repos - Last 6 Months,Fork Count - Max of Active Repos,Star Count - Max of Active Repos,"Commit Code - All Repos, Last 6 Months","Issue Closed - All Repos, Last 6 Months","Issue Created - All Repos, Last 6 Months","Pull Request Created - All Repos, Last 6 Months","Pull Request Merged - All Repos, Last 6 Months",Total Contributors - All Time,New Contributors - Last 6 Months,Full-time Developers - Avg Last 6 Months,Total Developers - Avg Last 6 Months
Project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1Hive Gardens,9,37,28,139,42,65,88,67,326,2,0.0,2.2
Drips,9,17,58,382,137,148,173,160,38,13,1.0,3.8
Ethereum Attestation Service,8,55,213,275,20,24,75,42,82,43,0.0,2.0
Ethereum Cat Herders,1,5040,12523,270,31,59,393,252,1515,129,1.0,2.8
EthStaker,6,187,426,430,46,54,277,276,228,40,1.0,4.7
fundingthecommons,2,0,0,3,0,0,21,0,9,3,0.0,0.2
Gitcoin,47,422,886,2877,925,1007,1102,977,1807,67,5.0,17.0
Giveth,22,31,30,2708,710,563,612,566,314,9,1.7,9.7
Glo Dollar,2,1,3,62,0,2,6,4,8,0,0.0,1.0
growthepie,4,5,13,1025,1,0,56,54,8,2,1.2,4.0


In [80]:
# Column totals across all projects in the master dataframe.
df.sum(axis=0)

Active Repos - Last 6 Months                         345.0
Fork Count - Max of Active Repos                   33823.0
Star Count - Max of Active Repos                   92390.0
Commit Code - All Repos, Last 6 Months             41985.0
Issue Closed - All Repos, Last 6 Months             4646.0
Issue Created - All Repos, Last 6 Months            4856.0
Pull Request Created - All Repos, Last 6 Months    11534.0
Pull Request Merged - All Repos, Last 6 Months      9745.0
Total Contributors - All Time                      24374.0
New Contributors - Last 6 Months                    1680.0
Full-time Developers - Avg Last 6 Months              33.5
Total Developers - Avg Last 6 Months                 163.0
dtype: float64

# Export markdown tables

In [83]:
# Column positions (into df) that make up each exported table.
tabs = {
    'summary': [0, 1, 2],
    'activity': [3, 4, 5, 6, 7],
    'contribs': [8, 9, 10, 11],
}

# Projects reported in the "new" group.
new_projects = [
    '1Hive Gardens',
    'StateOfEth (by Ether Alpha)',
    'Ethereum Attestation Service',
    'growthepie',
    'MetaGov',
    'web3.js',
    'web3.py',
]

# Every other project in df, except fundingthecommons, is "returning".
not_returning = set(new_projects) | {'fundingthecommons'}
returning_projects = [project for project in df.index if project not in not_returning]

groups = dict(new=new_projects, returning=returning_projects)

In [84]:
# Write one markdown table per (project group, column tab) combination to
# data/octant-03-<group>-<tab>.md.
# Create the output directory first so the export also works on a fresh checkout.
os.makedirs('data', exist_ok=True)

for group_name, list_of_projects in groups.items():
    # Row selection depends only on the group, so do it once per group.
    group_rows = df.loc[list_of_projects]
    for tab_name, list_of_colnums in tabs.items():
        dff = group_rows.iloc[:, list_of_colnums]
        dff.to_markdown(f'data/octant-03-{group_name}-{tab_name}.md')