In [1]:
from collections import defaultdict
from itertools import combinations
import json
import numpy as np
import os
import pandas as pd
from google.cloud import bigquery

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../oso_gcp_credentials.json'
client = bigquery.Client(project='opensource-observer')

## Grab historic Gitcoin funding event data for all projects
- Subscribe to the OSO Production dataset on BigQuery (see docs [here](https://docs.opensource.observer/docs/get-started/bigquery))
- Run the query below
- Add filter for OSS rounds starting GG18

In [6]:
query = """
    select
      oso_project_name,
      oso_display_name,
      round_number,
      oso_generated_round_label,
      gitcoin_project_id,
      count(distinct donor_address) as num_donors,
      sum(case when donor_address is not null then amount_in_usd else 0 end) as crowd_funding_usd,
      sum(case when donor_address is null then amount_in_usd else 0 end) as match_funding_usd
    from `oso_production.gitcoin_funding_events_by_project_v0`
    where ((round_number = 22 and gitcoin_round_id = '608') or
      (round_number = 22 and gitcoin_round_id = '609') or
      (round_number = 22 and gitcoin_round_id = '610') or
      (round_number = 22 and gitcoin_round_id = '611') or
      (round_number = 20 and gitcoin_round_id = '23') or
      (round_number = 20 and gitcoin_round_id = '25') or
      (round_number = 20 and gitcoin_round_id = '26') or
      (round_number = 20 and gitcoin_round_id = '27') or
      (round_number = 19 and lower(gitcoin_round_id) = '0xd4cc0dd193c7dc1d665ae244ce12d7fab337a008') or
      (round_number = 18 and lower(gitcoin_round_id) = '0x8de918f0163b2021839a8d84954dd7e8e151326d'))
    group by 1, 2, 3, 4, 5
    having match_funding_usd > 0
    order by 8 desc
"""
results = client.query(query)
gitcoin_df = results.to_dataframe()
gitcoin_df.tail(5)



Unnamed: 0,oso_project_name,oso_display_name,round_number,oso_generated_round_label,gitcoin_project_id,num_donors,crowd_funding_usd,match_funding_usd
856,statwig,StaTwig Supply Chain Visibility for Life Savin...,18,GG-18 - 0x8de918f0163b2021839a8d84954dd7e8e151...,0x0718e3d08633de4d7d2a6cf71e13f79507ba19f43895...,110,81.269239,6.300478
857,spacemarketplace,Space Marketplace,18,GG-18 - 0x8de918f0163b2021839a8d84954dd7e8e151...,0xd4e30855d467bf674510c3489d7e845152ed538a39c1...,109,79.64269,6.300478
858,scryprotocol,Scry Protocol,18,GG-18 - 0x8de918f0163b2021839a8d84954dd7e8e151...,0x4660be9bc678bba66a839a73c56212b592c15e97c8b6...,130,110.910948,6.000455
859,,,20,GG-20 - 25,0xc313a8cea83983edd81609810cba8f3828b2903438af...,8,60.266736,5.093193
860,icdevsorg,ICDevs.org,22,GG-22 - 610,0xab3b35780a86f5ec610e911fe359e8cbbef9278b300e...,45,61.867394,1.199074


## Apply some round and project quality filters

- Filter out less competitive rounds
- Ensure projects have been in multiple rounds

In [8]:
round_stats = gitcoin_df.groupby('oso_generated_round_label').agg({
    'oso_project_name': 'nunique',
    'gitcoin_project_id': 'nunique',
    'match_funding_usd': 'sum'
}).sort_values(by='match_funding_usd', ascending=False)

oss_heavy_rounds = round_stats[(round_stats['match_funding_usd'] >= 20_000) & (round_stats['gitcoin_project_id'] >= 10)]
oss_heavy_rounds


Unnamed: 0_level_0,oso_project_name,gitcoin_project_id,match_funding_usd
oso_generated_round_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
GG-20 - 27,38,46,386673.696867
GG-22 - 609,38,53,333145.428515
GG-20 - 26,44,75,314686.761894
GG-22 - 610,35,47,312112.764672
GG-18 - 0x8de918f0163b2021839a8d84954dd7e8e151326d,115,123,302566.342875
GG-22 - 608,82,139,301761.84701
GG-20 - 25,83,153,299575.594535
GG-19 - 0xd4cc0dd193c7dc1d665ae244ce12d7fab337a008,105,131,202796.188219
GG-20 - 23,10,52,100673.744546
GG-22 - 611,7,17,99922.467496


In [9]:
# Debugging: Check for projects with missing display names
missing_names_df = (gitcoin_df[gitcoin_df['oso_display_name'].isnull()]
    .groupby('gitcoin_project_id')
    .agg({
        'match_funding_usd': 'sum'
    })
    .sort_values('match_funding_usd', ascending=False)
)

print(f"Total projects with missing display names: {len(missing_names_df)}")
missing_names_df.head(10)

Total projects with missing display names: 247


Unnamed: 0_level_0,match_funding_usd
gitcoin_project_id,Unnamed: 1_level_1
0xa7b8948d5c8b8d2cd502cd9c8638dda011ecfbfc10abe0df58a504e4802f5d6d,29976.859375
0xa43d39ea6e0c57d3082983686ceffca5cd0e96dcef1ddf84fb124623b0bbe05a,16771.39264
0x1249835eaab22b2d2fd7f34556a20cae9cfd902f56c54789730081e6996ed012,15507.587969
0xbc0c38f75ec02f37af3c6cc5acef9951e0f69c0b49a26e9bf680b1fbcf4f76cb,14647.892565
0xf56dd3fa328f36673fd27422bf169559797919b4e8dd639b341d14127219c70a,14404.092418
0x1bb64e87b901e6e0f4d7877a99bc5a035ca4be1ec2517fd3f2e00337e636daf7,14360.838623
0x0a20445056c91be20dac69d794c92f57e0eb88ad54f2ef4106d7026bc4c6fdc1,12751.498895
0x4a960782a3b8681844dfecd31dee6fcbbb06028493240b524e8fa1dab18b2b64,11699.284623
0x04eceede743d19c498a90952e9903e9ee46f3dc77766bcc85d1eb9b3e7ce7a5e,11327.034335
0xc3dd0086d1972b4d51aaf1fdab9438d4e38ed8dccd6c19b1a34d03e5b4c76692,9992.286719


Filter for:
- OSS rounds based on size and competitiveness
- Projects with participation in at least 3 rounds of the 5 OSS rounds since GG18
- Projects indexed in OSO

In [30]:
oss_heavy_rounds_list = list(oss_heavy_rounds.index)
round_filter = gitcoin_df['oso_generated_round_label'].isin(oss_heavy_rounds_list)

project_round_count = gitcoin_df.groupby('oso_project_name')['round_number'].nunique()
projects_above_round_count_threshold = list(project_round_count[project_round_count >= 3].index)
project_filter = gitcoin_df['oso_project_name'].isin(projects_above_round_count_threshold)

oss_project_filter = gitcoin_df['oso_project_name'].isna() == False

filtered_df = gitcoin_df[round_filter & project_filter & oss_project_filter].reset_index(drop=True)
filtered_df.head()

Unnamed: 0,oso_project_name,oso_display_name,round_number,oso_generated_round_label,gitcoin_project_id,num_donors,crowd_funding_usd,match_funding_usd
0,defi-llama,DefiLlama,22,GG-22 - 609,0xbb1d90979d5a76457be64366c845853b41d0f849f51c...,8064,12537.023827,29976.859375
1,defi-llama,DefiLlama,20,GG-20 - 26,0xbb1d90979d5a76457be64366c845853b41d0f849f51c...,6539,28073.700596,29928.8375
2,defieye,DeFiEye,22,GG-22 - 610,0xe955bf7fdaaa527fdb72eeaeab78fd0be4b3acc51778...,2470,4185.604407,20631.273696
3,scopelift,ScopeLift,20,GG-20 - 26,0xd046794292aa91ab772ee8c2c37f86e822d43cd8e129...,3066,9928.825923,17278.815754
4,impersonator-eth,Impersonator,20,GG-20 - 27,0x4c8d716d67653b5aa14300221f05e19c96cde76fcf62...,198,3090.598027,16482.409388


## Model head-to-head appearances by projects in the same round

- Weight match funding more heavily than crowd funding
- Match funding accounts for collusion / Sybil, etc

In [31]:
simulation_data = []
for gitcoin_round in oss_heavy_rounds_list:
    dff = filtered_df[filtered_df['oso_generated_round_label'] == gitcoin_round]
    round_num = dff['round_number'].mean()
    if round_num != int(round_num):
        print(gitcoin_round)
    projects = list(dff['oso_project_name'].unique())
    comparisons = combinations(projects, 2)
    for (project_a, project_b) in comparisons:
        match_a = dff[dff['oso_project_name'] == project_a]['match_funding_usd'].sum()
        match_b = dff[dff['oso_project_name'] == project_b]['match_funding_usd'].sum()
        crowd_a = dff[dff['oso_project_name'] == project_a]['crowd_funding_usd'].sum()
        crowd_b = dff[dff['oso_project_name'] == project_b]['crowd_funding_usd'].sum()
        amount_total = match_a + match_b + crowd_a + crowd_b
        simulation_data.append({
            'round_number': int(round_num),
            'project_a': project_a,
            'project_b': project_b,
            'weight_a': (match_a + crowd_a * 0.5) / amount_total,
            'weight_b': (match_b + crowd_b * 0.5) / amount_total
        })
        
simulation_df = pd.DataFrame(simulation_data)
simulation_df.tail()

Unnamed: 0,round_number,project_a,project_b,weight_a,weight_b
5610,20,citizenwallet,zenguardxyz,0.878384,0.068939
5611,20,citizenwallet,brewitmoney,0.878384,0.068939
5612,20,dspytdao,zenguardxyz,0.579812,0.3941
5613,20,dspytdao,brewitmoney,0.579812,0.3941
5614,20,zenguardxyz,brewitmoney,0.486939,0.486939


In [32]:
simulation_df.head()

Unnamed: 0,round_number,project_a,project_b,weight_a,weight_b
0,20,impersonator-eth,eiptools,0.460525,0.460525
1,20,impersonator-eth,otterscan,0.481382,0.450657
2,20,impersonator-eth,synpress-synthexio,0.547542,0.39229
3,20,impersonator-eth,opensource-observer,0.618865,0.314837
4,20,impersonator-eth,elefria-labs,0.624188,0.301498


## Run the standard ELO algorithm

In [33]:
def expected_score(rating_a, rating_b):
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating_a, rating_b, score_a, k_factor):
    expected_a = expected_score(rating_a, rating_b)
    rating_a_new = rating_a + k_factor * (score_a - expected_a)
    rating_b_new = rating_b + k_factor * ((1 - score_a) - (1 - expected_a))
    return rating_a_new, rating_b_new
    
def elo_simulation(dataframe):
    
    elo_ratings = defaultdict(lambda: 1500) # Default ELO rating for all projects
    appearances = defaultdict(int)
    
    for _,row in dataframe.iterrows():
        project_a = row['project_a']
        project_b = row['project_b']
        share_a = row['weight_a']
        score_a = 1 if share_a > 0.5 else 0 if share_a < 0.5 else 0.5

        k_a = 40 / (1 + appearances[project_a] / 5)
        k_b = 40 / (1 + appearances[project_b] / 5)
        k_factor = (k_a + k_b) / 2
        
        elo_ratings[project_a], elo_ratings[project_b] = update_elo(
            elo_ratings[project_a], elo_ratings[project_b], score_a, k_factor
        )
        appearances[project_a] += 1
        appearances[project_b] += 1
    
    margin_of_error = {project: 400 / np.sqrt(appearances[project]) for project in appearances}

    return (
        pd.DataFrame([
            {
                'project': project,
                'head-to-head_comparisons': appearances[project],        
                'elo_rating': rating,        
                'margin_of_error': margin_of_error[project]
            }
            for project, rating in elo_ratings.items()
        ])
        .sort_values(by='elo_rating', ascending=False)
        .set_index('project', drop=True)
    )

elo_df = elo_simulation(simulation_df)
elo_df.head()

Unnamed: 0_level_0,head-to-head_comparisons,elo_rating,margin_of_error
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
defi-llama,140,1776.838942,33.80617
heyxyz,200,1763.665548,28.284271
revoke-cash,200,1763.245051,28.284271
jedi-swap,167,1749.91489,30.952929
idriss-crypto,167,1743.057513,30.952929


In [34]:
df = (
    elo_df
    .join(gitcoin_df
          .groupby('oso_project_name')
          .agg({
            'round_number': 'nunique',
            'num_donors': 'max',
            'crowd_funding_usd': 'sum',
            'match_funding_usd': 'sum'
            })
        )
    .rename(columns={
        'round_number': 'num_main_round_appearances',
        'num_donors': 'highest_unique_donor_count_in_one_round',
    })
)
df.head()

Unnamed: 0_level_0,head-to-head_comparisons,elo_rating,margin_of_error,num_main_round_appearances,highest_unique_donor_count_in_one_round,crowd_funding_usd,match_funding_usd
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
defi-llama,140,1776.838942,33.80617,4,10751,78859.452136,87210.381367
heyxyz,200,1763.665548,28.284271,4,21309,129979.281943,59814.988384
revoke-cash,200,1763.245051,28.284271,4,15545,92239.365225,59814.988384
jedi-swap,167,1749.91489,30.952929,3,21132,101905.433032,44830.707134
idriss-crypto,167,1743.057513,30.952929,3,10623,39758.306933,42153.577948


In [35]:
df.to_csv('data/2025-01-30_gitcoin_oss_elo_ratings.csv')

# Fetch Code Metrics for the projects in the ELO ratings


In [36]:
# Get list of projects from df
project_list = df.index.tolist()

# Create SQL-friendly string of projects
project_string = "', '".join(project_list)

query = """
    SELECT *
    FROM `oso_production.code_metrics_by_project_v1`
    WHERE project_name IN ('""" + project_string + """')
"""

cm_results = client.query(query)
df_metrics = cm_results.to_dataframe()
print(f"Number of matching projects with code metrics: {len(df_metrics)}")
df_metrics.head()



Number of matching projects with code metrics: 71


Unnamed: 0,project_id,project_source,project_namespace,project_name,display_name,event_source,repository_count,first_created_at_date,last_updated_at_date,first_commit_date,...,active_developer_count_6_months,commit_count_6_months,opened_pull_request_count_6_months,merged_pull_request_count_6_months,opened_issue_count_6_months,closed_issue_count_6_months,comment_count_6_months,release_count_6_months,time_to_first_response_days_average_6_months,time_to_merge_days_average_6_months
0,Eg2IQ9X58yQRQAx7atRetDPqUdH6zOTTBScer1jSVrM=,OSS_DIRECTORY,oso,commons-stack,Commons Stack,GITHUB,36,2019-03-15 12:51:40+00:00,2024-12-13 17:49:17+00:00,2019-03-15 12:54:57+00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0meRIUEV48cb8Plf7X1knIWJZmcrGA3NY91bcOlUNk8=,OSS_DIRECTORY,oso,1hive,1Hive Gardens,GITHUB,169,2017-07-03 23:52:03+00:00,2025-01-06 10:57:50+00:00,2017-07-03 23:59:01+00:00,...,3.0,78.0,97.0,86.0,88.0,111.0,428.0,0.0,21.725579,0.0
2,osL3B-y8yn0ALvvF90E-K8sPXgenbqc3PCpeCC67MHE=,OSS_DIRECTORY,oso,metagov,The Metagovernance Project,GITHUB,57,2019-12-15 13:23:53+00:00,2025-01-17 09:44:58+00:00,2020-01-29 13:19:04+00:00,...,11.0,370.0,35.0,26.0,20.0,17.0,84.0,1.0,56.954514,0.0
3,V-4zX6sbpekcUa8dFB8nfRW3IpxnGtGjDIUHg8eJmnc=,OSS_DIRECTORY,oso,shapeshift,ShapeShift DAO,GITHUB,49,2019-08-05 16:50:09+00:00,2025-01-17 22:03:28+00:00,2019-08-05 17:49:36+00:00,...,6.0,801.0,834.0,799.0,439.0,442.0,4380.0,0.0,15.592773,0.0
4,_rSUGbgCLjCC0SxFrS6u6299eSa8Vde7C1ftbaFzBuo=,OSS_DIRECTORY,oso,giveth,Giveth,GITHUB,136,2016-10-04 05:02:00+00:00,2025-01-19 08:03:20+00:00,2016-10-04 20:53:50+00:00,...,20.0,1797.0,521.0,451.0,319.0,583.0,4085.0,11.0,187.526014,0.040972


In [37]:
combined_df = df.reset_index().merge(
    df_metrics,
    left_on='project',
    right_on='project_name',
    how='left'
)

# Optionally set project back as index
combined_df = combined_df.set_index('project')

# Drop the redundant project_name column if desired
combined_df = combined_df.drop('project_name', axis=1)

combined_df.head()

Unnamed: 0_level_0,head-to-head_comparisons,elo_rating,margin_of_error,num_main_round_appearances,highest_unique_donor_count_in_one_round,crowd_funding_usd,match_funding_usd,project_id,project_source,project_namespace,...,active_developer_count_6_months,commit_count_6_months,opened_pull_request_count_6_months,merged_pull_request_count_6_months,opened_issue_count_6_months,closed_issue_count_6_months,comment_count_6_months,release_count_6_months,time_to_first_response_days_average_6_months,time_to_merge_days_average_6_months
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
defi-llama,140,1776.838942,33.80617,4,10751,78859.452136,87210.381367,6lg5zQJ4GYDjvO5NvVAfPBWuxNgOeEcLAfcL01ws5Us=,OSS_DIRECTORY,oso,...,13.0,10469.0,4832.0,3921.0,148.0,143.0,10115.0,0.0,8.989402,0.0
heyxyz,200,1763.665548,28.284271,4,21309,129979.281943,59814.988384,m-ayiyYXbU1nR5aCLuliEKsYRca6b_8aNzdBbKn56pM=,OSS_DIRECTORY,oso,...,2.0,1786.0,199.0,159.0,160.0,152.0,307.0,1.0,55.857755,0.0
revoke-cash,200,1763.245051,28.284271,4,15545,92239.365225,59814.988384,zMr9ziUIls0NIAm5eLswVdNAjL-X9j4o2RcEQmrLf4c=,OSS_DIRECTORY,oso,...,1.0,169.0,37.0,34.0,9.0,8.0,70.0,17.0,160.482639,0.0
jedi-swap,167,1749.91489,30.952929,3,21132,101905.433032,44830.707134,Byy4BY4X4gNzY6uptJrFUgDV8o_25jaMAeNhiaUJT9c=,OSS_DIRECTORY,oso,...,5.0,88.0,44.0,43.0,20.0,25.0,54.0,0.0,125.282378,0.0
idriss-crypto,167,1743.057513,30.952929,3,10623,39758.306933,42153.577948,TpsYn6RRZN_9nai7dLsf294TRyQDmZYpeQEnaIqugw4=,OSS_DIRECTORY,oso,...,5.0,352.0,281.0,250.0,0.0,4.0,430.0,0.0,1.85744,0.0


In [38]:
combined_df.to_csv('data/2025-01-30_gitcoin_oss_elo_ratings_with_metrics.csv')