In [1]:
from collections import defaultdict
from itertools import combinations
import json
import numpy as np
import os
import pandas as pd
from google.cloud import bigquery

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../oso_gcp_credentials.json'
client = bigquery.Client(project='opensource-observer')

## Grab historic Gitcoin funding event data for all projects
- Subscribe to the OSO Production dataset on BigQuery (see docs [here](https://docs.opensource.observer/docs/get-started/bigquery))
- Run the query below

In [2]:
query = """
    select
      oso_project_name,
      oso_display_name,
      round_number,
      oso_generated_round_label,
      count(distinct donor_address) as num_donors,
      sum(case when donor_address is not null then amount_in_usd else 0 end) as crowd_funding_usd,
      sum(case when donor_address is null then amount_in_usd else 0 end) as match_funding_usd
    from `oso_production.gitcoin_funding_events_by_project_v0`
    where round_number >= 18
    group by 1, 2, 3, 4
    having match_funding_usd > 0
    order by 7 desc
"""
results = client.query(query)
gitcoin_df = results.to_dataframe()
gitcoin_df.tail(5)

Unnamed: 0,oso_project_name,oso_display_name,round_number,oso_generated_round_label,num_donors,crowd_funding_usd,match_funding_usd
1189,scryprotocol,Scry Protocol,18,GG-18 - 0x8de918f0163b2021839a8d84954dd7e8e151...,130,110.910948,6.000455
1190,coinpassport,Coinpassport,18,GG-18 - 0x9331fde4db7b9d9d1498c09d30149929f24c...,17,39.897208,5.098796
1191,zenbiteth,zenbit.eth,22,GG-22 - 642,32,323.949654,2.940548
1192,icdevsorg,ICDevs.org,22,GG-22 - 610,45,61.867394,1.199074
1193,ethelo,Ethelo,18,GG-18 - 0x2871742b184633f8dc8546c6301cbc209945...,30,177.376357,0.750057


## Apply some round and project quality filters

- Ensure rounds are OSS focused
- Filter out less competitive rounds
- Ensure projects have been in multiple rounds

In [3]:
round_stats = gitcoin_df.groupby('oso_generated_round_label').agg({
    'oso_project_name': 'nunique',
    'match_funding_usd': 'sum'
}).sort_values(by='match_funding_usd', ascending=False)

oss_heavy_rounds = round_stats[(round_stats['match_funding_usd'] >= 20_000) & (round_stats['oso_project_name'] >= 10)]
oss_heavy_rounds

Unnamed: 0_level_0,oso_project_name,match_funding_usd
oso_generated_round_label,Unnamed: 1_level_1,Unnamed: 2_level_1
GG-20 - 27,38,386673.696867
GG-22 - 609,38,333145.428515
GG-20 - 26,44,314686.761894
GG-22 - 610,35,312112.764672
GG-18 - 0x8de918f0163b2021839a8d84954dd7e8e151326d,115,302566.342875
GG-22 - 608,82,301761.84701
GG-20 - 25,83,299575.594535
GG-18 - 0xb6be0ecafdb66dd848b0480db40056ff94a9465d,17,250009.202386
GG-18 - 0x2871742b184633f8dc8546c6301cbc209945033e,58,249989.700907
GG-18 - 0x222ea76664ed77d18d4416d2b2e77937b76f0a35,28,206764.931207


In [4]:
oss_heavy_rounds_list = list(oss_heavy_rounds.index)
round_filter = gitcoin_df['oso_generated_round_label'].isin(oss_heavy_rounds_list)

project_round_count = gitcoin_df.groupby('oso_project_name')['round_number'].nunique()
projects_above_round_count_threshold = list(project_round_count[project_round_count > 3].index)
project_filter = gitcoin_df['oso_project_name'].isin(projects_above_round_count_threshold)

oss_project_filter = gitcoin_df['oso_project_name'].isna() == False

filtered_df = gitcoin_df[round_filter & project_filter & oss_project_filter].reset_index(drop=True)
filtered_df.head()

Unnamed: 0,oso_project_name,oso_display_name,round_number,oso_generated_round_label,num_donors,crowd_funding_usd,match_funding_usd
0,wevm,wevm,22,GG-22 - 609,943,1721.574276,33050.986304
1,defi-llama,DefiLlama,22,GG-22 - 609,8064,12537.023827,29976.859375
2,ethereum-attestation-service,Ethereum Attestation Service,22,GG-22 - 610,745,1634.645314,29976.859375
3,l2beat,L2BEAT,22,GG-22 - 610,2426,5250.7208,29976.859375
4,eth-limo,eth.limo,22,GG-22 - 610,622,1430.092934,29976.859375


## Model head-to-head appearances by projects in the same round

- Weight match funding more heavily than crowd funding
- Match funding accounts for collusion / Sybil, etc

In [5]:
simulation_data = []
for gitcoin_round in oss_heavy_rounds_list:
    dff = filtered_df[filtered_df['oso_generated_round_label'] == gitcoin_round]
    round_num = dff['round_number'].mean()
    if round_num != int(round_num):
        print(gitcoin_round)
    projects = list(dff['oso_project_name'].unique())
    comparisons = combinations(projects, 2)
    for (project_a, project_b) in comparisons:
        match_a = dff[dff['oso_project_name'] == project_a]['match_funding_usd'].sum()
        match_b = dff[dff['oso_project_name'] == project_b]['match_funding_usd'].sum()
        crowd_a = dff[dff['oso_project_name'] == project_a]['crowd_funding_usd'].sum()
        crowd_b = dff[dff['oso_project_name'] == project_b]['crowd_funding_usd'].sum()
        amount_total = match_a + match_b + crowd_a + crowd_b
        simulation_data.append({
            'round_number': int(round_num),
            'project_a': project_a,
            'project_b': project_b,
            'weight_a': (match_a + crowd_a * 0.5) / amount_total,
            'weight_b': (match_b + crowd_b * 0.5) / amount_total
        })
        
simulation_df = pd.DataFrame(simulation_data)
simulation_df.tail()

Unnamed: 0,round_number,project_a,project_b,weight_a,weight_b
5381,19,kleo-network,eppie-io,0.511243,0.312889
5382,19,kleo-network,luncosim,0.648158,0.162658
5383,19,dspytdao,eppie-io,0.490032,0.295584
5384,19,dspytdao,luncosim,0.612199,0.151419
5385,19,eppie-io,luncosim,0.596625,0.244643


## Run the standard ELO algorithm

In [6]:
def expected_score(rating_a, rating_b):
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating_a, rating_b, score_a, k_factor):
    expected_a = expected_score(rating_a, rating_b)
    rating_a_new = rating_a + k_factor * (score_a - expected_a)
    rating_b_new = rating_b + k_factor * ((1 - score_a) - (1 - expected_a))
    return rating_a_new, rating_b_new
    
def elo_simulation(dataframe):
    
    elo_ratings = defaultdict(lambda: 1500) # Default ELO rating for all projects
    appearances = defaultdict(int)
    
    for _,row in dataframe.iterrows():
        project_a = row['project_a']
        project_b = row['project_b']
        share_a = row['weight_a']
        score_a = 1 if share_a > 0.5 else 0 if share_a < 0.5 else 0.5

        k_a = 40 / (1 + appearances[project_a] / 5)
        k_b = 40 / (1 + appearances[project_b] / 5)
        k_factor = (k_a + k_b) / 2
        
        elo_ratings[project_a], elo_ratings[project_b] = update_elo(
            elo_ratings[project_a], elo_ratings[project_b], score_a, k_factor
        )
        appearances[project_a] += 1
        appearances[project_b] += 1
    
    margin_of_error = {project: 400 / np.sqrt(appearances[project]) for project in appearances}

    return (
        pd.DataFrame([
            {
                'project': project,
                'head-to-head_comparisons': appearances[project],        
                'elo_rating': rating,        
                'margin_of_error': margin_of_error[project]
            }
            for project, rating in elo_ratings.items()
        ])
        .sort_values(by='elo_rating', ascending=False)
        .set_index('project', drop=True)
    )

elo_df = elo_simulation(simulation_df)
elo_df.head()

Unnamed: 0_level_0,head-to-head_comparisons,elo_rating,margin_of_error
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
revoke-cash,190,1771.362485,29.01905
defi-llama,143,1771.20959,33.44968
heyxyz,199,1759.166176,28.355248
scopelift,127,1723.981422,35.49426
l2beat,63,1701.297089,50.395263


In [7]:
df = (
    elo_df
    .join(gitcoin_df
          .groupby('oso_project_name')
          .agg({
            'round_number': 'nunique',
            'num_donors': 'max',
            'crowd_funding_usd': 'sum',
            'match_funding_usd': 'sum'
            })
        )
    .rename(columns={
        'round_number': 'num_main_round_appearances',
        'num_donors': 'highest_unique_donor_count_in_one_round',
    })
)
df.head()

Unnamed: 0_level_0,head-to-head_comparisons,elo_rating,margin_of_error,num_main_round_appearances,highest_unique_donor_count_in_one_round,crowd_funding_usd,match_funding_usd
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
revoke-cash,190,1771.362485,29.01905,5,15545,95462.523968,69654.668479
defi-llama,143,1771.20959,33.44968,5,10751,82395.777199,100643.195183
heyxyz,199,1759.166176,28.355248,5,21309,146242.825379,62701.547776
scopelift,127,1723.981422,35.49426,4,6580,32539.162881,50401.863174
l2beat,63,1701.297089,50.395263,4,8021,42316.973985,94956.414806


In [8]:
df.to_csv('data/2025-01-30_gitcoin_oss_elo_ratings.csv')