In [1]:
import json
import pandas as pd
import yaml

In [2]:
#gateway = "https://ipfs.filebase.io/ipfs/bafybeicxog6mwiga37znhopbxkwfbj5du4sp6rsga6vaht6y7cxugibpv4/"
gateway = "parquet/"

projects = pd.read_parquet(gateway+"projects.parquet")
projects.set_index('project_id', inplace=True)

votes = pd.read_parquet(gateway+"round_votes.parquet")
votes = (
    votes
    .groupby(['round_id', 'project_id'])
    .agg({'amount_usd': 'sum', 'grant_address': 'min', 'grant_address': lambda x: x.unique()[0]})
    .reset_index()
)

rounds = pd.read_parquet(gateway+"rounds.parquet")
rounds.set_index('id', inplace=True)
rounds = rounds[rounds['amount_usd'] > 0 ]
rounds_data = rounds.to_dict(orient='index')
round_name_mapping = rounds['name'].to_dict()

In [3]:
matching = pd.read_csv("csv/matching-distributions.csv")
matching['RoundId'] = matching['RoundId'].str.lower()
round_ids = list(matching['RoundId'].unique())
matching = matching.groupby(['RoundId', 'ProjectId'])['MatchAmountUSD'].sum()

In [4]:
address_records = json.load(open("validated_addresses.json"))
addresses = {}
for slug, adata in address_records.items():
    if slug == 'gitcoin':
        continue
    for addr in adata.keys():
        addresses.update({addr:slug})
len(addresses)        

17023

In [5]:
github_records = (
    pd.read_csv("csv/github_orgs_to_oso_slugs.csv", index_col=0)
    .groupby('github_org')['project_slug']
    .agg(lambda x: ", ".join(set(x)))
).to_dict()
#github_records.index.value_counts()
github_records['ethereum']

'distributed-validator-specs-ethereum, research-ethereum, ethereum-cat-herders, c-kzg-4844-ethereum, pm-ethereum, kzg-ceremony-specs-ethereum, fe-ethereum, ethereum-org-website-ethereum, portal-network-specs-ethereum, trin-ethereum, protocol-guild, go-ethereum, beaconrunner-ethereum, solidity, glados-ethereum, eth-portal-ethereum, rig-ethereum, execution-specs-ethereum, hive-ethereum, py-evm-ethereum, consensus-specs-ethereum, execution-spec-tests-ethereum, portal-hive-ethereum, utp-ethereum, execution-apis-ethereum, remix-project, sourcify-ethereum, js-ethereum-cryptography-ethereum'

In [6]:
missing_rounds = []
allo = {}
for project_id, row in projects.iterrows():
    md = row.get('metadata')
    if md == 'null':
        continue
    metadata = eval(md)
    rounddata = (
        votes[votes['project_id']==project_id]
        .set_index('project_id')
        .to_dict(orient='records')
    )
    for r in rounddata:
        
        round_id = r['round_id']
        round_data = rounds_data.get(round_id,{})
        r.update({
            'round_name': round_data.get('name'),
            'chain_id': round_data.get('chain_id'),
            'program_address': round_data.get('program_address'),
            'app_end_time': round_data.get('applications_end_time')
        })
        try:
            r.update({'match_usd': matching[(r['round_id'], project_id)]})
        except:
            r.update({'match_usd': None})
            if r['round_id'] not in round_ids:
                missing_rounds.append(r['round_id'])
            else:
                pass
    project_github = metadata.get('projectGithub')
    record = {
        'project_name': metadata['title'],
        'project_github': project_github,
        'rounds': rounddata
    }
    if rounddata:
        allo.update({project_id: record})

In [7]:
with open("gitcoin-allo.json", "w") as f:
    json.dump(allo, f, indent=2)

In [8]:
csv_data = []
for pid, pdata in allo.items():
    for rdata in pdata.get('rounds'):
        r = {
            'project_id': pid,
            'project_name': pdata['project_name'].encode('utf-8', 'replace').decode(),
            'project_github': pdata['project_github'],
            **rdata
        }
        a = r['grant_address'].lower()
        r.update({'oso_address_slug': addresses.get(a)})
        csv_data.append(r)
        
df = pd.DataFrame(csv_data)
df['project_github'] = df['project_github'].str.lower()
df['oso_github_slug'] = df['project_github'].map(github_records)

df = df[df['round_name'].str.contains("test|Test") == False]

cols = [
    'project_id', 
    'project_name', 
    'project_github', 
    'oso_github_slug',
    'grant_address',
    'oso_address_slug',
    'round_id',
    'round_name', 
    'chain_id',
    'program_address',
    'app_end_time',
    'amount_usd',  
    'match_usd', 
]
df = df[cols].set_index('project_id', drop=True)
df.head()

Unnamed: 0_level_0,project_name,project_github,oso_github_slug,grant_address,oso_address_slug,round_id,round_name,chain_id,program_address,app_end_time,amount_usd,match_usd
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0xe0f08b3b36137e01398e52e0db42d175732fede1624f364ffd3109acba81bef0,Crypto Sapiens,,,0x01999e431752136372a3d485f527907a6b02a1d1,,0x2871742b184633f8dc8546c6301cbc209945033e,Web3 Community and Education,10,0xa1e8c5a8ca033ac3cb738506c6f1ad15bf54a730,1692274000.0,197.889151,598.929745
0xe0f08b3b36137e01398e52e0db42d175732fede1624f364ffd3109acba81bef0,Crypto Sapiens,,,0x624e05d9a8deff331d2685ef3d789b25f9bec80f,,0x98720dd1925d34a2453ebc1f91c9d48e7e89ec29,Web3 Community and Education,424,0xe13da583181b19dace7c021f57774659edc1f901,1701302000.0,153.544471,611.130908
0x646d41be0fbbe228c01a54aecfde54250c0ae01d3de2221ce80971e8ff542a50,NFT Price Floor,nft-pricefloor,nft-pricefloor,0x31856e11ddaabc67e3b7b5de50dd0efce86e361e,,0x98720dd1925d34a2453ebc1f91c9d48e7e89ec29,Web3 Community and Education,424,0xe13da583181b19dace7c021f57774659edc1f901,1701302000.0,307.369775,424.77368
0x8427d56c001d898a2afc097dfffbe471b26dfcc4bf8a9fac386266a5ed1ef37b,Ethereum News Podcast,ethdailyhub,,0xeb40a065854bd90126a4e697aea0976ba51b2ee7,,0x2871742b184633f8dc8546c6301cbc209945033e,Web3 Community and Education,10,0xa1e8c5a8ca033ac3cb738506c6f1ad15bf54a730,1692274000.0,248.08766,668.553916
0x8427d56c001d898a2afc097dfffbe471b26dfcc4bf8a9fac386266a5ed1ef37b,Ethereum News Podcast,ethdailyhub,,0xeb40a065854bd90126a4e697aea0976ba51b2ee7,,0x59d79b22595b17af659ce9b03907615f53742c57,"The Education, community growth & events Round",42161,0xce06ebb79f95943ca9125b85ca0294c302e846b5,1696118000.0,195.740634,1269.179638


In [9]:
df.to_csv("gitcoin-allo.csv")