In [1]:
import json
import pandas as pd
import yaml

In [2]:
#gateway = "https://ipfs.filebase.io/ipfs/bafybeicxog6mwiga37znhopbxkwfbj5du4sp6rsga6vaht6y7cxugibpv4/"
gateway = "parquet/"

projects = pd.read_parquet(gateway+"projects.parquet")
projects.set_index('project_id', inplace=True)

votes = pd.read_parquet(gateway+"round_votes.parquet")
votes = (
    votes
    .groupby(['round_id', 'project_id'])
    .agg({'amount_usd': 'sum', 'grant_address': 'min', 'grant_address': lambda x: x.unique()[0]})
    .reset_index()
)

rounds = pd.read_parquet(gateway+"rounds.parquet")
rounds.set_index('id', inplace=True)
rounds = rounds[rounds['amount_usd'] > 0 ]
rounds_data = rounds.to_dict(orient='index')
round_name_mapping = rounds['name'].to_dict()

In [3]:
matching = pd.read_csv("csv/matching-distributions.csv")
matching['RoundId'] = matching['RoundId'].str.lower()
round_ids = list(matching['RoundId'].unique())
matching = matching.groupby(['RoundId', 'ProjectId'])['MatchAmountUSD'].sum()

In [4]:
address_records = json.load(open("validated_addresses.json"))
addresses = {}
for slug, adata in address_records.items():
    if slug == 'gitcoin':
        continue
    for addr in adata.keys():
        addresses.update({addr:slug})
len(addresses)        

17023

In [5]:
github_records = (
    pd.read_csv("csv/github_orgs_to_oso_slugs.csv", index_col=0)
    .groupby('github_org')['project_slug']
    .agg(lambda x: ", ".join(set(x)))
).to_dict()
#github_records.index.value_counts()
github_records['ethereum']

'c-kzg-4844-ethereum, portal-network-specs-ethereum, ethereum-cat-herders, beaconrunner-ethereum, execution-spec-tests-ethereum, rig-ethereum, solidity, eth-portal-ethereum, pm-ethereum, glados-ethereum, trin-ethereum, portal-hive-ethereum, research-ethereum, consensus-specs-ethereum, ethereum-org-website-ethereum, kzg-ceremony-specs-ethereum, js-ethereum-cryptography-ethereum, go-ethereum, fe-ethereum, protocol-guild, py-evm-ethereum, sourcify-ethereum, execution-apis-ethereum, utp-ethereum, distributed-validator-specs-ethereum, hive-ethereum, remix-project, execution-specs-ethereum'

In [6]:
missing_rounds = []
allo = {}
for project_id, row in projects.iterrows():
    md = row.get('metadata')
    if md == 'null':
        continue
    metadata = eval(md)
    rounddata = (
        votes[votes['project_id']==project_id]
        .set_index('project_id')
        .to_dict(orient='records')
    )
    for r in rounddata:
        
        round_id = r['round_id']
        round_data = rounds_data.get(round_id,{})
        r.update({
            'round_name': round_data.get('name'),
            'chain_id': round_data.get('chain_id'),
            'program_address': round_data.get('program_address'),
            'app_end_time': round_data.get('applications_end_time')
        })
        try:
            r.update({'match_usd': matching[(r['round_id'], project_id)]})
        except:
            r.update({'match_usd': None})
            if r['round_id'] not in round_ids:
                missing_rounds.append(r['round_id'])
            else:
                pass
    project_github = metadata.get('projectGithub')
    record = {
        'project_name': metadata['title'],
        'project_github': project_github,
        'rounds': rounddata
    }
    if rounddata:
        allo.update({project_id: record})

In [7]:
with open("gitcoin-allo.json", "w") as f:
    json.dump(allo, f, indent=2)

In [8]:
csv_data = []
for pid, pdata in allo.items():
    for rdata in pdata.get('rounds'):
        r = {
            'project_id': pid,
            'project_name': pdata['project_name'].encode('utf-8', 'replace').decode(),
            'project_github': pdata['project_github'],
            **rdata
        }
        a = r['grant_address'].lower()
        r.update({'oso_address_slug': addresses.get(a)})
        csv_data.append(r)
        
df = pd.DataFrame(csv_data)
df['project_github'] = df['project_github'].str.lower()
df['oso_github_slug'] = df['project_github'].map(github_records)
df.head()

cols = [
    'project_id', 
    'project_name', 
    'project_github', 
    'oso_github_slug',
    'grant_address',
    'oso_address_slug',
    'round_id',
    'round_name', 
    'chain_id',
    'program_address',
    'app_end_time',
    'amount_usd',  
    'match_usd', 
]
df = df[cols]
df.head()

Unnamed: 0,project_id,project_name,project_github,oso_github_slug,grant_address,oso_address_slug,round_id,round_name,chain_id,program_address,app_end_time,amount_usd,match_usd
0,0xffb3caf800fddd73dc8f3550b01d404c7e331238e841...,EthWarsaw,,,0x5f390415db0f7d4d336095f3fd266d6b3b616e7a,,0xab2cc13126df8f75bd51215bfb6577d2cefac05f,Testing without Lit,421613,0xe06f640b3d224d0cbd98f3f10de11f269fcab061,1710979000.0,1.618423,
1,0x6e8a6bf67a82a570286b02e16a5042899cc7effcf5ca...,coconut,,,0x5f390415db0f7d4d336095f3fd266d6b3b616e7a,,0xab2cc13126df8f75bd51215bfb6577d2cefac05f,Testing without Lit,421613,0xe06f640b3d224d0cbd98f3f10de11f269fcab061,1710979000.0,0.0,
2,0xfda3e1d25066ed5c4376ba53b5887a10d84ff9557872...,PrankStack,,,0x79427367e9be16353336d230de3031d489b1b3c3,,0x4473725beb9a9d503547d2fe677f4b5aa39b68f6,PGN Internal Test Round: Meme Contest,424,0xff95561425a0040cb4b53c89b63357b22395cde9,1690502000.0,4.317825,0.72125
3,0xa8491e678e4613cac34314d9cff03654a2561898d9a2...,hummingbirds unite,,,0xabf28f8d9adfb2255f4a059e37d3bce9104969db,,0x44f67acb49986768d92e9dd2844ed83acd663bbe,Espresso test,424,0x248fd88bc562fa472f0bda0f77f5063547c2f1ab,1691194000.0,1.833823,
4,0xa8491e678e4613cac34314d9cff03654a2561898d9a2...,hummingbirds unite,,,0xabf28f8d9adfb2255f4a059e37d3bce9104969db,,0xefa3336cf157bfc308aca6fa66cdb93004813b8c,[Optimism] MRC Testing Round #1,10,0xcffe7a9a9765f2910124e5d48979b5a777114605,1693613000.0,0.629848,


In [9]:
df.to_csv("gitcoin-allo.csv")