# 02 Label apps based on the type of contract

This notebook filters the apps based on what type of contract they have:

- token_contract: ERC20 token contracts. Metrics are derived from token transfer event data (Dune).

- dapp_contract: Regular smart contracts. Metrics are derived from raw transaction data (Goldsky). 

- factory_contract: Contracts used to generate other contracts. Metrics pendings.

- trace_contract: Something else, but that has events in traces.

In [1]:
from google.cloud import bigquery
import os
import pandas as pd

In [2]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../../oso_gcp_credentials.json"
client = bigquery.Client()
PROJECT = 'opensource-observer'
CURRENT_CHAINS = ['Optimism', 'Base', 'Zora', 'Mode', 'Frax', 'Metal']

In [3]:
all_apps = pd.read_csv('data/apps/applications_reviewed.csv')

all_apps = all_apps[all_apps['project_type'] != 'Creator']
all_apps['chain'].fillna('', inplace=True)
all_apps['address'].fillna('', inplace=True)
all_apps = all_apps[~all_apps['chain'].isin(['All Superchain', ''])]
all_apps = all_apps[all_apps['address'].apply(len)==42]
all_apps.set_index('uuid', inplace=True)

all_apps.tail(1)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_apps['chain'].fillna('', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_apps['address'].fillna('', inplace=True)


Unnamed: 0_level_0,charmverseId,agoraProjectRefUID,id,recipient,time,name,status,profile_name,profile_url,metadata_name,...,chain_id,chain,flag_multiple_projects_same_profile,flag_creator_no_address,flag_app_missing_contract,flag_channel_no_channel,flag_charmverse_in_name,flag_creator_address_conflict,count_flags,has_flag
uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
49ee3492-1af1-41e2-8ef2-f5f388470d46,49ee3492-1af1-41e2-8ef2-f5f388470d46,0xaab7d148103b030cac721846d658f1fa5f61d4f6d5c8...,0x4b3ddc41c068a349620d8d6ffd78775ee4ca727a88da...,0x9b7816f3eb2d35A24edD9BB4F33f05E9a2332494,1724766497,Yunan90,pending,yunan90,https://warpcast.com/yunan90,Yunan90,...,8453.0,Base,False,False,False,False,False,False,0,0


In [4]:
INVALID_CONTRACTS = {
    "0x4e1dcf7ad4e460cfd30791ccc4f9c8a4f820ec67": "safe",
    "0x4200000000000000000000000000000000000020": "eas",
    "0x4200000000000000000000000000000000000021": "eas",
}

CONTRACT_LABELS = {
    uuid: {
        'address': row['address'],
        'chain': row['chain'],
        'label': 'unknown' if row['address'] not in INVALID_CONTRACTS else 'invalid_contract'
    }
    for uuid, row in all_apps.iterrows()
}

## Part 1. Token contracts

In [5]:
token_list = pd.read_csv('data/raw_metric_data/dune_token_list.csv', low_memory=False)

tokens = {
    'Base': [],
    'Optimism': [],    
    'Zora': []
}

for _,row in token_list.iterrows():
    chain = row['blockchain'].title()
    addresses = row['token_addresses']
    addresses = addresses[1:-1]
    for a in addresses.split(' '):
        if len(a) == 42:
            tokens[chain].append(a.lower())

In [6]:
for uuid,app in CONTRACT_LABELS.items():
    
    if app['label'] != 'unknown':
        continue
    
    chain = app['chain']
    address = app['address']
    
    if chain not in CURRENT_CHAINS:
        CONTRACT_LABELS[uuid]['label'] = f"pending_{chain.lower()}"
        print(CONTRACT_LABELS[uuid])
        continue
        
    if tokens.get(chain):
        if address in tokens[chain]:
            CONTRACT_LABELS[uuid]['label'] = 'token_contract'
            print(CONTRACT_LABELS[uuid])
            continue

{'address': '0x940181a94a35a4569e4529a3cdfb74e38fd98631', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0xbeebbaee8f085f506ce0ea3591f8fbb9c24af356', 'chain': 'Mint', 'label': 'pending_mint'}
{'address': '0x83084cb182162473d6feffcd3aa48ba55a7b66f7', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0x944766f715b51967e56afde5f0aa76ceacc9e7f9', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0xa5f8daa537afeca842cdd39758744fd3155a26cb', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0x814fe70e85025bec87d4ad3f3b713bdcaac0579b', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0xb472838c1fb18aa671a63bdd42b0c00e34ce622a', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0x17931cfc3217261ce0fa21bb066633c463ed8634', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0xfcb65dc6758e2cfd8f1dcdfad185bd4648b0e9c5', 'chain': 'Base', 'label': 'token_contract'}
{'address': '0x43f6de3d9fb0d5eed93d7e7e14a8a526b98f8a58', 'chain': 'Optimism', 'labe

## Part 2. Determine best way to handle other contracts

In [7]:
def oso_lookup_query(uuid_contract_chain_tuples):
    
    list_of_contracts = [x[1] for x in uuid_contract_chain_tuples if isinstance(x[1], str)]
    list_of_contracts_str = "'" + "','".join(list_of_contracts) + "'"
    
    query = f"""
        with contracts as (
            select 
                to_address as address,
                upper(chain) as chain,
                txns as num_events,
                'transactions' as data_source
            from `{PROJECT}.static_data_sources.sunny_contract_lookup`
            union all
            select 
                address,
                upper(chain) as chain,
                count_transactions as num_events,
                'traces' as data_source
            from `{PROJECT}.static_data_sources.sunny_trace_lookup`
            union all
            select
                factory_address as address,
                upper(chain) as chain,
                count_contracts as num_events,
                'factories' as data_source
            from `{PROJECT}.static_data_sources.sunny_factory_lookup`
        )
        select
            address,
            chain,
            sum(case when data_source = 'transactions' then num_events else 0 end) as count_transactions,
            sum(case when data_source = 'traces' then num_events else 0 end) as count_traces,
            sum(case when data_source = 'factories' then num_events else 0 end) as count_factory_deploys
        from contracts
        where address in ({list_of_contracts_str})
        group by address, chain
    """
    
    result = client.query(query)
    dataframe = result.to_dataframe()

    labeled_contracts = []
    for (uuid, address, chain) in uuid_contract_chain_tuples:
        
        item = {'uuid': uuid, 'address': address, 'chain': chain}
    
        if not isinstance(chain, str) or not isinstance(address, str):
            continue

        temp_ = dataframe[(dataframe['address']==address) & (dataframe['chain']==chain.upper())]
        
        if not len(temp_):
            labeled_contracts.append({**item, 'label': 'unknown'})
            continue

        c = temp_.set_index('address').iloc[0]

        # look for factories
        if c.count_factory_deploys > c.count_traces:
            labeled_contracts.append({**item, 'label': 'factory_contract'})
            continue

        # low activity        
        if c.count_transactions < 10 and c.count_traces < 10:
            if c.count_factory_deploys:
                labeled_contracts.append({**item, 'label': 'factory_contract'})
            else:            
                if c.count_factory_deploys:
                    labeled_contracts.append({**item, 'label': 'inactive_contract'})
            continue

        # only traces cases
        if not c.count_transactions:
            labeled_contracts.append({**item, 'label': 'trace_contract'})
            continue

        # only transactions cases
        if not c.count_traces:
            labeled_contracts.append({**item, 'label': 'dapp_contract'})
            continue

        # decided based on ratio of transactions to traces
        if c.count_traces / c.count_transactions >= 1.5:
            labeled_contracts.append({**item, 'label': 'trace_contract'})
        else:
            labeled_contracts.append({**item, 'label': 'dapp_contract'})  
            
    return labeled_contracts

In [8]:
lookup_results = oso_lookup_query([
    (uuid, app['address'], app['chain'])
    for uuid, app in CONTRACT_LABELS.items()
    if app['label'] == 'unknown'
])

In [9]:
for res in lookup_results:
    uuid = res['uuid']
    label = res['label']
    CONTRACT_LABELS[uuid]['label'] = label

## Part 3. Check what's left and dump it

In [10]:
df_labels = pd.DataFrame(CONTRACT_LABELS).T
df_labels.rename(columns={'label': 'contract_type'}, inplace=True)
df_labels.tail(1)

Unnamed: 0,address,chain,contract_type
49ee3492-1af1-41e2-8ef2-f5f388470d46,0x9b7816f3eb2d35a24edd9bb4f33f05e9a2332494,Base,trace_contract


In [11]:
cols = ['recipient', 'project_type', 'category']
all_apps[cols].join(df_labels).to_csv('data/apps/project_apps_labeled.csv')