# 01 Process Applications

Run this notebook to get the latest application data, normalize it, and flag apps with problems.

In [1]:
from dotenv import load_dotenv
import json
import os
import pandas as pd
import requests

In [2]:
load_dotenv()

CHAIN_MAPPINGS = json.load(open('data/chains.json', 'r'))
CREATOR_CHAINS = ['Base', 'Optimism', 'Zora']

EXPORTED_DATA_DIR = "data/apps/"
RAW_APPS_JSON_PATH = EXPORTED_DATA_DIR + "applications.json"
REVIEWED_APPS_CSV_PATH = EXPORTED_DATA_DIR + "applications_reviewed.csv"

PRIMARY_KEY = 'uuid'
GROUPER_KEY = 'recipient'

## Part 1. Fetch application data

In [3]:
def fetch_application_data(limit, cursor, api_key):
    url = f'https://ezrf-impact.vercel.app/api/trpc/projects.list?input=%7B%22json%22%3A%7B%22limit%22%3A{limit}%2C%22cursor%22%3A{cursor}%7D%7D'
    headers = {
        'content-type': 'application/json',
        'round-id': 'the-sunnys',
        'x-api-key': api_key
    }

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        print(f"Data fetched successfully! (Page {cursor})")
        payload = response.json()
        json_data = payload['result']['data']['json']
        return json_data
    else:
        print(f"Failed to fetch data. Status code: {response.status_code}")

        
def fetch_all_applications(curs=0, est_apps=2000, lim=200):        
    api_key = os.getenv('EZRF_API_KEY')
    applications = []
    while curs * lim < est_apps:
        data = fetch_application_data(lim, curs, api_key)
        if data:
            applications.extend(data)
            curs += 1
        if not data or len(data) < lim:
            break
    print(f"Total of {len(applications)} applications fetched.")
    return applications

def refresh_applications():
    applications = fetch_all_applications()
    with open(RAW_APPS_JSON_PATH, "w") as f:
        json.dump(applications, f, indent=2)
    print("Applications saved to:", RAW_APPS_JSON_PATH)
    
def load_applications():
    with open(RAW_APPS_JSON_PATH, "r") as f:
        applications = json.load(f)
    print(f"Total of {len(applications)} applications loaded.")
    return applications

In [5]:
refresh_applications()
applications = load_applications()

Data fetched successfully! (Page 0)
Data fetched successfully! (Page 1)
Data fetched successfully! (Page 2)
Data fetched successfully! (Page 3)
Data fetched successfully! (Page 4)
Data fetched successfully! (Page 5)
Data fetched successfully! (Page 6)
Data fetched successfully! (Page 7)
Data fetched successfully! (Page 8)
Total of 1706 applications fetched.
Applications saved to: data/apps/applications.json
Total of 1706 applications loaded.


## Part 2. Process and clean application data

In [6]:
def clean_address(a):
    if not isinstance(a, str):
        return None
    a = a.lower().strip()
    if a[:2] != '0x' or len(a) != 42:
        return None
    return a

def process_application_data(applications):

    normalized_data = []
    for (i,app) in enumerate(applications):

        profile = app.get('profile', {})
        if not profile:
            profile = {}
        profile_name = profile.get('name', '')
        metadata = app.get('metadata', {})
        awards = metadata.get('sunnyAwards', {})
        project_type = awards.get('projectType', '').title()
        if project_type == 'Other':
            project_type = 'Other Application'
        category = awards.get('category', '')
        if category == 'Other':
            category = 'Other Category'
        contracts = awards.get('contracts', [])    

        if len(contracts) > 1:
            print("WARNING: Array encountered at index:", i)
            break
        elif len(contracts) == 1:
            contract = contracts[0]
            address_type = 'contract'
            address = contract.get('address')
            chain_id = contract.get('chainId')
            chain = CHAIN_MAPPINGS.get(str(chain_id), 'All Superchain')
        else:
            address_type = 'mintingWallet'
            address = awards.get('mintingWalletAddress')
            chain_id = None
            chain = 'All Superchain'
        address = clean_address(address)
        if not address:
            address_type = 'N/A'
            chain = None

        app_data = {
            **awards.get('projectReferences'),
            'id': app['id'],
            'uuid': app['uuid'],        
            'attester': app['attester'],
            'recipient': app['recipient'],
            'time': app['time'],
            'name': app['name'],
            'schemaId': app['schemaId'],
            'status': app['status'],
            'round': app['round'],
            'profile_name': profile_name,
            'profile_url': f"https://warpcast.com/{profile_name}" if profile_name else '',
            'profile_image': profile.get('profileImageUrl', ''),
            'profile_banner': profile.get('bannerImageUrl', ''),
            'metadata_name': metadata.get('name', ''),
            'metadata_bio': metadata.get('bio', ''),
            'metadata_website': metadata.get('websiteUrl', ''),
            'project_type': project_type,
            'category': category,
            'category_details': awards.get('categoryDetails', ''),
            'avatar_url': awards.get('avatarUrl', ''),
            'cover_image_url': awards.get('coverImageUrl', ''),
            'address_type': address_type,
            'address': address,
            'chain_id': chain_id,
            'chain':  chain,
        }
        normalized_data.append(app_data)

    df = pd.DataFrame(normalized_data)
    return df


def review_application_dataframe(df):
        
    # Flag 1: applied as 3 distinct projects from the same Farcaster account
    project_count = df.groupby('profile_name')['name'].nunique()
    #flagged_farcaster_users = project_count[project_count > 3].index
    flagged_farcaster_users = ['kawz']
    df['flag_multiple_projects_same_profile'] = df['profile_name'].isin(flagged_farcaster_users)

    # Flag 2: applied as an NFT creator category but no valid address
    valid_categories_creator = ['Art NFTs', 'Other Media NFTs', 'Community & Curation']
    df['flag_creator_no_address'] = (
        (df['category'].isin(valid_categories_creator))
         & (df['address'].isna())
    )

    # Flag 3: applied as an app but no address : chain mapping
    other_categories_app = ['Channels', 'Frames', 'Other']
    df['flag_app_missing_contract'] = (
        (~df['category'].isin(valid_categories_creator))
        & (~df['category'].isin(other_categories_app))
        & (df['address'].isna() | df['chain_id'].isna())
    )

    # Flag 4: applied as a channel but the url does not conform to the Warpcast channel pattern
    df['flag_channel_no_channel'] = (
        (df['category'] == 'Channels')
        & (df['metadata_website'].str.contains("warpcast.com/~/channel/") == False)
    )

    # Flag 5: test project with Charmverse in the name :)
    df['flag_charmverse_in_name'] = df['name'].str.contains('charmverse', case=False, na=False)

    # Flag 6: creator project with the same address claimed by multiple profiles
    conflicting_addresses = df[df['project_type'] == 'Creator'].groupby('profile_name')['address'].nunique()
    conflicting_addresses = conflicting_addresses[conflicting_addresses>1].index
    df['flag_creator_address_conflict'] = (
        (df['address'].isin(conflicting_addresses))
        & (df['project_type'] == 'Creator')
    )
    
    df['count_flags'] = df[[
        'flag_multiple_projects_same_profile', 
        'flag_creator_no_address', 
        'flag_app_missing_contract', 
        'flag_channel_no_channel',
        'flag_charmverse_in_name',
        'flag_creator_address_conflict'
    ]].sum(axis=1)
    df['has_flag'] = (df.count_flags > 0).astype(int)
    
    print("Applications processed...\n\n", df['has_flag'].value_counts())
    
    return df

def process_and_review_apps(applications):
    
    df_apps = process_application_data(applications)
    df = review_application_dataframe(df_apps)
    df.drop(columns=[
        'attester', 'schemaId', 'round', 'profile_image', 'profile_banner',
        'metadata_bio', 'avatar_url', 'cover_image_url', 'category_details'
    ], inplace=True)
    df.set_index(PRIMARY_KEY, inplace=True)
    df.to_csv(REVIEWED_APPS_CSV_PATH)    
    return df

In [7]:
df = process_and_review_apps(applications)    

Applications processed...

 has_flag
0    998
1    708
Name: count, dtype: int64


In [8]:
df[df['flag_multiple_projects_same_profile'] == True]['profile_name'].value_counts()

profile_name
kawz    210
Name: count, dtype: int64

## Part 3. Classify valid projects by metric category

In [9]:
def classify_eligible_apps(df_apps):
    
    creator_apps = []
    project_apps = []
    warpcast_apps = []

    for primary_key, app in df_apps.iterrows():

        app_data = {
            PRIMARY_KEY: primary_key,
            GROUPER_KEY: app[GROUPER_KEY],
            'project_type': app['project_type'],
            'category': app['category']
        }

        if app['has_flag']:
            continue

        if pd.isna(app['address']):
            app_data.update({'website': app['metadata_website']})
            warpcast_apps.append(app_data)
            continue

        app_data.update({
            'address': app['address'],
            'chain': app['chain']
        })

        if app['chain'] != 'All Superchain':
            project_apps.append(app_data)
            continue

        for chain in CREATOR_CHAINS:
            temp_app_data = app_data.copy()
            temp_app_data.update({'chain': chain})
            creator_apps.append(temp_app_data)

    print(f"Classified {len(project_apps)} apps as `onchain projects`.")
    pd.DataFrame(project_apps).to_csv(EXPORTED_DATA_DIR + "project_apps.csv")
    
    print(f"Classified {len(creator_apps) // len(CREATOR_CHAINS)} apps as `creators`.")
    pd.DataFrame(creator_apps).to_csv(EXPORTED_DATA_DIR + "creator_apps.csv")
    
    print(f"Classified {len(warpcast_apps)} apps as `Warpcast projects`.")
    pd.DataFrame(warpcast_apps).to_csv(EXPORTED_DATA_DIR + "warpcast_apps.csv")

In [10]:
classify_eligible_apps(df)

Classified 412 apps as `onchain projects`.
Classified 523 apps as `creators`.
Classified 63 apps as `Warpcast projects`.


In [11]:
df.groupby('has_flag')['recipient'].nunique()

has_flag
0    550
1    429
Name: recipient, dtype: int64

## Part 4. Dump the triage pile

In [12]:
df_reject_missing = df[
    (df['flag_channel_no_channel'] == 1) 
    | (df['flag_creator_no_address'] == 1)
    | (df['flag_app_missing_contract'] == 1)
    & (df['flag_multiple_projects_same_profile'] == 0)
    & (df['status'] == 'pending')
]
print(len(df_reject_missing))
df_reject_missing.to_csv(EXPORTED_DATA_DIR + "reject_missing_info.csv")

497


In [13]:
df_reject_spam = df[
    (df['flag_multiple_projects_same_profile'] == 1)
    | (df['flag_charmverse_in_name'] == 1)
    & (df['status'] == 'pending')
]
print(len(df_reject_spam))
df_reject_spam.to_csv(EXPORTED_DATA_DIR + "reject_likely_spam.csv")

211


In [14]:
df_review_flag = df[
    (df['has_flag'] == 1)
    & (df['status'] == 'approved')
]
print(len(df_review_flag))
df_review_flag.to_csv(EXPORTED_DATA_DIR + "review_approved_with_flag.csv")

91
