# 03 Fetch onchain data about projects

Grabs lots of data from different sources. Requires credentials and several GB of storage. Not for the faint of heart.

In [1]:
from dotenv import load_dotenv
from google.cloud import bigquery
from dune_client.types import QueryParameter
from dune_client.client import DuneClient
from dune_client.query import QueryBase
import os
import pandas as pd
import re

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Dune client, authenticated with the key read from the environment.
DUNE_API_KEY = os.environ.get('DUNE_API_KEY')
dune = DuneClient(DUNE_API_KEY)

# BigQuery client. PROJECT is interpolated into the table names of the
# BigQuery queries further down. The credentials path is exported before the
# client is constructed (presumably so default credential discovery picks it
# up -- TODO confirm).
PROJECT = 'opensource-observer'
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS="../../oso_gcp_credentials.json")
client = bigquery.Client()

In [3]:
# Labeled project contracts, re-indexed by the `uuid` column.
labeled_apps_csv = 'data/apps/project_apps_labeled.csv'
projects = pd.read_csv(labeled_apps_csv, index_col=0)
projects = projects.set_index('uuid')
projects.tail(1)

Unnamed: 0_level_0,recipient,project_type,category,address,chain,contract_type
uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
49ee3492-1af1-41e2-8ef2-f5f388470d46,0x9b7816f3eb2d35A24edD9BB4F33f05E9a2332494,App,DEX,0x9b7816f3eb2d35a24edd9bb4f33f05e9a2332494,Base,dapp_contract


In [4]:
# Count how many labeled rows fall under each contract_type value.
projects['contract_type'].value_counts()

contract_type
dapp_contract       157
trace_contract       40
unknown              27
token_contract       23
factory_contract     16
mint                  8
swanchain             4
kroma                 4
cyber                 2
orderly               2
polynomial            2
lisk                  1
redstone              1
Name: count, dtype: int64

In [5]:
# Accumulator filled by Parts 1-3: each part appends one dict per event with the
# keys uuid, chain, contract_address, contract_type, user_address, date and
# count_transactions.
event_data = []

## Part 1. Get token data from Dune

In [6]:
# Contract type handled in this part; used both to filter `projects` and to tag
# the events appended to `event_data`.
this_contract_type = 'token_contract'

In [7]:
# Distinct addresses of the selected contract type, joined with a comma and a
# newline so they can be dropped into the SQL `in (...)` list.
is_token = projects['contract_type'] == this_contract_type
token_addresses = list(projects.loc[is_token, 'address'].unique())
token_addresses_str = ',\n\t\t'.join(token_addresses)
# Dune query over `tokens.transfers` for the selected token contracts on
# base / optimism / zora, between 2024-02-01 and 2024-09-01.
#   - transfers:      relabels each row's contract as "<blockchain> <contract_address>".
#   - union_events:   emits one row for tx_from and one for tx_to, each with
#                     amount 0.5, so a single transfer contributes 1.0 in total
#                     across its two sides.
#   - pre_agg_events: sums those amounts per contract, day and user.
#   - final select:   one row per (contract, user) holding an array of
#                     (block_date, count_transactions) pairs ordered by date.
query_sql = f"""
    with
      transfers as (
        select
          concat(blockchain, ' ', cast(contract_address as varchar)) as contract_address,
          to_char(block_date, 'yyyy-mm-dd') as block_date,
          tx_from,
          tx_to
        from
          tokens.transfers as transfers
        where
          contract_address in (
                {token_addresses_str}
          )
          and blockchain in ('base', 'optimism', 'zora')
          and block_date between date('2024-02-01') and date('2024-09-01')
      ),
      union_events as (
        select
          contract_address,
          block_date,
          tx_from as user_address,
          0.5 as amount
        from
          transfers
        union all
        select
          contract_address,
          block_date,
          tx_to as user_address,
          0.5 as amount
        from
          transfers
      ),
      pre_agg_events as (
        select
          contract_address,
          block_date,
          user_address,
          sum(amount) as count_transactions
        from
          union_events
        group by
          contract_address,
          block_date,
          user_address
      )
    select
      contract_address,
      user_address,
      array_agg(
        (block_date, count_transactions)
        order by
          block_date
      ) as transaction_details
    from
      pre_agg_events
    group by
      contract_address,
      user_address
    """

In [8]:
# The Dune run is left commented out: these lines create the query, execute it
# and write the result to parquet. The active line below loads that parquet
# file instead (presumably written by an earlier run -- TODO confirm).
# Uncomment to refresh the snapshot.
# query_id = dune.create_query(name='sunny_tokens', query_sql=query_sql, is_private=False)
# query = QueryBase(name='sunny_tokens', query_id=query_id.base.query_id)
# results_df = dune.run_query_dataframe(query)
# results_df.to_parquet('data/raw_metric_data/dune_raw_token_transfers.parquet')
results_df = pd.read_parquet('data/raw_metric_data/dune_raw_token_transfers.parquet')

# Matches one "[YYYY-MM-DD <number>]" pair inside a stringified array.
_TRANSACTION_PAIR_RE = re.compile(r'\[(\d{4}-\d{2}-\d{2}) (\d+\.?\d*)\]')


def parse_string(row):
    """Parse a stringified ``transaction_details`` value into (date, count) pairs.

    Args:
        row: A single cell value such as ``"[[2024-06-12 0.5]]"``. Despite the
            name this is one string, not a DataFrame row. ``None`` and float
            NaN (how pandas represents a missing cell in an object column)
            are treated as "no transactions".

    Returns:
        A list of ``(date, count)`` tuples in the order they appear in the
        string, where ``date`` is the ``YYYY-MM-DD`` text and ``count`` is a
        float. Empty when nothing matches or the cell is missing.

    Raises:
        TypeError: If ``row`` is any other non-string value, so unexpected
            column types still surface instead of being silently dropped.
    """
    # `row != row` is only true for NaN; this avoids importing math/pandas here.
    if row is None or (isinstance(row, float) and row != row):
        return []
    return [
        (date, float(value))
        for date, value in _TRANSACTION_PAIR_RE.findall(row)
    ]

# Decode each stringified array into a Python list of (date, count) tuples.
results_df['transactions'] = results_df['transaction_details'].map(parse_string)
results_df.tail(1)

Unnamed: 0,contract_address,user_address,transaction_details,transactions
596152,base 0x940181a94a35a4569e4529a3cdfb74e38fd98631,0x20313461ea24b6aae8a2d4cf39751d5e6458152c,[[2024-06-12 0.5]],"[(2024-06-12, 0.5)]"


In [9]:
# Expand the parsed Dune results into one event per (project, user, day).
token_projects = projects[projects['contract_type'] == this_contract_type]
for project_uuid, project_row in token_projects.iterrows():
    contract = project_row['address']
    network = project_row['chain']
    # Dune rows are labeled "<blockchain> <address>", with the chain lowercased.
    dune_label = f"{network.lower()} {contract}"

    per_user = (
        results_df[results_df['contract_address'] == dune_label]
        .set_index('user_address')
        ['transactions']
    )
    event_data.extend(
        {
            'uuid': project_uuid,
            'chain': network,
            'contract_address': contract,
            'contract_type': this_contract_type,
            'user_address': wallet,
            'date': day,
            'count_transactions': count,
        }
        for wallet, daily_counts in per_user.items()
        for day, count in daily_counts
    )

## Part 2. Get OSO dapp transactions

In [10]:
# Switch the working contract type; the following cells filter `projects` and
# tag events with this value.
this_contract_type = 'dapp_contract'

In [11]:
# Distinct addresses of the selected contract type, rendered as a
# single-quoted, comma-separated list for the SQL `in (...)` clause.
is_dapp = projects['contract_type'] == this_contract_type
dapp_addresses = list(projects.loc[is_dapp, 'address'].unique())
dapp_addresses_str = "'{}'".format("','".join(dapp_addresses))

In [12]:
# BigQuery query against `<PROJECT>.static_data_sources.sunny_transactions`:
# selects date, chain and the `transactions` column for rows whose to_address
# is one of the dapp contracts, exposing to_address as contract_address and
# from_address as user_address.
# NOTE(review): the address list is interpolated straight into the SQL text; it
# comes from the local projects CSV -- confirm that file is trusted before
# re-running the query.
query = f"""
  select
      date,
      to_address as contract_address,
      from_address as user_address,
      chain,
      transactions
    from `{PROJECT}.static_data_sources.sunny_transactions`
    where to_address in ({dapp_addresses_str})
"""
# The BigQuery call is left commented out: these lines run the query and write
# the result to parquet. The active line below loads that parquet file instead
# (presumably written by an earlier run -- TODO confirm).
# result = client.query(query)
# txns_df = result.to_dataframe()
# txns_df.to_parquet('data/raw_metric_data/oso_raw_dapp_txns.parquet')
txns_df = pd.read_parquet('data/raw_metric_data/oso_raw_dapp_txns.parquet')

In [13]:
# Append one event per (project, user, date) row of the dapp transaction data.
dapp_projects = projects[projects['contract_type'] == this_contract_type]
for project_uuid, project_row in dapp_projects.iterrows():
    contract = project_row['address']
    network = project_row['chain']

    # Rows must match on both the contract address and the chain.
    same_contract = txns_df['contract_address'] == contract
    same_chain = txns_df['chain'] == network
    per_user_day = (
        txns_df[same_contract & same_chain]
        .set_index(['user_address', 'date'])
        ['transactions']
    )
    event_data.extend(
        {
            'uuid': project_uuid,
            'chain': network,
            'contract_address': contract,
            'contract_type': this_contract_type,
            'user_address': wallet,
            'date': day,
            'count_transactions': count,
        }
        for (wallet, day), count in per_user_day.items()
    )

## Part 3. Get OSO trace events

In [14]:
# Switch the working contract type; the following cells filter `projects` and
# tag events with this value.
this_contract_type = 'trace_contract'

In [15]:
# Distinct addresses of the selected contract type, rendered as a
# single-quoted, comma-separated list for the SQL `in (...)` clauses.
is_trace = projects['contract_type'] == this_contract_type
trace_addresses = list(projects.loc[is_trace, 'address'].unique())
trace_addresses_str = "'{}'".format("','".join(trace_addresses))

In [16]:
# BigQuery query against `<PROJECT>.static_data_sources.sunny_traces`.
# The `traces` CTE unions two views of the table:
#   - rows where the contract is the to_address (user = from_address), and
#   - rows where the contract is the from_address (user = to_address).
# The final select counts approximately-distinct transaction hashes per
# (date, chain, contract_address, user_address). Because the count uses
# approx_count_distinct, it is an estimate rather than an exact count.
# NOTE(review): the address list is interpolated straight into the SQL text; it
# comes from the local projects CSV -- confirm that file is trusted before
# re-running the query.
query = f"""
    with traces as (
      select
        format_date('%Y-%m-%d', CAST(block_timestamp AS DATE)) AS date,
        transaction_hash,
        chain,
        to_address as contract_address,
        from_address as user_address
      from `{PROJECT}.static_data_sources.sunny_traces`
        where to_address in ({trace_addresses_str})

    union all

      select
        format_date('%Y-%m-%d', CAST(block_timestamp AS DATE)) AS date,
        transaction_hash,
        chain,
        from_address as contract_address,
        to_address as user_address
      from `{PROJECT}.static_data_sources.sunny_traces`
        where from_address in ({trace_addresses_str})
    )

    select 
      date,
      chain,
      contract_address,
      user_address,
      approx_count_distinct(transaction_hash) as transactions
    from traces
    group by 
      date,
      chain,
      contract_address,
      user_address
"""
# The BigQuery call is left commented out: these lines run the query and write
# the result to parquet. The active line below loads that parquet file instead
# (presumably written by an earlier run -- TODO confirm).
# result = client.query(query)
# traces_df = result.to_dataframe()
# traces_df.to_parquet('data/raw_metric_data/oso_raw_trace_events.parquet')
traces_df = pd.read_parquet('data/raw_metric_data/oso_raw_trace_events.parquet')

In [17]:
# Append one event per (project, user, date), summing trace counts within each pair.
trace_projects = projects[projects['contract_type'] == this_contract_type]
for project_uuid, project_row in trace_projects.iterrows():
    contract = project_row['address']
    network = project_row['chain']

    # Rows must match on both the contract address and the chain.
    same_contract = traces_df['contract_address'] == contract
    same_chain = traces_df['chain'] == network
    per_user_day = (
        traces_df[same_contract & same_chain]
        .groupby(['user_address', 'date'])
        ['transactions']
        .sum()
    )
    event_data.extend(
        {
            'uuid': project_uuid,
            'chain': network,
            'contract_address': contract,
            'contract_type': this_contract_type,
            'user_address': wallet,
            'date': day,
            'count_transactions': count,
        }
        for (wallet, day), count in per_user_day.items()
    )

## Part 4. Consolidate and join on Farcaster data

In [18]:
FARCASTER_DUMP = 'data/raw_metric_data/farcaster.parquet'

# Build index -> fid and index -> username lookups from the Farcaster dump.
# These are later applied to `user_address`, so the dump's index is presumably
# the wallet address -- TODO confirm.
fc = pd.read_parquet(FARCASTER_DUMP)
fids, fusers = (fc[column].to_dict() for column in ('fid', 'username'))

In [19]:
# Build the consolidated event table, then add lookup columns.
df = pd.DataFrame(event_data)

# new column -> (key column to map from, lookup dict); dict order fixes column order.
column_lookups = {
    'farcaster_id': ('user_address', fids),
    'farcaster_username': ('user_address', fusers),
    'recipient': ('uuid', projects['recipient'].to_dict()),
}
for new_column, (key_column, lookup) in column_lookups.items():
    df[new_column] = df[key_column].map(lookup)

df.tail(1)

Unnamed: 0,uuid,chain,contract_address,contract_type,user_address,date,count_transactions,farcaster_id,farcaster_username,recipient
5521952,1c0ae0df-9238-497d-8fa0-ad05f381cf34,Base,0x52b7fdb72db7b1279919ec1cf69d3f5cb51d1243,trace_contract,0xd120c31eb8a5e43144361e3266d701b38ea4ed63,2024-08-28,6.0,,,0xf3B06b503652a5E075D423F97056DFde0C4b066F


In [20]:
# Persist the consolidated event table (events plus Farcaster and recipient columns).
df.to_parquet("data/raw_metric_data/project_events.parquet")