In [44]:
# Autoreload mode 2: re-import edited modules before each cell runs, so changes
# to local .py files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## GraphQL

In [45]:
import json
import os
import time

import pandas as pd
import requests
import tqdm

In [46]:
# GraphQL endpoint that every request helper in this notebook POSTs to.
DUNE_GRAPHQL = "https://core-hsr.dune.com/v1/graphql"
# Not referenced by any cell visible here; presumably the endpoint that issues
# the session token -- TODO confirm.
DUNE_AUTH = "https://dune.com/api/auth/session"

# Headers sent with every GraphQL request. The API key is read from the
# DUNE_API_KEY environment variable so that no credential has to be pasted
# into (and committed with) the notebook; it falls back to an empty string
# when the variable is unset.
HEADERS = {
    "content-type": "application/json",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
    "x-hasura-api-key": os.environ.get("DUNE_API_KEY", ""),
}

### Find dashboards

In [47]:
def find_dashboards(query: str, limit: int = 20, offset: int = 0, timeout: float = 30.0):
    """Search Dune dashboards whose name contains ``query``.

    Sends the ``ListBrowseDashboards`` GraphQL operation to ``DUNE_GRAPHQL``
    and keeps only the fields needed downstream.

    Args:
        query: Text matched case-insensitively (``_ilike``) anywhere in the
            dashboard name.
        limit: Maximum number of dashboards requested from the server.
        offset: Number of leading results to skip (for paging).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with the columns ``url`` and ``favorite_count``, one row
        per returned dashboard that has a user owner, in server order (24h
        trending score first, then all-time favorites). Both columns are
        present even when nothing matched.

    Raises:
        requests.HTTPError: The server answered with a non-2xx status.
        RuntimeError: The response carries no ``"data"`` member.
    """
    payload = {
        "operationName": "ListBrowseDashboards",
        "variables": {
            "include_favs_last_24h": False,
            "include_favs_last_7d": False,
            "include_favs_last_30d": False,
            "include_favs_all_time": True,
            "filter_name": {
                "_ilike": f"%{query}%"
            },
            "filter_tags": {},
            "filter_custom": [],
            "order": [
                {
                    "trending_scores": {
                        "score_24h": "desc_nulls_last"
                    }
                },
                {
                    "dashboard_favorite_count_all": {
                        "favorite_count": "desc_nulls_last"
                    }
                }
            ],
            "limit": limit,
            "offset": offset
        },
        "query": "query ListBrowseDashboards($filter_name: String_comparison_exp, $filter_tags: jsonb_comparison_exp, $filter_custom: [dashboards_bool_exp!], $order: [dashboards_order_by!], $limit: Int!, $offset: Int!, $include_favs_last_24h: Boolean! = false, $include_favs_last_7d: Boolean! = false, $include_favs_last_30d: Boolean! = false, $include_favs_all_time: Boolean! = false) {\n  dashboards(\n    where: {is_archived: {_eq: false}, name: $filter_name, tags: $filter_tags, _and: $filter_custom}\n    limit: $limit\n    offset: $offset\n    order_by: $order\n  ) {\n    ...DashboardItem\n    __typename\n  }\n  dashboards_aggregate(\n    where: {is_archived: {_eq: false}, name: $filter_name, tags: $filter_tags, _and: $filter_custom}\n  ) {\n    aggregate {\n      count\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment DashboardItem on dashboards {\n  id\n  name\n  slug\n  created_at\n  tags\n  user {\n    name\n    profile_image_url\n    __typename\n  }\n  team {\n    handle\n    profile_image_url\n    __typename\n  }\n  is_private\n  dashboard_favorite_count_all @include(if: $include_favs_all_time) {\n    favorite_count\n    __typename\n  }\n  dashboard_favorite_count_last_24h @include(if: $include_favs_last_24h) {\n    favorite_count\n    __typename\n  }\n  dashboard_favorite_count_last_7d @include(if: $include_favs_last_7d) {\n    favorite_count\n    __typename\n  }\n  dashboard_favorite_count_last_30d @include(if: $include_favs_last_30d) {\n    favorite_count\n    __typename\n  }\n  trending_scores {\n    score_1h\n    score_4h\n    score_24h\n    updated_at\n    __typename\n  }\n  __typename\n}\n"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.post(DUNE_GRAPHQL, json=payload, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    body = response.json()

    # A rejected request comes back without "data"; surface the server's
    # answer instead of failing later with a bare KeyError.
    if "data" not in body:
        raise RuntimeError(f"Dune GraphQL request failed (token expired?): {body!r}")

    records = []
    for dashboard in body["data"]["dashboards"]:
        # Dashboards without a user owner are skipped: the URL below is built
        # from the user name.
        if dashboard["user"] is None:
            continue

        user, slug = dashboard["user"]["name"], dashboard["slug"]

        # The favorites aggregate may be absent; count that as zero.
        favorites = dashboard["dashboard_favorite_count_all"] or {}
        records.append({
            "url": f"https://dune.com/{user}/{slug}",
            "favorite_count": favorites.get("favorite_count") or 0
        })

    # Naming the columns keeps the schema stable when nothing matched, so
    # callers can still concatenate and de-duplicate on "url".
    frame = pd.DataFrame(records, columns=["url", "favorite_count"])
    return frame.astype({"favorite_count": "int64"})

### Parse dashboard

In [48]:
def parse_dashboard(url: str, timeout: float = 30.0):
    """Return the query ids behind every visualization widget of a dashboard.

    The owner and slug are taken from the last two path segments of ``url``
    and looked up with the ``FindDashboard`` GraphQL operation, which matches
    the owner against both user names and team handles.

    Args:
        url: Dashboard URL of the form ``https://dune.com/<owner>/<slug>``;
            a trailing slash is tolerated.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list with one query id per visualization widget, in widget order.
        Ids repeat when several widgets are backed by the same query.

    Raises:
        requests.HTTPError: The server answered with a non-2xx status.
        RuntimeError: The response carries no ``"data"`` member.
        LookupError: The lookup did not match exactly one dashboard.
    """
    # Strip a trailing slash first; otherwise the slug would come out empty
    # and the owner would be read from the slug position.
    *_, user, slug = url.rstrip("/").rsplit("/", 2)

    payload = {
        "operationName": "FindDashboard",
        "variables": {
            "session_filter": {},
            "user": user,
            "slug": slug,
        },
        "query": "query FindDashboard($session_filter: Int_comparison_exp!, $user: String!, $slug: String!) {\n  dashboards(\n    where: {slug: {_eq: $slug}, _or: [{user: {name: {_eq: $user}}}, {team: {handle: {_eq: $user}}}]}\n  ) {\n    ...Dashboard\n    favorite_dashboards(where: {user_id: $session_filter}, limit: 1) {\n      created_at\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment Dashboard on dashboards {\n  id\n  name\n  slug\n  is_private\n  is_archived\n  created_at\n  updated_at\n  tags\n  user {\n    ...User\n    __typename\n  }\n  team {\n    ...Team\n    __typename\n  }\n  forked_dashboard {\n    slug\n    name\n    user {\n      name\n      __typename\n    }\n    team {\n      handle\n      __typename\n    }\n    __typename\n  }\n  text_widgets {\n    id\n    created_at\n    updated_at\n    text\n    options\n    __typename\n  }\n  visualization_widgets {\n    id\n    created_at\n    updated_at\n    options\n    visualization {\n      ...Visualization\n      __typename\n    }\n    __typename\n  }\n  param_widgets {\n    id\n    key\n    visualization_widget_id\n    query_id\n    dashboard_id\n    options\n    created_at\n    updated_at\n    __typename\n  }\n  dashboard_favorite_count_all {\n    favorite_count\n    __typename\n  }\n  trending_scores {\n    score_1h\n    score_4h\n    score_24h\n    updated_at\n    __typename\n  }\n  __typename\n}\n\nfragment User on users {\n  id\n  name\n  profile_image_url\n  __typename\n}\n\nfragment Team on teams {\n  id\n  name\n  handle\n  profile_image_url\n  __typename\n}\n\nfragment Visualization on visualizations {\n  id\n  type\n  name\n  options\n  created_at\n  query_details {\n    query_id\n    name\n    description\n    show_watermark\n    parameters\n    user {\n      id\n      name\n      profile_image_url\n      __typename\n    }\n    team {\n      id\n      name\n      handle\n      profile_image_url\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.post(DUNE_GRAPHQL, json=payload, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    body = response.json()

    # Explicit raises instead of assert: the checks must survive `python -O`
    # and the error should show what the server actually answered.
    if "data" not in body:
        raise RuntimeError(f"token expired or request rejected: {body!r}")

    matches = body["data"]["dashboards"]
    if len(matches) != 1:
        raise LookupError(
            f"expected exactly one dashboard for {user}/{slug}, got {len(matches)}"
        )
    dashboard = matches[0]

    return [
        widget["visualization"]["query_details"]["query_id"]
        for widget in dashboard["visualization_widgets"]
    ]

### Parse query

In [49]:
def parse_query(query_id: int, timeout: float = 30.0):
    """Fetch one Dune query and return the fields kept for the dataset.

    Sends the ``FindQuery`` GraphQL operation to ``DUNE_GRAPHQL``.

    Args:
        query_id: Numeric id of the query to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``id``, ``name``, ``description``,
        ``visualizations`` (the visualization list serialized as a JSON
        string) and ``query`` (the query text).

    Raises:
        requests.HTTPError: The server answered with a non-2xx status.
        RuntimeError: The response carries no ``"data"`` member.
        LookupError: The lookup did not match exactly one query.
    """
    payload = {
        "operationName": "FindQuery",
        "variables": {
            "favs_last_24h": False,
            "favs_last_7d": False,
            "favs_last_30d": False,
            "favs_all_time": True,
            "session_filter": {},
            "id": query_id
        },
        # Kept on a single line: a line break inside this literal leaves the
        # string unterminated.
        "query": "query FindQuery($session_filter: Int_comparison_exp!, $id: Int!, $favs_last_24h: Boolean! = false, $favs_last_7d: Boolean! = false, $favs_last_30d: Boolean! = false, $favs_all_time: Boolean! = true) {\n  queries(where: {id: {_eq: $id}}) {\n    ...Query\n    favorite_queries(where: {user_id: $session_filter}, limit: 1) {\n      created_at\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment Query on queries {\n  ...BaseQuery\n  ...QueryVisualizations\n  ...QueryForked\n  ...QueryUsers\n  ...QueryTeams\n  ...QueryFavorites\n  __typename\n}\n\nfragment BaseQuery on queries {\n  id\n  dataset_id\n  name\n  description\n  query\n  is_private\n  is_temp\n  is_archived\n  created_at\n  updated_at\n  schedule\n  tags\n  parameters\n  __typename\n}\n\nfragment QueryVisualizations on queries {\n  visualizations {\n    id\n    type\n    name\n    options\n    created_at\n    __typename\n  }\n  __typename\n}\n\nfragment QueryForked on queries {\n  forked_query {\n    id\n    name\n    user {\n      name\n      __typename\n    }\n    team {\n      handle\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment QueryUsers on queries {\n  user {\n    ...User\n    __typename\n  }\n  team {\n    id\n    name\n    handle\n    profile_image_url\n    __typename\n  }\n  __typename\n}\n\nfragment User on users {\n  id\n  name\n  profile_image_url\n  __typename\n}\n\nfragment QueryTeams on queries {\n  team {\n    ...Team\n    __typename\n  }\n  __typename\n}\n\nfragment Team on teams {\n  id\n  name\n  handle\n  profile_image_url\n  __typename\n}\n\nfragment QueryFavorites on queries {\n  query_favorite_count_all @include(if: $favs_all_time) {\n    favorite_count\n    __typename\n  }\n  query_favorite_count_last_24h @include(if: $favs_last_24h) {\n    favorite_count\n    __typename\n  }\n  query_favorite_count_last_7d @include(if: $favs_last_7d) {\n    favorite_count\n    __typename\n  }\n  query_favorite_count_last_30d @include(if: $favs_last_30d) {\n    favorite_count\n    __typename\n  }\n  __typename\n}\n"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.post(DUNE_GRAPHQL, json=payload, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    body = response.json()

    # Explicit raises instead of assert: the checks must survive `python -O`
    # and the error should show what the server actually answered.
    if "data" not in body:
        raise RuntimeError(f"token expired or request rejected: {body!r}")

    matches = body["data"]["queries"]
    if len(matches) != 1:
        raise LookupError(f"expected exactly one query with id {query_id}, got {len(matches)}")
    query = matches[0]

    return {
        "id": query["id"],
        "name": query["name"],
        "description": query["description"],
        # Serialized to a JSON string so the nested list fits in one CSV cell.
        "visualizations": json.dumps(query["visualizations"]),
        "query": query["query"]
    }

### Collect data

In [50]:
def drop_duplicates(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with one row per ``url`` and a fresh 0..n-1 index.

    When a URL occurs more than once the first occurrence is kept. The input
    frame itself is not modified.
    """
    return df.drop_duplicates(subset="url", keep="first").reset_index(drop=True)

In [64]:
tags = ["defi", "dex", "nft", "ethereum", "volume", "opensea", "uniswap", "0x", "amm", "lens", "ens"]

# One search per tag (top 20 each); the per-tag frames are stacked and then
# reduced to one row per dashboard URL.
dashboards = drop_duplicates(
    pd.concat([find_dashboards(tag, limit=20) for tag in tags])
)

dashboards.head()

Unnamed: 0,url,favorite_count
0,https://dune.com/rchen8/defi-users-over-time,2563
1,https://dune.com/jefftshaw/Balancer,405
2,https://dune.com/dsalv/Liquidations,207
3,https://dune.com/k06a/DeFi-Project-Users,101
4,https://dune.com/eliasimos/DeFi-granular-monit...,65


In [65]:
# Number of distinct dashboard URLs collected across all tags.
len(dashboards)

185

In [66]:
queries = []
# (kind, identifier, error) for everything that could not be fetched, so that
# failures are visible afterwards instead of being silently dropped.
failures = []
# Ids already fetched successfully; many widgets and dashboards share a query,
# and each fetch is a network round trip.
fetched_query_ids = set()

for url in tqdm.tqdm(dashboards["url"], total=len(dashboards)):
    # A single unreadable dashboard must not abort the whole scrape.
    try:
        query_ids = parse_dashboard(url)
    except Exception as error:
        failures.append(("dashboard", url, repr(error)))
        continue

    for query_id in query_ids:
        if query_id in fetched_query_ids:
            continue
        try:
            queries.append(parse_query(query_id))
        except Exception as error:
            failures.append(("query", query_id, repr(error)))
        else:
            # Only mark the id once it succeeded, so a later occurrence of a
            # failed id is retried.
            fetched_query_ids.add(query_id)

print(f"fetched {len(queries)} queries, {len(failures)} failures")

 12%|█▏        | 22/185 [06:37<1:02:49, 23.12s/it]

In [59]:
# Naming the columns keeps the frame well-formed (and the de-duplication
# below working) even when nothing was scraped.
queries = pd.DataFrame(queries, columns=["id", "name", "description", "visualizations", "query"])

# One row per query id, then one row per query name; the first occurrence wins.
queries = queries.drop_duplicates(subset="id", keep="first")
queries = queries.drop_duplicates(subset="name", keep="first")

# Drop queries without a usable name. A missing name is treated like a blank
# one instead of raising on `.strip()`.
has_name = queries["name"].fillna("").str.strip().ne("")
queries = queries[has_name].reset_index(drop=True)

In [60]:
# Preview the first rows of the cleaned query table.
queries.head()

Unnamed: 0,id,name,description,visualizations,query
0,13352,Total Harvest Finance users over time,,"[{""id"": 26744, ""type"": ""table"", ""name"": ""Table...","SELECT date, sum(users) OVER (\n ..."
1,37629,Total Tornado Cash users over time,,"[{""id"": 74571, ""type"": ""table"", ""name"": ""Query...","SELECT date,\n sum(users) OVER (ORDER BY..."
2,11519,Total SushiSwap users over time,,"[{""id"": 22882, ""type"": ""table"", ""name"": ""Table...","SELECT date, sum(users) OVER (\n ..."
3,2998,Total 0x users over time,,"[{""id"": 5791, ""type"": ""table"", ""name"": ""Table""...","SELECT date, sum(users) OVER (\n ..."
4,2740,Total Uniswap users over time,,"[{""id"": 5276, ""type"": ""table"", ""name"": ""Table""...","SELECT date, sum(users) OVER (\n ..."


In [61]:
# Number of queries left after de-duplication and blank-name filtering.
len(queries)

432

In [62]:
# to_csv does not create missing parent directories, so make sure "data/"
# exists before writing.
os.makedirs("data", exist_ok=True)
queries.to_csv("data/data.csv", index=False)

## Selenium

In [None]:
import bs4
import time

from selenium import webdriver
from webdriver_manager import chrome
from selenium.webdriver.chrome import service

In [None]:
def get_page_html(url: str, delay: int = 10) -> str:
    """Load ``url`` in a fresh Chrome session and return the page's HTML.

    Args:
        url: Address of the page to open.
        delay: Seconds to sleep after navigation before the source is read,
            leaving time for client-side rendering to finish.

    Returns:
        The browser's ``page_source`` after the delay.
    """
    driver = webdriver.Chrome(service=service.Service(chrome.ChromeDriverManager().install()))
    try:
        driver.get(url)
        time.sleep(delay)
        return driver.page_source
    finally:
        # Always shut the browser down; otherwise every call (including ones
        # that fail while loading) leaves a Chrome/chromedriver process behind.
        driver.quit()