In [1]:
from google.cloud import bigquery
import json
import os
import pandas as pd
import requests
import time

In [2]:
# Setup guide: https://docs.opensource.observer/docs/get-started/
# Add your GCP project and credentials here.

# Project id that is interpolated into the fully-qualified table name of the attestation query.
PROJECT = 'opensource-observer'
# Credentials file path, relative to the notebook's working directory.
# NOTE(review): presumably read by bigquery.Client() at construction, so keep this line first — confirm.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../../gcp_credentials.json'
# Created without arguments: PROJECT above is not passed to the client.
client = bigquery.Client()

# Load and process the attestations

In [3]:
# Attestation query.
#  - `decoded` CTE: reads non-revoked attestations for a single schema id and, for each
#    row, scans the JSON array in `decoded_data_json` for the items whose `$.name` is
#    'metadataurl' / 'contributionRegUID', taking each item's `$.value.value`.
#  - Outer SELECT: keeps only rows where both extracted fields are present and orders
#    them newest first.
# `{PROJECT}` is filled in by the f-string; it is the only interpolated value.
query = f"""
WITH decoded AS (
  SELECT
    id,
    recipient,
    attester,
    TIMESTAMP_SECONDS(time) AS timestamp,
    decoded_data_json,
    (SELECT JSON_EXTRACT_SCALAR(item, '$.value.value')
     FROM UNNEST(JSON_EXTRACT_ARRAY(decoded_data_json)) AS item
     WHERE JSON_EXTRACT_SCALAR(item, '$.name') = 'metadataurl') AS metadataurl,
    (SELECT JSON_EXTRACT_SCALAR(item, '$.value.value')
     FROM UNNEST(JSON_EXTRACT_ARRAY(decoded_data_json)) AS item
     WHERE JSON_EXTRACT_SCALAR(item, '$.name') = 'contributionRegUID') AS contributionRegUID
  FROM `{PROJECT}.ethereum_attestation_service_optimism.attestations`
  WHERE
    revoked = FALSE
    AND schema_id = '0xc9bc703e3c48be23c1c09e2f58b2b6657e42d8794d2008e3738b4ab0e2a3a8b6'
)
SELECT
  id,
  recipient,
  attester,
  timestamp,
  metadataurl,
  contributionRegUID
FROM decoded
WHERE
  contributionRegUID IS NOT NULL
  AND metadataurl IS NOT NULL
ORDER BY timestamp DESC
"""

# Submit the query and load the result into a DataFrame with one row per attestation.
result = client.query(query)
df_attestations = result.to_dataframe()

In [6]:
# Browser-like headers sent with every metadata request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Referer': 'https://metricsgarden.xyz'
}

REQUEST_DELAY_SECONDS = 1      # pause before every request
RETRY_DELAY_SECONDS = 30       # back-off before the single retry
REQUEST_TIMEOUT_SECONDS = 30   # abandon a stalled request instead of hanging the cell


def _fetch_metadata(url, label):
    """Download and decode the JSON document at ``url``, retrying once.

    Args:
        url: Address of the metadata document.
        label: Identifier of the current row, used only in the printed messages.

    Returns:
        The decoded JSON payload, or ``None`` when both attempts fail. A
        document whose body is the JSON literal ``null`` is therefore
        indistinguishable from a failure and is skipped by the caller.

    A failure is any of: a network error or timeout raised by ``requests``,
    a non-200 status code, or a body that is not valid JSON. Failures are
    printed and never raised, so one bad URL cannot discard the records
    collected so far.
    """
    for attempt in range(2):
        if attempt:
            time.sleep(RETRY_DELAY_SECONDS)
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            problem = repr(exc)
        else:
            if response.status_code == 200:
                try:
                    return response.json()
                except ValueError as exc:
                    # JSON decode errors are ValueError subclasses.
                    problem = f'invalid JSON ({exc})'
            else:
                problem = response.status_code
        prefix = 'Second error' if attempt else 'Error'
        print(f'{prefix} at {label} getting {url}, {problem}')
    return None


# One dict per attestation whose metadata could be fetched: the row's columns
# plus the decoded document under the 'metadata' key.
data = []
for idx, row in df_attestations.iterrows():
    time.sleep(REQUEST_DELAY_SECONDS)
    metadata = _fetch_metadata(row['metadataurl'], idx)
    if metadata is None:
        continue

    row_data = row.to_dict()
    row_data['metadata'] = metadata
    data.append(row_data)

In [26]:
# Shallow copies of the fetched records in which the 'timestamp' value is
# replaced by its 'YYYY-MM-DD' string form; the entries of `data` are left untouched.
clean_data = [
    {**record, 'timestamp': pd.Timestamp.strftime(record['timestamp'], '%Y-%m-%d')}
    for record in data
]

In [27]:
# Persist the cleaned records as pretty-printed JSON.
output_path = "data/attestations.json"
# Create the target directory first so the cell also works on a fresh checkout
# where `data/` does not exist yet (open() would raise FileNotFoundError).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# json.dump escapes non-ASCII by default, so the explicit encoding does not
# change the bytes written; it only removes the dependency on the locale default.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(clean_data, f, indent=2)

In [28]:
# Display the cleaned records as the cell output.
# NOTE(review): this renders the entire list; consider a slice such as clean_data[:5] — confirm the full dump is wanted.
clean_data

[{'id': '0xa0b0dbb9056ea7e5b12bd9c5d6d6e06e677f0fccdba62694f0dc570b436619b5',
  'recipient': '0x0000000000000000000000000000000000000000',
  'attester': '0x7484aABFef9f39464F332e632047983b67571C0a',
  'timestamp': '2024-09-28',
  'metadataurl': 'https://gateway.pinata.cloud/ipfs/QmURDLkfckkJCb8NvFcYExNpziwthdxdZFSxFagvkULjGL',
  'contributionRegUID': '0xda07eb285eba8117820328823b944fa76ea1b3f0ad0e7bd29ef57814bb73129f',
  'metadata': {'id': 1780,
   'userFid': '394078',
   'projectName': 'Women Biz ',
   'ecosystem': 'Optimism',
   'category': 'Onchain Builders',
   'subcategory': 'Crosschain',
   'secondaryecosystem': '',
   'contribution': 'Women empower women through web3. ',
   'desc': 'We develop projects, bootcamps, hackathons, workshops, mentorship, networking and events to empower more women through blockchain and forming women leaders in web3. ',
   'link': 'https://x.com/hiwomenbiz?s=21',
   'ethAddress': '',
   'primarycontributionuid': '0xda07eb285eba8117820328823b944fa76ea1