# Collect Transactions

## Set up

In [1]:

import os
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import shutil

# Base directory for all data paths: the parent of the current working
# directory. NOTE(review): presumably the notebook is launched from a
# sub-folder of the project (e.g. `notebooks/`) — confirm.
PATH = Path.cwd().parents[0]

from utils.data_loader import (
  get_info_by_contract_addr,
  save_bytecode_by_contract_addr,
  get_bytecode_by_contract_addr,
  save_transactions_by_contract_addr,
  get_source_code_by_contract_addr,
  save_sol_by_contract_addr
)

In [2]:
# Root folder holding every dataset used by this notebook.
DATA_PATH = os.path.join(PATH, 'data')

# Labeled contracts: one sub-folder per artifact type
# (transaction JSON, bytecode hex, Solidity source).
LABELED_PATH = os.path.join(DATA_PATH, 'labeled')
TXN_PATH, HEX_PATH, SOL_PATH = (
    os.path.join(LABELED_PATH, kind) for kind in ('txn', 'hex', 'sol')
)

# Ground-truth spreadsheet plus its bundled bytecode / source files.
GROUND_PATH = os.path.join(DATA_PATH, 'external/crpwarner/dataset/groundtruth')
HEX_GROUND_PATH, SOL_GROUND_PATH = (
    os.path.join(GROUND_PATH, kind) for kind in ('hex', 'sol')
)

# Destination for ground-truth files whose address is not in the labeled set.
TMP_PATH = os.path.join(DATA_PATH, 'interim')
HEX_TMP_PATH, SOL_TMP_PATH = (
    os.path.join(TMP_PATH, kind) for kind in ('hex', 'sol')
)


## Load Dataset

In [3]:
# Read the ground-truth spreadsheet, normalise the address column name and
# lower-case every address so later filename comparisons are case-insensitive.
groundtruth_xlsx = os.path.join(GROUND_PATH, 'groundTruth.xlsx')
df = (
    pd.read_excel(groundtruth_xlsx)
    .rename(columns={'address': 'Address'})
    .assign(Address=lambda frame: frame['Address'].str.lower())
)

In [4]:
# Persist the normalised ground truth next to the labeled artifacts.
groundtruth_csv = os.path.join(LABELED_PATH, 'groundtruth.csv')
df.to_csv(groundtruth_csv, index=False)

In [5]:
# Distinct contract addresses, in order of first appearance in the sheet.
contract_addresses = df['Address'].drop_duplicates().tolist()

In [6]:
# Show how many distinct addresses there are, followed by the addresses themselves.
(len(contract_addresses), contract_addresses)

(69,
 ['0x93023f1d3525e273f291b6f76d2f5027a39bf302',
  '0x2753dce37a7edb052a77832039bcc9aa49ad8b25',
  '0x94b7d24552933f50a5a5705c446528806dcea381',
  '0xe0b9d4146aad6936cbfcbe4dae47e34aab96b093',
  '0x10f6f2b97f3ab29583d9d38babf2994df7220c21',
  '0x11cbc781dadaad13fc3a361772c80b1c027820af',
  '0x3e597ea168a85aa2ae5e2c4333665bcd875ed10f',
  '0x292e89d5d5bdab3af2f5838c194c1983f0140b43',
  '0xef20505c8b343d12da174bf9d8495c1ce2670989',
  '0xc71d244f7ad6c869ecbf13cbd9acae31718be4f8',
  '0x5927b72440d8a8b8c6ca5a8be60e88975f9063fc',
  '0xc709878167ed069aea15fd0bd4e9758ceb4da193',
  '0x1e4402fa427a7a835fc64ea6d051404ce767a569',
  '0xcb6cd204d783dc8d66896a6def5867d332228d7b',
  '0x455dedacbe41c178953119847f2b95e2d9ad0a1d',
  '0x831467b7b6bf9c705dc87899d48b57ee55c8d5cc',
  '0x6609f543d38816116fa5b9a98c918ca947f5455d',
  '0x90f75ca026add95ae15ecbf48efc77ed272945be',
  '0x52e4339b4b9ff254738d6e971e83440f60dc029c',
  '0x51c5807dd8398aedfcc91e6483417838b41eaeb8',
  '0xa0ffc741f109159ee203424a299e6d

## Copy Ground-Truth Files and Collect Missing Data

In [7]:
# Copy every ground-truth bytecode file: labeled addresses go to HEX_PATH,
# everything else is parked in HEX_TMP_PATH. File names are lower-cased.
hex_sources = list(Path(HEX_GROUND_PATH).glob('*.hex'))
for src_file in tqdm(hex_sources):
    address = src_file.stem.lower()
    target_dir = HEX_PATH if address in contract_addresses else HEX_TMP_PATH
    shutil.copy(src_file, os.path.join(target_dir, f'{address}.hex'))

  0%|          | 0/70 [00:00<?, ?it/s]

100%|██████████| 70/70 [00:00<00:00, 390.01it/s]


In [8]:
# Copy every ground-truth Solidity file: labeled addresses go to SOL_PATH,
# everything else is parked in SOL_TMP_PATH. File names are lower-cased.
sol_sources = list(Path(SOL_GROUND_PATH).glob('*.sol'))
for src_file in tqdm(sol_sources):
    address = src_file.stem.lower()
    target_dir = SOL_PATH if address in contract_addresses else SOL_TMP_PATH
    shutil.copy(src_file, os.path.join(target_dir, f'{address}.sol'))

100%|██████████| 71/71 [00:00<00:00, 462.24it/s]


In [9]:
# 3. Collect and extract
for address in tqdm(contract_addresses):
    addr = address.lower()
    if addr not in [file.stem.lower() for file in Path(TXN_PATH).glob("*.json")]:
        info = get_info_by_contract_addr(address)
        save_transactions_by_contract_addr(TXN_PATH, address, info)
        if 'creationBytecode' in info.get('creator'):
            save_bytecode_by_contract_addr(HEX_PATH, address, info['creator']['creationBytecode'])

    if addr not in [file.stem.lower() for file in Path(HEX_PATH).glob("*.hex")]:
        bytecode = get_bytecode_by_contract_addr(addr)
        save_bytecode_by_contract_addr(HEX_PATH, address, bytecode)

    if addr not in [file.stem.lower() for file in Path(SOL_PATH).glob("*.hex")]:
        source = get_source_code_by_contract_addr(address)
        if 'SourceCode' in source:
            save_sol_by_contract_addr(SOL_PATH, address, source['SourceCode'])

  0%|          | 0/69 [00:00<?, ?it/s]

  1%|▏         | 1/69 [00:00<00:36,  1.85it/s]

Saved 0x93023f1d3525e273f291b6f76d2f5027a39bf302.sol


  3%|▎         | 2/69 [00:00<00:32,  2.03it/s]

Saved 0x2753dce37a7edb052a77832039bcc9aa49ad8b25.sol


  4%|▍         | 3/69 [00:01<00:31,  2.10it/s]

Saved 0x94b7d24552933f50a5a5705c446528806dcea381.sol


  6%|▌         | 4/69 [00:01<00:30,  2.12it/s]

Saved 0xe0b9d4146aad6936cbfcbe4dae47e34aab96b093.sol


  7%|▋         | 5/69 [00:02<00:29,  2.14it/s]

Saved 0x10f6f2b97f3ab29583d9d38babf2994df7220c21.sol


  9%|▊         | 6/69 [00:02<00:29,  2.15it/s]

Saved 0x11cbc781dadaad13fc3a361772c80b1c027820af.sol


 10%|█         | 7/69 [00:03<00:29,  2.09it/s]

Saved 0x3e597ea168a85aa2ae5e2c4333665bcd875ed10f.sol


 12%|█▏        | 8/69 [00:03<00:28,  2.11it/s]

Saved 0x292e89d5d5bdab3af2f5838c194c1983f0140b43.sol


 13%|█▎        | 9/69 [00:04<00:28,  2.13it/s]

Saved 0xef20505c8b343d12da174bf9d8495c1ce2670989.sol


 14%|█▍        | 10/69 [00:04<00:28,  2.09it/s]

Saved 0xc71d244f7ad6c869ecbf13cbd9acae31718be4f8.sol


 16%|█▌        | 11/69 [00:05<00:27,  2.12it/s]

Saved 0x5927b72440d8a8b8c6ca5a8be60e88975f9063fc.sol


 17%|█▋        | 12/69 [00:05<00:27,  2.06it/s]

Saved 0xc709878167ed069aea15fd0bd4e9758ceb4da193.sol


 19%|█▉        | 13/69 [00:06<00:26,  2.09it/s]

Saved 0x1e4402fa427a7a835fc64ea6d051404ce767a569.sol


 20%|██        | 14/69 [00:06<00:26,  2.12it/s]

Saved 0xcb6cd204d783dc8d66896a6def5867d332228d7b.sol


 22%|██▏       | 15/69 [00:07<00:25,  2.14it/s]

Saved 0x455dedacbe41c178953119847f2b95e2d9ad0a1d.sol


 23%|██▎       | 16/69 [00:07<00:24,  2.15it/s]

Saved 0x831467b7b6bf9c705dc87899d48b57ee55c8d5cc.sol


 25%|██▍       | 17/69 [00:08<00:24,  2.16it/s]

Saved 0x6609f543d38816116fa5b9a98c918ca947f5455d.sol


 26%|██▌       | 18/69 [00:08<00:23,  2.16it/s]

Saved 0x90f75ca026add95ae15ecbf48efc77ed272945be.sol


 28%|██▊       | 19/69 [00:08<00:23,  2.11it/s]

Saved 0x52e4339b4b9ff254738d6e971e83440f60dc029c.sol


 29%|██▉       | 20/69 [00:09<00:23,  2.08it/s]

Saved 0x51c5807dd8398aedfcc91e6483417838b41eaeb8.sol


 30%|███       | 21/69 [00:09<00:22,  2.09it/s]

Saved 0xa0ffc741f109159ee203424a299e6d2731dcfc76.sol


 32%|███▏      | 22/69 [00:10<00:24,  1.94it/s]

Saved 0x9372b371196751dd2f603729ae8d8014bbeb07f6.sol


 33%|███▎      | 23/69 [00:11<00:23,  1.98it/s]

Saved 0x186ed770eecea82def7c92dcc077c4ba27acd5bd.sol


 35%|███▍      | 24/69 [00:11<00:22,  2.00it/s]

Saved 0x28c748535cc0c774d7bb046adba0c9d77e3b4c92.sol


 36%|███▌      | 25/69 [00:12<00:21,  2.02it/s]

Saved 0xd00736f864ecd5bef5996c735f98769ae0d10c7c.sol


 38%|███▊      | 26/69 [00:12<00:21,  2.04it/s]

Saved 0x9a3fb36bf72a387fcc821a38ee9f50f1a0eb8cbd.sol


 39%|███▉      | 27/69 [00:12<00:20,  2.07it/s]

Saved 0xe7e63e244c52b2230666e263657ba8db2b6b3705.sol


 41%|████      | 28/69 [00:13<00:19,  2.09it/s]

Saved 0x25d8f027fd25eecbcd812521fb2f75f175807a91.sol


 42%|████▏     | 29/69 [00:14<00:20,  1.97it/s]

Saved 0x1250b98cbde9f99f4c42dcdacee193221f17eb50.sol


 43%|████▎     | 30/69 [00:14<00:19,  2.01it/s]

Saved 0x0414d8c87b271266a5864329fb4932bbe19c0c49.sol


 45%|████▍     | 31/69 [00:17<00:42,  1.12s/it]

Saved 0x292f57c7fcd726ba651e46b620d99cc6afe0ec1c.sol


 46%|████▋     | 32/69 [00:19<01:00,  1.63s/it]

Saved 0x9db8a10c7fe60d84397860b3af2e686d4f90c2b7.sol


 48%|████▊     | 33/69 [00:20<00:46,  1.28s/it]

Saved 0xabe776435f7459e2f5ba773bfb753ed19a053dd0.sol


 49%|████▉     | 34/69 [00:20<00:36,  1.05s/it]

Saved 0x8d07f605926837ea0f9e1e24dba0fb348cb3e97d.sol


 51%|█████     | 35/69 [00:21<00:29,  1.14it/s]

Saved 0x42269ac712372ac89a158ad5a32806c6b6782d66.sol


 52%|█████▏    | 36/69 [00:21<00:25,  1.32it/s]

Saved 0xdf7ff95aa3d855a6fb21399432166a92fdcf1b1a.sol


 54%|█████▎    | 37/69 [00:22<00:21,  1.47it/s]

Saved 0x198376f921570e3cc547fd5c16e482cded8b4d1d.sol


 55%|█████▌    | 38/69 [00:23<00:25,  1.23it/s]

Saved 0x9d52414c4cc1fb8e7864a9b59495f430f8e5de44.sol


 57%|█████▋    | 39/69 [00:23<00:21,  1.42it/s]

Saved 0x50c6ec50a89a946c5886aeb54a22fe732558f7d1.sol


 58%|█████▊    | 40/69 [00:24<00:18,  1.59it/s]

Saved 0xde9e52f1838951e4d2bb6c59723b003c353979b6.sol


 59%|█████▉    | 41/69 [00:24<00:16,  1.66it/s]

Saved 0xe1a0ce8b94c6a5e4791401086763d7bd0a6c18f5.sol


 61%|██████    | 42/69 [00:25<00:15,  1.74it/s]

Saved 0x85aa3f04e539e426cbb55c0d584ea99cfe1d96a1.sol


 62%|██████▏   | 43/69 [00:25<00:14,  1.82it/s]

Saved 0xe4182e57eeb29fbc2b3469e45c9e385cea8995ab.sol


 64%|██████▍   | 44/69 [00:26<00:13,  1.87it/s]

Saved 0xf0b692ace03ffb689628e68d4919f91723d1c5a2.sol


 65%|██████▌   | 45/69 [00:26<00:12,  1.96it/s]

Saved 0xaaf8c293ed36989d1871d2310b2845450d885673.sol


 67%|██████▋   | 46/69 [00:27<00:11,  2.03it/s]

Saved 0x4165084a6e5388ce53c9d9892f904a2712dd943a.sol


 68%|██████▊   | 47/69 [00:27<00:10,  2.07it/s]

Saved 0x91383a15c391c142b80045d8b4730c1c37ac0378.sol


 70%|██████▉   | 48/69 [00:28<00:09,  2.11it/s]

Saved 0x16dfb898cf7029303c2376031392cb9bac450f94.sol


 71%|███████   | 49/69 [00:28<00:09,  2.14it/s]

Saved 0x17e65e6b9b166fb8e7c59432f0db126711246bc0.sol


 72%|███████▏  | 50/69 [00:29<00:08,  2.15it/s]

Saved 0xec4cb1148ec60e00a6bfcfce4482db724db6bdde.sol


 74%|███████▍  | 51/69 [00:29<00:08,  2.16it/s]

Saved 0x6b5e9e55921e5e412cf1002599c05d4428cf50c5.sol


 75%|███████▌  | 52/69 [00:30<00:07,  2.17it/s]

Saved 0xa942890d7fc60f0d4a516f63dd273dcde72ae6c9.sol


 77%|███████▋  | 53/69 [00:30<00:07,  2.18it/s]

Saved 0x797885c0a6cfffcbc4d2e3c1ca0b4f07112db6a3.sol


 78%|███████▊  | 54/69 [00:30<00:06,  2.18it/s]

Saved 0xb504035a11e672e12a099f32b1672b9c4a78b22f.sol


 80%|███████▉  | 55/69 [00:31<00:07,  1.97it/s]

Saved 0xd28c8ff18f811e5fcd9b5b07889a343da8fd6502.sol


 81%|████████  | 56/69 [00:32<00:06,  1.98it/s]

Saved 0xa7cd93ed3133d82781cc17460fe1500b69a1b514.sol


 83%|████████▎ | 57/69 [00:32<00:06,  1.91it/s]

Saved 0xd217dc0cab1c952a7ce6f4d7ca4549cde1f37bb0.sol


 84%|████████▍ | 58/69 [00:33<00:06,  1.81it/s]

Saved 0xb131f4a55907b10d1f0a50d8ab8fa09ec342cd74.sol


 86%|████████▌ | 59/69 [00:33<00:05,  1.87it/s]

Saved 0x548c9731ae163a73a28916eeb11717fe446dab54.sol


 87%|████████▋ | 60/69 [00:34<00:04,  1.96it/s]

Saved 0x8b2e68075a06959e3e35aa0e451a13e099e41b23.sol


 88%|████████▊ | 61/69 [00:34<00:03,  2.03it/s]

Saved 0x1c5ee1ffebec5f3e1686e8e59d43f96a3c702b7f.sol


 90%|████████▉ | 62/69 [00:35<00:03,  2.07it/s]

Saved 0x108d0f1fc10ed324f8cc65d0a91cad11cd4994a4.sol


 91%|█████████▏| 63/69 [00:35<00:02,  2.10it/s]

Saved 0xee45e37e2b73e86c709d9edd1c8ea3b0ec72dad3.sol


 93%|█████████▎| 64/69 [00:36<00:02,  2.09it/s]

Saved 0x8275ebf521dc217aa79c88132017a5bcef001dd9.sol


 94%|█████████▍| 65/69 [00:36<00:01,  2.12it/s]

Saved 0xb954562066c71b3e6e7b2ac330b03c74c0dcd5ae.sol


 96%|█████████▌| 66/69 [00:36<00:01,  2.15it/s]

Saved 0xba751bff276907c438e927d2c2f18de574195e4b.sol


 97%|█████████▋| 67/69 [00:37<00:00,  2.01it/s]

Saved 0xf19308f923582a6f7c465e5ce7a9dc1bec6665b1.sol


 99%|█████████▊| 68/69 [00:37<00:00,  2.06it/s]

Saved 0x87230146e138d3f296a9a77e497a2a83012e9bc5.sol


100%|██████████| 69/69 [00:38<00:00,  1.79it/s]

Saved 0x82902c20c5826984588dcd2dfcc322e05dcc435c.sol





In [10]:
# Sanity check: number of stored transaction, bytecode and source files.
tuple(
    len(list(Path(folder).glob(pattern)))
    for folder, pattern in (
        (TXN_PATH, "*.json"),
        (HEX_PATH, "*.hex"),
        (SOL_PATH, "*.sol"),
    )
)

(69, 69, 69)