In [None]:
import time
import json
import requests
import pandas as pd
from tqdm import tqdm
from typing import Dict

# Contract address list to process. skiprows=1 drops the first line of the file
# before the header row is parsed (presumably a non-CSV preamble in the export;
# confirm against the raw file).
df = pd.read_csv("E:/TA/DeFi/data/raw/bsc_contractaddress_opensource_license.csv", skiprows=1)

# The key is read from a local text file rather than embedded in the notebook.
# Previously the file *name* itself was assigned to API_KEY and sent to the API
# as the key. The path is relative, so it is resolved against the kernel's
# current working directory.
API_KEY_FILE = 'bscscan_api_key.txt'
with open(API_KEY_FILE, encoding='utf-8') as key_file:
    API_KEY = key_file.read().strip()
if not API_KEY:
    # Fail early with a clear message instead of sending an empty key on every request.
    raise ValueError(f'{API_KEY_FILE} is empty; expected a BscScan API key')

API_URL = 'https://api.bscscan.com/api'
# Number of addresses requested between two pauses of the download loop.
BATCH_SIZE = 5

print(df.head())

In [None]:
def fetch_contract_source(address: str) -> Dict:
    """Fetch the `getsourcecode` record for one contract address.

    Sends a GET request to ``API_URL`` with the module-level ``API_KEY`` and
    returns the first entry of the response's ``result`` list.

    Args:
        address: Contract address to look up.

    Returns:
        The first ``result`` entry (a dict) when the API reports
        ``status == '1'``. In every other case a dict of the form
        ``{'error': <message>}`` is returned; this function never raises for
        transport, decoding, or payload-shape problems.
    """
    params = {
        'module': 'contract',
        'action': 'getsourcecode',
        'address': address,
        'apikey': API_KEY
    }

    # Transport-level failures (DNS, connection, timeout, ...).
    try:
        response = requests.get(API_URL, params=params, timeout=10)
    except requests.RequestException as e:
        return {'error': str(e)}

    # A non-JSON body (e.g. an HTML error page) is reported with its HTTP
    # status instead of an opaque JSON parser message.
    try:
        data = response.json()
    except ValueError:
        return {'error': f'Non-JSON response (HTTP {response.status_code})'}

    if not isinstance(data, dict):
        return {'error': 'Unexpected response format'}

    result = data.get('result')
    if data.get('status') == '1':
        # Guard the [0] access: an empty or oddly shaped result is an error,
        # not a contract record.
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0]
        return {'error': 'Empty or malformed result'}

    # On failure Etherscan-style APIs usually keep `message` generic and put
    # the actual reason in `result` as a string; keep both when available.
    message = data.get('message', 'Unknown error')
    if isinstance(result, str) and result:
        message = f'{message}: {result}'
    return {'error': message}

In [None]:
def extract_features(source_data: Dict) -> Dict:
    """Reduce a contract source record to a flat dict of summary features.

    Args:
        source_data: Either a contract record (dict with keys such as
            ``ContractName``, ``SourceCode``, ``ConstructorArguments``) or an
            error marker of the form ``{'error': <message>}``.

    Returns:
        ``{'error': <message>}`` when the input carries an ``error`` key.
        Otherwise a dict with the metadata fields copied via ``.get`` (missing
        keys become ``None``) and the lengths of ``SourceCode`` and
        ``ConstructorArguments``; a missing or ``None`` value counts as
        length 0.
    """
    if 'error' in source_data:
        return {'error': source_data['error']}

    # `.get(key, '')` only covers a missing key; a key that is present with a
    # None value would make len() raise TypeError, hence the `or ''`.
    source_code = source_data.get('SourceCode') or ''
    constructor_args = source_data.get('ConstructorArguments') or ''

    return {
        'ContractName': source_data.get('ContractName'),
        'CompilerVersion': source_data.get('CompilerVersion'),
        'OptimizationUsed': source_data.get('OptimizationUsed'),
        'Runs': source_data.get('Runs'),
        'SourceCodeLength': len(source_code),
        'ConstructorArgsLength': len(constructor_args),
        'Proxy': source_data.get('Proxy'),
        'LicenseType': source_data.get('LicenseType')
    }

In [None]:
# Fields copied verbatim from each API record into the source-code JSON dump
# (order determines the key order of every JSON entry).
SOURCE_FIELDS = (
    'ContractName',
    'SourceCode',
    'CompilerVersion',
    'OptimizationUsed',
    'Runs',
    'ConstructorArguments',
    'LicenseType',
    'Proxy',
)

results = []          # one feature row per successfully fetched contract
sourcecode_dict = {}  # address -> raw source/metadata fields
failed = []           # {'ContractAddress', 'error'} for every fetch that did not succeed

for i in tqdm(range(0, len(df), BATCH_SIZE)):
    batch = df.iloc[i:i + BATCH_SIZE]
    for address in batch['ContractAddress']:
        source_data = fetch_contract_source(address)

        # fetch_contract_source reports failure as a non-empty {'error': ...}
        # dict, so a bare truthiness check lets failures through as rows full
        # of None values. Check for the error marker explicitly instead.
        if not source_data or 'error' in source_data:
            failed.append({
                'ContractAddress': address,
                'error': (source_data or {}).get('error', 'empty response'),
            })
            continue

        # Reuse the shared feature extractor rather than a hand-copied dict;
        # key order stays ContractAddress first, then the feature columns.
        results.append({'ContractAddress': address, **extract_features(source_data)})
        sourcecode_dict[address] = {
            'ContractAddress': address,
            **{field: source_data.get(field) for field in SOURCE_FIELDS},
        }
    # Pause once per batch to throttle the request rate.
    time.sleep(1)


results_df = pd.DataFrame(results)
results_df.to_csv("E:/TA/DeFi/data/raw/bsc_contract_features.csv", index=False)
with open("E:/TA/DeFi/data/raw/bsc_contract_sourcecode.json", "w") as json_file:
    json.dump(sourcecode_dict, json_file, indent=2)

if failed:
    # `failed` stays in the kernel namespace for inspection or a retry pass.
    print(f"{len(failed)} of {len(df)} contracts could not be fetched")
print("Done! File Saved")