In [5]:
import pandas as pd
import json
from typing import Dict, Any, Optional, List
import ast
from datetime import datetime

class DataLoader:
    """Load address, token and node datasets and answer simple queries on them.

    Three inputs are supported:

    * a JSON file of address metadata (``load_addresses``),
    * a JSON file of token metadata (``load_tokens``),
    * a CSV file of nodes (``load_nodes``) whose ``active``, ``balances`` and
      ``txs`` columns hold Python literals serialized as strings.

    Loading is best-effort: failures are printed and leave the corresponding
    dataset empty instead of raising.
    """

    # Transactions whose 'value$' exceeds this threshold are dropped while
    # loading nodes (presumably a USD outlier cut-off -- confirm with data owner).
    MAX_TX_VALUE = 1000000

    # Timestamp layout used by the 'active' column and by tx['timestamp'].
    _TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    # Sentinel used by process_date when the requested entry is missing.
    _FALLBACK_TIMESTAMP = '2000-01-01T00:00:00.000Z'

    def __init__(self):
        """Create an empty loader; call the ``load_*`` methods to populate it."""
        self.addresses_data: Dict[str, Any] = {}
        self.tokens_data: Dict[str, Any] = {}
        self.nodes_data: Optional[pd.DataFrame] = None
        self.contracts: List[str] = []
        self.non_contracts: List[str] = []

    def load_addresses(self, file_path: str) -> None:
        """Read address metadata from the JSON file at ``file_path``.

        On any error the message is printed and ``addresses_data`` is reset
        to an empty dict.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                self.addresses_data = json.load(file)
                print(f"Loaded {len(self.addresses_data)} addresses")
        except Exception as e:
            print(f"Error loading addresses: {str(e)}")
            self.addresses_data = {}

    def load_tokens(self, file_path: str) -> None:
        """Read token metadata from the JSON file at ``file_path``.

        On any error the message is printed and ``tokens_data`` is reset to
        an empty dict.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                self.tokens_data = json.load(file)
                print(f"Loaded {len(self.tokens_data)} tokens")
        except Exception as e:
            print(f"Error loading tokens: {str(e)}")
            self.tokens_data = {}

    def load_nodes(self, file_path: str) -> None:
        """Read the nodes CSV at ``file_path`` and derive the contract lists.

        The ``active``, ``balances`` and ``txs`` columns, when present, are
        parsed with ``ast.literal_eval``; transactions with ``value$`` above
        ``MAX_TX_VALUE`` are discarded. On any error the message is printed
        and all node-derived state (``nodes_data``, ``contracts``,
        ``non_contracts``) is cleared so stale results from an earlier load
        cannot leak through.
        """
        try:
            nodes = pd.read_csv(file_path)

            # These columns store Python literals as text; literal_eval is
            # safe for that (it does not execute arbitrary code).
            for column in ('active', 'balances', 'txs'):
                if column in nodes.columns:
                    nodes[column] = nodes[column].apply(ast.literal_eval)

            if 'txs' in nodes.columns:
                limit = self.MAX_TX_VALUE
                nodes['txs'] = nodes['txs'].apply(
                    lambda data: [tx for tx in data if tx['value$'] <= limit]
                )

            self.nodes_data = nodes

            # Split addresses into contracts and non-contracts.
            self._separate_contracts()

            print(f"Loaded {len(self.nodes_data)} nodes")
            print(f"Contracts: {len(self.contracts)}")
            print(f"Non-contracts: {len(self.non_contracts)}")

        except Exception as e:
            print(f"Error loading nodes: {str(e)}")
            self.nodes_data = None
            # Keep the derived lists consistent with the (now missing) frame.
            self.contracts = []
            self.non_contracts = []

    def process_date(self, arr, index):
        """Parse ``arr[index]`` as a timestamp.

        Returns the parsed ``datetime``; if ``arr`` has no element at
        ``index`` the fixed sentinel 2000-01-01T00:00:00 is returned instead.
        """
        try:
            return datetime.strptime(arr[index], self._TIMESTAMP_FORMAT)
        except IndexError:
            return datetime.strptime(self._FALLBACK_TIMESTAMP, self._TIMESTAMP_FORMAT)

    def process_node_dates(self) -> None:
        """Add ``first_active``, ``last_active`` and ``active_days`` columns.

        Does nothing when no nodes are loaded or the ``active`` column is
        absent. ``active_days`` is the difference between the two timestamps
        expressed in (fractional) days.

        NOTE(review): a node whose ``active`` list has exactly one entry gets
        the year-2000 sentinel as ``last_active`` and therefore a negative
        ``active_days`` -- confirm whether that is intended.
        """
        if self.nodes_data is None or 'active' not in self.nodes_data.columns:
            return

        active = self.nodes_data['active']
        self.nodes_data['first_active'] = active.apply(lambda x: self.process_date(x, 0))
        self.nodes_data['last_active'] = active.apply(lambda x: self.process_date(x, 1))
        self.nodes_data['active_days'] = (
            self.nodes_data['last_active'] - self.nodes_data['first_active']
        ).dt.total_seconds() / (24 * 3600)

    def is_contract(self, address: str) -> bool:
        """Return True when ``address`` is a loaded node listed as a contract."""
        if self.nodes_data is None or address not in self.nodes_data['address'].values:
            return False
        return address in self.contracts

    def get_address_info(self, address: str) -> Dict[str, Any]:
        """Return metadata for ``address`` plus an ``isContract`` flag.

        Unknown addresses get ``{'label': 'Unknown', 'hasKyc': False}`` as the
        base. The result is a new dict, so the loaded ``addresses_data`` is
        never modified by a lookup.
        """
        stored = self.addresses_data.get(address, {'label': 'Unknown', 'hasKyc': False})
        # Shallow copy: adding 'isContract' must not leak into addresses_data.
        basic_info = dict(stored)
        basic_info['isContract'] = self.is_contract(address)
        return basic_info

    def get_token_info(self, token_address: str) -> Dict[str, Any]:
        """Return metadata for ``token_address`` or a neutral default record."""
        return self.tokens_data.get(token_address, {
            'spam': False,
            'verified': False,
            'price$': 0.0
        })

    def get_node_data(self, address: str) -> Optional[pd.Series]:
        """Return the first node row matching ``address``, or None if absent."""
        if self.nodes_data is None:
            return None

        node_data = self.nodes_data[self.nodes_data['address'] == address]
        if node_data.empty:
            return None

        return node_data.iloc[0]

    def get_node_transactions(self, address: str) -> List[Dict]:
        """Return the ``txs`` list of ``address``; empty list for unknown nodes."""
        node_data = self.get_node_data(address)
        return node_data['txs'] if node_data is not None else []

    def get_all_addresses(self) -> List[str]:
        """Return every loaded node address (empty list when nothing is loaded)."""
        return self.nodes_data['address'].tolist() if self.nodes_data is not None else []

    def get_non_contract_addresses(self) -> List[str]:
        """Return the addresses classified as non-contracts."""
        return self.non_contracts

    def get_contract_addresses(self) -> List[str]:
        """Return the addresses classified as contracts."""
        return self.contracts

    def _separate_contracts(self) -> None:
        """Split node addresses into ``contracts`` and ``non_contracts``.

        Uses the boolean ``isContract`` column; does nothing when no nodes
        are loaded.
        """
        if self.nodes_data is None:
            return

        contract_mask = self.nodes_data['isContract']
        addresses = self.nodes_data['address']
        self.contracts = addresses[contract_mask].tolist()
        self.non_contracts = addresses[~contract_mask].tolist()

    def get_contract_interactions(self, address: str) -> Dict[str, int]:
        """Count how many transactions of ``address`` target each contract.

        Returns a dict mapping contract address to the number of the node's
        transactions whose ``to`` field equals that contract.
        """
        # Build the lookup set once so each membership test is O(1).
        known_contracts = set(self.contracts)
        contract_interactions: Dict[str, int] = {}

        for tx in self.get_node_transactions(address):
            target = tx['to']
            if target in known_contracts:
                contract_interactions[target] = contract_interactions.get(target, 0) + 1

        return contract_interactions

    def get_address_balance_history(self, address: str) -> List[Dict]:
        """Replay the node's transactions in time order as a running balance.

        Starting from 0, each transaction's ``value`` is subtracted when
        ``address`` is the sender and added otherwise. Each returned record
        holds the transaction ``timestamp``, the running ``balance``, the
        ``transaction_type`` (``'out'``/``'in'``) and ``value`` taken from the
        transaction's ``value$`` field. Unknown addresses yield ``[]``.

        NOTE(review): amounts are summed across all transactions without
        distinguishing tokens, so the running balance can go negative --
        confirm this is the intended metric.
        """
        node_data = self.get_node_data(address)
        if node_data is None:
            return []

        timestamp_format = self._TIMESTAMP_FORMAT
        ordered = sorted(
            node_data['txs'],
            key=lambda x: datetime.strptime(x['timestamp'], timestamp_format),
        )

        balance_history = []
        current_balance = 0

        for tx in ordered:
            outgoing = tx['from'] == address
            amount = float(tx['value'])
            if outgoing:
                current_balance -= amount
            else:
                current_balance += amount

            balance_history.append({
                'timestamp': tx['timestamp'],
                'balance': current_balance,
                'transaction_type': 'out' if outgoing else 'in',
                'value': float(tx['value$'])
            })

        return balance_history

    def __str__(self) -> str:
        """Summarize how many records of each kind are currently loaded."""
        return f"DataLoader(addresses: {len(self.addresses_data)}, " \
               f"tokens: {len(self.tokens_data)}, " \
               f"nodes: {len(self.nodes_data) if self.nodes_data is not None else 0}, " \
               f"contracts: {len(self.contracts)}, " \
               f"non_contracts: {len(self.non_contracts)})"



def test():
    """Smoke-test DataLoader against the data files under ``data/``.

    Loads addresses, tokens and nodes, prints a summary, then prints detailed
    information for one hard-coded address. Output goes to stdout; nothing is
    returned.
    """
    # Initialize the loader.
    loader = DataLoader()

    # Load all input files.
    loader.load_addresses("data/addresses.json")
    loader.load_tokens("data/tokens.json")
    loader.load_nodes("data/nodes_new.csv")
    loader.process_node_dates()

    print("\nDataLoader Status:")
    print(str(loader))

    # Example contract analysis.
    print("\nContracts Analysis:")
    contracts = loader.get_contract_addresses()
    non_contracts = loader.get_non_contract_addresses()
    print(f"Total contracts: {len(contracts)}")
    print(f"Total non-contract addresses: {len(non_contracts)}")

    # Detailed analysis of one specific address.
    test_address = "0x39cf2e49ea4d620e77d67088a8d815348e0abdf6"

    # Basic information about the address.
    print(f"\nAnalyzing address: {test_address}")
    address_info = loader.get_address_info(test_address)
    print("\nAddress Info:")
    print(json.dumps(address_info, indent=2))

    # Defined up front so the token analysis below also works when the
    # address is a contract (previously that path raised NameError).
    transactions = []

    # Detailed information is shown only for non-contract addresses.
    if not loader.is_contract(test_address):
        # Fetch the transactions.
        transactions = loader.get_node_transactions(test_address)
        print(f"\nTransaction count: {len(transactions)}")

        # Interactions with contracts.
        contract_interactions = loader.get_contract_interactions(test_address)
        print("\nContract Interactions:")
        for contract, count in contract_interactions.items():
            contract_info = loader.get_address_info(contract)
            print(f"Contract: {contract}")
            print(f"Label: {contract_info['label']}")
            print(f"Interaction count: {count}")

        # Balance history.
        balance_history = loader.get_address_balance_history(test_address)
        print("\nBalance History (last 5 records):")
        for record in balance_history[-5:]:
            print(f"Time: {record['timestamp']}")
            print(f"Balance: {record['balance']}")
            print(f"Type: {record['transaction_type']}")
            print(f"Value: {record['value']}")
            print("---")
    else:
        print("\nThis is a contract address. Basic info only.")

    # Token analysis.
    print("\nToken Analysis:")
    for tx in transactions[:5]:  # first 5 transactions
        token_info = loader.get_token_info(tx['contract'])
        print(f"\nToken: {tx['contract']}")
        print(f"Verified: {token_info['verified']}")
        print(f"Price: ${token_info['price$']}")
        print(f"Is Spam: {token_info['spam']}")

In [6]:
# Run the smoke test; it reads the input files from data/ relative to the
# current working directory and prints its findings to stdout.
test()

Loaded 1732 addresses
Loaded 5825 tokens
Loaded 1025 nodes
Contracts: 51
Non-contracts: 974

DataLoader Status:
DataLoader(addresses: 1732, tokens: 5825, nodes: 1025, contracts: 51, non_contracts: 974)

Contracts Analysis:
Total contracts: 51
Total non-contract addresses: 974

Analyzing address: 0x39cf2e49ea4d620e77d67088a8d815348e0abdf6

Address Info:
{
  "label": "Unknown",
  "hasKyc": false,
  "isContract": false
}

Transaction count: 6

Contract Interactions:

Balance History (last 5 records):
Time: 2022-11-03T19:22:32.544Z
Balance: 120.02141862
Type: in
Value: 383.28017
---
Time: 2023-02-07T23:38:56.800Z
Balance: 119.72041862
Type: out
Value: 1053.5
---
Time: 2023-10-01T07:46:54.006Z
Balance: 121.2347416
Type: in
Value: 5300.13043
---
Time: 2024-03-08T00:22:16.651Z
Balance: -8.6771684
Type: out
Value: 454691.685
---
Time: 2024-08-08T00:08:17.873Z
Balance: 1.3228316000000007
Type: in
Value: 35000.0
---

Token Analysis:

Token: 0x1
Verified: True
Price: $3500
Is Spam: False

Token: 