In [10]:
import networkx as nx
import pandas as pd
import numpy as np
from typing import Dict, List, Set, Optional
from datetime import datetime
from collections import defaultdict
import community
import itertools

class NetworkAnalyzer:
    """Build a directed transaction graph from a data loader and derive network metrics.

    Nodes are addresses carrying ``type`` ('contract', 'wallet' or 'unknown'),
    ``label`` and ``kyc`` attributes. Edges are transactions carrying ``weight``
    and ``timestamp`` attributes.
    """

    def __init__(self, data_loader):
        """Store the loader and initialise empty graph and analysis state.

        Args:
            data_loader: Object providing ``get_all_addresses()``,
                ``is_contract(address)``, ``get_address_info(address)`` and
                ``get_node_transactions(address)``.
        """
        self.data_loader = data_loader
        self.G = nx.DiGraph()
        # node -> community id; populated lazily by _detect_communities().
        self.communities = None
        self.contract_communities = None
        self.wallet_communities = None
        self.centrality_metrics = {}

    def build_network(self):
        """Populate the graph with address nodes and transaction edges, then compute centrality."""
        # Materialise once so both passes iterate over the same addresses.
        addresses = list(self.data_loader.get_all_addresses())

        # Pass 1: nodes known to the loader.
        for address in addresses:
            node_type = 'contract' if self.data_loader.is_contract(address) else 'wallet'
            address_info = self.data_loader.get_address_info(address)
            self.G.add_node(
                address,
                type=node_type,
                label=address_info.get('label', 'Unknown'),
                kyc=address_info.get('hasKyc', False)
            )

        # Pass 2: edges, creating placeholder nodes for counterparties the loader does not list.
        for address in addresses:
            for tx in self.data_loader.get_node_transactions(address):
                sender, receiver = tx['from'], tx['to']
                for endpoint in (sender, receiver):
                    if endpoint not in self.G:
                        self.G.add_node(endpoint, type='unknown', label='Unknown', kyc=False)

                # NOTE(review): a DiGraph keeps a single edge per (from, to) pair, so a
                # repeated transfer replaces the earlier weight/timestamp rather than
                # accumulating. Confirm whether volumes should be summed instead.
                self.G.add_edge(
                    sender,
                    receiver,
                    weight=float(tx['value']),
                    timestamp=tx['timestamp']
                )

        print('Calc metrics...')
        self._calculate_centrality_metrics()

    def analyze_network(self) -> Dict:
        """Run the full analysis.

        Returns:
            Dict with keys 'basic_metrics', 'community_metrics', 'contract_metrics',
            'structure_metrics' and 'centrality'; an empty dict when the graph has no nodes.
        """
        if not self.G:
            return {}

        return {
            'basic_metrics': self._calculate_basic_metrics(),
            'community_metrics': self._analyze_communities(),
            'contract_metrics': self._analyze_contracts(),
            'structure_metrics': self._analyze_network_structure(),
            'centrality': self.centrality_metrics
        }

    def get_node_neighborhood(self, address: str, depth: int = 1) -> Dict:
        """Describe the neighbourhood of ``address`` up to ``depth`` hops, ignoring edge direction.

        Args:
            address: Node to centre the neighbourhood on.
            depth: Number of hops to expand.

        Returns:
            Dict with 'size', 'composition', 'metrics' and 'centrality'; an empty
            dict when ``address`` is not a node of the graph.
        """
        if not self.G.has_node(address):
            return {}

        # Breadth-first expansion. Every discovered frontier is added to the result,
        # including the last one (previously the nodes at distance `depth` were dropped).
        neighborhood = {address}
        frontier = {address}
        for _ in range(depth):
            reached = set()
            for node in frontier:
                reached.update(self.G.predecessors(node))
                reached.update(self.G.successors(node))
            frontier = reached - neighborhood
            if not frontier:
                break
            neighborhood |= frontier

        subgraph = self.G.subgraph(neighborhood)

        return {
            'size': len(neighborhood),
            'composition': self._analyze_neighborhood_composition(subgraph),
            'metrics': self._calculate_subgraph_metrics(subgraph),
            'centrality': self._calculate_node_centrality(address)
        }

    def analyze_community_interactions(self) -> Dict:
        """Aggregate edge weight flowing between (and within) communities.

        Returns:
            Dict with 'interactions' (source community -> target community -> summed
            weight) and 'statistics'.
        """
        if not self.communities:
            self._detect_communities()

        interactions = defaultdict(lambda: defaultdict(float))

        for source, target, attrs in self.G.edges(data=True):
            from_community = self.communities.get(source)
            to_community = self.communities.get(target)
            if from_community is not None and to_community is not None:
                interactions[from_community][to_community] += attrs.get('weight', 0)

        return {
            # Plain dicts so the result prints and serialises without defaultdict wrappers.
            'interactions': {src: dict(targets) for src, targets in interactions.items()},
            'statistics': self._calculate_community_interaction_stats(interactions)
        }

    def find_critical_nodes(self) -> Dict:
        """Return the ten highest-scoring nodes per centrality metric plus summary statistics."""
        if not self.centrality_metrics:
            self._calculate_centrality_metrics()

        top_nodes = {}
        for metric, values in self.centrality_metrics.items():
            ranked = sorted(values.items(), key=lambda item: item[1], reverse=True)
            top_nodes[metric] = [
                {
                    'address': address,
                    'score': score,
                    'type': self.G.nodes[address]['type'],
                    'label': self.G.nodes[address]['label']
                }
                for address, score in ranked[:10]
            ]

        return {
            'top_nodes': top_nodes,
            'statistics': self._calculate_critical_nodes_stats()
        }

    def _calculate_basic_metrics(self) -> Dict:
        """Compute node/edge counts, density, contract share and average degree."""
        node_count = self.G.number_of_nodes()
        contract_count = 0
        wallet_count = 0
        for _, attrs in self.G.nodes(data=True):
            if attrs['type'] == 'contract':
                contract_count += 1
            elif attrs['type'] == 'wallet':
                wallet_count += 1

        degree_total = sum(degree for _, degree in self.G.degree())

        return {
            'total_nodes': node_count,
            'total_edges': self.G.number_of_edges(),
            'density': nx.density(self.G),
            'contract_count': contract_count,
            'wallet_count': wallet_count,
            'contract_ratio': contract_count / node_count if node_count > 0 else 0,
            'avg_degree': degree_total / node_count if node_count > 0 else 0
        }

    def _analyze_communities(self) -> Dict:
        """Collect per-community size, composition and edge statistics plus modularity."""
        if not self.communities:
            self._detect_communities()

        community_stats = defaultdict(lambda: {
            'size': 0,
            'contracts': 0,
            'wallets': 0,
            'volume': 0,
            'internal_edges': 0,
            'external_edges': 0
        })

        # Membership counts.
        for node, comm_id in self.communities.items():
            stats = community_stats[comm_id]
            stats['size'] += 1
            node_type = self.G.nodes[node]['type']
            if node_type == 'contract':
                stats['contracts'] += 1
            elif node_type == 'wallet':
                stats['wallets'] += 1

        # Edge counts; 'volume' sums the weight of internal edges only.
        for source, target, attrs in self.G.edges(data=True):
            from_comm = self.communities.get(source)
            to_comm = self.communities.get(target)

            if from_comm is not None and from_comm == to_comm:
                stats = community_stats[from_comm]
                stats['internal_edges'] += 1
                stats['volume'] += attrs.get('weight', 0)
            else:
                # Endpoints without a community are skipped so no bogus `None`
                # community is created.
                if from_comm is not None:
                    community_stats[from_comm]['external_edges'] += 1
                if to_comm is not None:
                    community_stats[to_comm]['external_edges'] += 1

        return {
            'total_communities': len(community_stats),
            'community_details': dict(community_stats),
            'modularity': community.modularity(
                self.communities,
                self.G.to_undirected()
            )
        }

    def _analyze_contracts(self) -> Dict:
        """Bucket contracts by number of unique senders and by incoming volume.

        Returns:
            Dict with 'total_contracts', 'interaction_patterns' (bucket -> count),
            'volume_distribution' (bucket -> count) and 'user_base'
            (contract -> set of sender addresses).
        """
        contract_nodes = [
            node for node, attrs in self.G.nodes(data=True)
            if attrs['type'] == 'contract'
        ]

        contract_stats = {
            'total_contracts': len(contract_nodes),
            'interaction_patterns': defaultdict(int),
            # These are counts of contracts, so they are integers; a float default
            # made the report print values such as "46.0".
            'volume_distribution': defaultdict(int),
            'user_base': defaultdict(set)
        }

        for contract in contract_nodes:
            # Materialise once; the edges are consumed twice below.
            in_edges = list(self.G.in_edges(contract, data=True))
            unique_senders = {sender for sender, _, _ in in_edges}
            contract_stats['user_base'][contract].update(unique_senders)

            sender_count = len(unique_senders)
            if sender_count < 10:
                pattern = 'low_interaction'
            elif sender_count < 100:
                pattern = 'medium_interaction'
            else:
                pattern = 'high_interaction'
            contract_stats['interaction_patterns'][pattern] += 1

            volume = sum(attrs.get('weight', 0) for _, _, attrs in in_edges)
            if volume < 1:
                volume_category = 'low_volume'
            elif volume < 10:
                volume_category = 'medium_volume'
            else:
                volume_category = 'high_volume'
            contract_stats['volume_distribution'][volume_category] += 1

        return contract_stats

    def _analyze_network_structure(self) -> Dict:
        """Compute connectivity, clustering and sampled path metrics."""
        # Clustering metrics are computed on the undirected view of the graph.
        undirected = self.G.to_undirected()

        weakly_connected = list(nx.weakly_connected_components(self.G))
        strongly_connected = list(nx.strongly_connected_components(self.G))

        return {
            'connectivity': {
                'weakly_connected_components': len(weakly_connected),
                'strongly_connected_components': len(strongly_connected),
                # default=0 keeps this safe when there are no components at all.
                'largest_wcc_size': max((len(c) for c in weakly_connected), default=0),
                'largest_scc_size': max((len(c) for c in strongly_connected), default=0)
            },
            'clustering': {
                'avg_clustering': nx.average_clustering(undirected),
                'transitivity': nx.transitivity(undirected)
            },
            'path_metrics': self._calculate_path_metrics()
        }

    def _detect_communities(self):
        """Partition the undirected graph into communities and split members by node type."""
        undirected = self.G.to_undirected()
        self.communities = community.best_partition(undirected)

        self.contract_communities = defaultdict(set)
        self.wallet_communities = defaultdict(set)

        for node, comm_id in self.communities.items():
            node_type = self.G.nodes[node]['type']
            if node_type == 'contract':
                self.contract_communities[comm_id].add(node)
            elif node_type == 'wallet':
                self.wallet_communities[comm_id].add(node)

    def _calculate_centrality_metrics(self):
        """Compute degree, in/out-degree, betweenness and PageRank centrality for every node."""
        self.centrality_metrics = {
            'degree': nx.degree_centrality(self.G),
            'in_degree': nx.in_degree_centrality(self.G),
            'out_degree': nx.out_degree_centrality(self.G),
            'betweenness': nx.betweenness_centrality(self.G),
            'pagerank': nx.pagerank(self.G)
        }

    def _analyze_neighborhood_composition(self, subgraph: "nx.DiGraph") -> Dict:
        """Count contracts, wallets, other-typed nodes and KYC-flagged nodes in ``subgraph``."""
        composition = {
            'contracts': 0,
            'wallets': 0,
            'unknown': 0,
            'kyc_verified': 0
        }
        bucket_by_type = {'contract': 'contracts', 'wallet': 'wallets'}

        for node in subgraph.nodes():
            attrs = subgraph.nodes[node]
            # Any type other than contract/wallet is reported as 'unknown'.
            composition[bucket_by_type.get(attrs['type'], 'unknown')] += 1
            if attrs.get('kyc', False):
                composition['kyc_verified'] += 1

        return composition

    def _calculate_subgraph_metrics(self, subgraph: "nx.DiGraph") -> Dict:
        """Compute density, average degree, clustering coefficient and edge count of ``subgraph``."""
        node_count = subgraph.number_of_nodes()
        degree_total = sum(degree for _, degree in subgraph.degree())
        return {
            'density': nx.density(subgraph),
            'avg_degree': degree_total / node_count if node_count > 0 else 0,
            'clustering_coefficient': nx.average_clustering(subgraph.to_undirected()),
            'edge_count': subgraph.number_of_edges()
        }

    def _calculate_node_centrality(self, node: str) -> Dict:
        """Return the already-computed centrality scores of ``node``, keyed by metric name."""
        return {
            metric: values[node]
            for metric, values in self.centrality_metrics.items()
            if node in values
        }

    def _calculate_community_interaction_stats(self, interactions: Dict) -> Dict:
        """Summarise an inter-community interaction matrix.

        Args:
            interactions: Mapping source community -> target community -> summed weight.

        Returns:
            Dict with 'total_interactions' (sum of all weights), 'internal_ratio'
            (share of weight staying inside a community) and
            'community_interaction_density' (observed ordered pairs of distinct
            communities divided by the number of possible ordered pairs).
        """
        total_interactions = sum(
            sum(targets.values())
            for targets in interactions.values()
        )

        # .get() avoids inserting keys into defaultdict-based matrices.
        internal_interactions = sum(
            targets.get(comm, 0)
            for comm, targets in interactions.items()
        )

        # self.communities maps node -> community id, so its length is the node
        # count; the density must be normalised by the number of communities.
        community_count = len(set((self.communities or {}).values()))
        possible_pairs = community_count * (community_count - 1)
        linked_pairs = sum(
            1
            for source, targets in interactions.items()
            for target in targets
            if target != source
        )

        return {
            'total_interactions': total_interactions,
            'internal_ratio': internal_interactions / total_interactions
                            if total_interactions > 0 else 0,
            'community_interaction_density': linked_pairs / possible_pairs
                                             if possible_pairs > 0 else 0
        }

    def _calculate_critical_nodes_stats(self) -> Dict:
        """Per metric, describe the nodes at or above the 90th percentile score.

        Returns:
            Dict with 'contract_ratio', 'kyc_ratio' and 'avg_scores', each mapping
            metric name -> value. Metrics without any scores report 0 everywhere.
        """
        critical_stats = {
            'contract_ratio': defaultdict(float),
            'kyc_ratio': defaultdict(float),
            'avg_scores': defaultdict(float)
        }

        for metric, values in self.centrality_metrics.items():
            critical_nodes = []
            if values:
                # Nodes at or above the 90th percentile of this metric.
                threshold = np.percentile(list(values.values()), 90)
                critical_nodes = [
                    node for node, score in values.items()
                    if score >= threshold
                ]

            if not critical_nodes:
                # Nothing to average; avoid percentile/mean on empty input.
                critical_stats['contract_ratio'][metric] = 0
                critical_stats['kyc_ratio'][metric] = 0
                critical_stats['avg_scores'][metric] = 0
                continue

            critical_count = len(critical_nodes)
            contract_hits = sum(
                1 for node in critical_nodes
                if self.G.nodes[node]['type'] == 'contract'
            )
            kyc_hits = sum(
                1 for node in critical_nodes
                if self.G.nodes[node].get('kyc', False)
            )

            critical_stats['contract_ratio'][metric] = contract_hits / critical_count
            critical_stats['kyc_ratio'][metric] = kyc_hits / critical_count
            critical_stats['avg_scores'][metric] = float(np.mean([
                values[node] for node in critical_nodes
            ]))

        return dict(critical_stats)

    def _calculate_path_metrics(self) -> Dict:
        """Estimate path statistics from a random sample of at most 100 nodes.

        Each unordered sample pair is checked in one direction only (source -> target).
        The sample is drawn from NumPy's global random state, so results vary
        between runs unless that state is seeded by the caller.

        Returns:
            Dict with 'avg_path_length', 'max_path_length' (both ``inf`` when no
            sampled pair is connected) and 'reachability'.
        """
        nodes = list(self.G.nodes())
        sample_size = min(100, len(nodes))
        # Sample positions rather than labels so node identifiers are never
        # coerced into a NumPy array.
        sample_positions = np.random.choice(len(nodes), size=sample_size, replace=False)
        sample_nodes = [nodes[position] for position in sample_positions]

        path_lengths = []
        total_pairs = 0

        for source, target in itertools.combinations(sample_nodes, 2):
            total_pairs += 1
            try:
                path_lengths.append(nx.shortest_path_length(self.G, source, target))
            except nx.NetworkXNoPath:
                continue

        reachable_pairs = len(path_lengths)

        return {
            'avg_path_length': np.mean(path_lengths) if path_lengths else float('inf'),
            'max_path_length': max(path_lengths) if path_lengths else float('inf'),
            'reachability': reachable_pairs / total_pairs if total_pairs > 0 else 0
        }

In [14]:
# Execute the companion notebook before the helpers below are used.
# NOTE(review): presumably this is what defines the DataLoader class used by test() — confirm.
%run data_loader.ipynb

import json
from pprint import pprint
from collections import defaultdict

def analyze_network_structure(analyzer):
    """Print a summary of the overall network structure.

    Args:
        analyzer: Object exposing ``analyze_network()`` that returns a dict with
            'basic_metrics' and 'community_metrics', or an empty dict when
            there is nothing to analyse.

    Returns:
        The dict returned by ``analyzer.analyze_network()``, unchanged.
    """
    print("\nAnalyzing Network Structure")
    print("=" * 50)

    network_analysis = analyzer.analyze_network()
    if not network_analysis:
        # An empty result has no 'basic_metrics' key; report it instead of raising KeyError.
        print("\nNetwork is empty - nothing to analyze")
        return network_analysis

    # Basic metrics
    basic = network_analysis['basic_metrics']
    print("\nBasic Network Metrics:")
    print(f"Total Nodes: {basic['total_nodes']}")
    print(f"Total Edges: {basic['total_edges']}")
    print(f"Density: {basic['density']:.4f}")
    print(f"Contract Ratio: {basic['contract_ratio']:.2%}")
    print(f"Average Degree: {basic['avg_degree']:.2f}")

    # Community analysis (named so it does not shadow the imported `community` module)
    community_metrics = network_analysis['community_metrics']
    print(f"\nCommunity Analysis:")
    print(f"Total Communities: {community_metrics['total_communities']}")
    print(f"Modularity: {community_metrics['modularity']:.4f}")

    # Five largest communities by member count
    sorted_communities = sorted(
        community_metrics['community_details'].items(),
        key=lambda item: item[1]['size'],
        reverse=True
    )[:5]

    print("\nTop 5 Communities:")
    for comm_id, stats in sorted_communities:
        print(f"\nCommunity {comm_id}:")
        print(f"Size: {stats['size']}")
        print(f"Contracts: {stats['contracts']}")
        print(f"Wallets: {stats['wallets']}")
        print(f"Internal/External Edges: {stats['internal_edges']}/{stats['external_edges']}")

    return network_analysis

def analyze_contract_ecosystem(analyzer):
    """Print contract-level statistics.

    Args:
        analyzer: Object exposing ``analyze_network()`` whose result holds a
            'contract_metrics' dict, or is empty when there is nothing to analyse.

    Returns:
        The 'contract_metrics' dict, or an empty dict when the analysis is empty.
    """
    print("\nAnalyzing Contract Ecosystem")
    print("=" * 50)

    network_analysis = analyzer.analyze_network()
    if not network_analysis:
        # An empty result has no 'contract_metrics' key; report it instead of raising KeyError.
        print("\nNetwork is empty - no contracts to analyze")
        return {}

    contract_metrics = network_analysis['contract_metrics']

    print(f"\nContract Statistics:")
    print(f"Total Contracts: {contract_metrics['total_contracts']}")

    # Interaction patterns
    print("\nInteraction Patterns:")
    for pattern, count in contract_metrics['interaction_patterns'].items():
        print(f"{pattern}: {count}")

    # Volume distribution
    print("\nVolume Distribution:")
    for category, count in contract_metrics['volume_distribution'].items():
        print(f"{category}: {count}")

    # Contracts with the most unique senders
    user_bases = contract_metrics['user_base']
    print("\nTop Contracts by User Base:")
    top_contracts = sorted(
        user_bases.items(),
        key=lambda item: len(item[1]),
        reverse=True
    )[:5]

    for contract, users in top_contracts:
        print(f"\nContract: {contract}")
        print(f"Unique Users: {len(users)}")

    return contract_metrics

def analyze_critical_nodes(analyzer):
    """Print the highest-ranked nodes per centrality metric and their summary statistics.

    Args:
        analyzer: Object exposing ``find_critical_nodes()`` that returns a dict
            with 'top_nodes' and 'statistics'.

    Returns:
        The dict returned by ``analyzer.find_critical_nodes()``, unchanged.
    """
    print("\nAnalyzing Critical Nodes")
    print("=" * 50)

    report = analyzer.find_critical_nodes()

    # Up to five leading nodes for every metric
    for metric_name, ranked in report['top_nodes'].items():
        print(f"\nTop nodes by {metric_name}:")
        position = 0
        for entry in ranked[:5]:
            position += 1
            print(f"{position}. Address: {entry['address']}")
            print(f"   Type: {entry['type']}")
            print(f"   Score: {entry['score']:.4f}")

    # Aggregate statistics, one section per metric
    summary = report['statistics']
    print("\nCritical Nodes Statistics:")
    for metric_name in summary['contract_ratio']:
        contract_share = summary['contract_ratio'][metric_name]
        print(f"\n{metric_name}:")
        print(f"Contract Ratio: {contract_share:.2%}")
        print(f"KYC Ratio: {summary['kyc_ratio'][metric_name]:.2%}")
        print(f"Average Score: {summary['avg_scores'][metric_name]:.4f}")

    return report

def analyze_neighborhood(analyzer, address):
    """Print the composition, metrics and centrality of an address's 2-hop neighbourhood.

    Args:
        analyzer: Object exposing ``get_node_neighborhood(address, depth=...)``.
        address: Address whose neighbourhood is inspected.

    Returns:
        The dict returned by ``get_node_neighborhood``; empty when the analyzer
        has no data for the address.
    """
    print(f"\nAnalyzing Neighborhood for {address}")
    print("=" * 50)

    neighborhood = analyzer.get_node_neighborhood(address, depth=2)
    if not neighborhood:
        # An empty result has no 'composition' key; report it instead of raising KeyError.
        print("\nAddress not found in network")
        return neighborhood

    print("\nNeighborhood Composition:")
    composition = neighborhood['composition']
    print(f"Contracts: {composition['contracts']}")
    print(f"Wallets: {composition['wallets']}")
    print(f"Unknown: {composition['unknown']}")
    print(f"KYC Verified: {composition['kyc_verified']}")

    print("\nNeighborhood Metrics:")
    metrics = neighborhood['metrics']
    print(f"Density: {metrics['density']:.4f}")
    print(f"Average Degree: {metrics['avg_degree']:.2f}")
    print(f"Clustering Coefficient: {metrics['clustering_coefficient']:.4f}")

    print("\nCentrality in Network:")
    for metric, value in neighborhood['centrality'].items():
        print(f"{metric}: {value:.4f}")

    return neighborhood

def analyze_community_interactions(analyzer):
    """Print aggregate interaction statistics and the five heaviest cross-community flows.

    Args:
        analyzer: Object exposing ``analyze_community_interactions()`` that
            returns a dict with 'statistics' and 'interactions'.

    Returns:
        The dict returned by ``analyzer.analyze_community_interactions()``, unchanged.
    """
    print("\nAnalyzing Community Interactions")
    print("=" * 50)

    result = analyzer.analyze_community_interactions()

    # Overall statistics
    summary = result['statistics']
    print("\nInteraction Statistics:")
    print(f"Total Interactions: {summary['total_interactions']}")
    print(f"Internal Ratio: {summary['internal_ratio']:.2%}")
    print(f"Interaction Density: {summary['community_interaction_density']:.4f}")

    # Flatten the matrix into (source, target, volume) triples, leaving out
    # flows that stay inside one community.
    cross_flows = [
        (source, target, amount)
        for source, targets in result['interactions'].items()
        for target, amount in targets.items()
        if source != target
    ]
    ranked_flows = sorted(cross_flows, key=lambda flow: flow[2], reverse=True)

    print("\nTop Community Interactions:")
    for source, target, amount in ranked_flows[:5]:
        print(f"Community {source} -> {target}: {amount:.2f}")

    return result

def test():
    """Run the whole analysis pipeline on the sample data set and print a report.

    Loads the sample files, builds the network, prints the structure, contract
    and critical-node reports, inspects a few hand-picked addresses and finally
    prints a short summary.
    """
    # Initialisation
    print("Initializing...")
    loader = DataLoader()
    loader.load_addresses("data/addresses.json")
    loader.load_tokens("data/tokens.json")
    loader.load_nodes("data/nodes_sample.csv")

    analyzer = NetworkAnalyzer(loader)
    print("Building network...")
    analyzer.build_network()

    # Whole-network reports
    print("Network analysis...")
    network_analysis = analyze_network_structure(analyzer)
    print("Contract analysis...")
    contract_analysis = analyze_contract_ecosystem(analyzer)
    print("Critical analysis...")
    critical_nodes = analyze_critical_nodes(analyzer)

    # Hand-picked addresses to inspect
    addresses = [
        '0xeba88149813bec1cccccfdb0dacefaaa5de94cb1', # binance
        '0x39cf2e49ea4d620e77d67088a8d815348e0abdf6', # normal
        '0xa1b1bbb8070df2450810b8eb2425d543cfcef79b', # fund
    ]

    # Analyse each neighbourhood exactly once and keep the result; previously
    # every address was analysed (and printed) a second time while assembling
    # the results dict.
    neighborhoods = {}
    for address in addresses:
        node_type = "Contract" if loader.is_contract(address) else "Wallet"
        print(f"\nAnalyzing {node_type}: {address}")
        neighborhoods[address] = analyze_neighborhood(analyzer, address)

    # Interactions between communities
    community_interactions = analyze_community_interactions(analyzer)

    # Collected results.
    # NOTE(review): this dict is assembled but neither returned nor persisted.
    results = {
        'network_analysis': network_analysis,
        'contract_analysis': contract_analysis,
        'critical_nodes': critical_nodes,
        'community_interactions': community_interactions,
        'neighborhoods': neighborhoods
    }

    # Overall summary
    print("\nAnalysis Summary")
    print("=" * 50)
    print(f"Total Nodes Analyzed: {network_analysis['basic_metrics']['total_nodes']}")
    print(f"Contracts Analyzed: {contract_analysis['total_contracts']}")
    print(f"Communities Detected: {network_analysis['community_metrics']['total_communities']}")
    print(f"Critical Nodes Identified: {len(critical_nodes['top_nodes']['pagerank'])}")

In [15]:
# Run the end-to-end analysis defined in test(); the report is printed to the cell output.
test()

Initializing...
Loaded 1733 addresses
Loaded 5825 tokens
Loaded 849 nodes
Contracts: 51
Non-contracts: 798
Building network...
Calc metrics...
Network analysis...

Analyzing Network Structure

Basic Network Metrics:
Total Nodes: 10142
Total Edges: 14317
Density: 0.0001
Contract Ratio: 0.50%
Average Degree: 2.82

Community Analysis:
Total Communities: 891
Modularity: 0.0002

Top 5 Communities:

Community 3:
Size: 8814
Contracts: 24
Wallets: 702
Internal/External Edges: 12208/1530

Community 0:
Size: 75
Contracts: 18
Wallets: 2
Internal/External Edges: 74/807

Community 79:
Size: 65
Contracts: 0
Wallets: 1
Internal/External Edges: 66/1

Community 5:
Size: 47
Contracts: 0
Wallets: 12
Internal/External Edges: 48/28

Community 68:
Size: 32
Contracts: 0
Wallets: 1
Internal/External Edges: 31/1
Contract analysis...

Analyzing Contract Ecosystem

Contract Statistics:
Total Contracts: 51

Interaction Patterns:
low_interaction: 49
medium_interaction: 2

Volume Distribution:
low_volume: 46.0
medi