# Sybil exploration: Climate grant

In [1]:
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
from sblegos.TransactionAnalyser import TransactionAnalyser
from sbutils.LoadData import LoadData

In [3]:
# Working directory of the kernel; every relative path below is resolved against it.
current_dir = Path.cwd()

### Initialisation of the data

#### Grant data initialisation

In [4]:
# Folder holding the contributor-address CSV files; it is joined onto the
# working directory in the next cell, so it is relative to where the kernel runs.
path_to_contributor_address = "data/grants/address"

In [5]:
# Absolute path of the folder containing the contributor-address CSV files.
full_path_add = os.path.join(current_dir, path_to_contributor_address)
# os.listdir returns entries in arbitrary order; sort them so the listing and
# any positional access (e.g. files[0]) are reproducible across platforms.
files = sorted(os.listdir(full_path_add))
files

['df_contribution_address_CLIMATE.csv',
 'df_contribution_address_ETHEREUM.csv',
 'df_contribution_address_FANTOM.csv',
 'df_contribution_address_GR15.csv',
 'df_contribution_address_OSS.csv',
 'df_contribution_address_UNICEF.csv',
 'unique_ctbt_address.csv']

In [6]:
# Select the Climate grant file by name rather than by position: the order of
# a directory listing is not guaranteed, so files[0] may not be the Climate file.
csv = "df_contribution_address_CLIMATE.csv"
if csv not in files:
    raise FileNotFoundError(f"{csv} not found in {full_path_add}")

# Contributor addresses of the grant, as a DataFrame and as a plain list.
df_address = pd.read_csv(os.path.join(full_path_add, csv))
list_address = df_address["address"].tolist()

The list_address is then used to load all the transactions from the grant data.

#### Data loader initialisation

In [7]:
# The transactions folder sits next to the repository, one level above the working directory.
path_to_parent = current_dir.parent
path_to_tx = os.path.join(path_to_parent, "transactions_full")
print(path_to_tx)

c:\Users\DELL Admin\Documents\ODC\DataBuilderHackathon\transactions_full


Initialize the data loader with the path to the transactions 

In [8]:
# Point the loader at the transactions folder computed in the previous cell.
data_loader = LoadData(path_to_tx)

The data loader is then used to load the transactions for the grant addresses. Loading takes some time (about 3 minutes on my computer).

In [9]:
# Presumably builds the transaction DataFrame for the given addresses on the
# 'ethereum' chain — confirm against LoadData.create_df_tx.
# NOTE(review): slow step (about 3 minutes per the note above); consider caching the result.
df_tx = data_loader.create_df_tx('ethereum', list_address)

#### Sybil scorer TransactionAnalyser initialisation and exploration

In [10]:
# Analyser built from the loaded transactions and the grant's contributor addresses;
# all the has_* checks below are called on this object.
tx_analyser = TransactionAnalyser(df_tx, df_address=df_address)

Verify the matching addresses, because the transaction data is not always complete for a specific chain, i.e. an address may have contributed from one chain but have zero transactions on the studied chain (here Ethereum).

In [11]:
# One row per distinct EOA present in the loaded transactions.
df_matching_address = pd.DataFrame({"address": df_tx["EOA"].unique()})
df_matching_address.head(2)

Unnamed: 0,address
0,0x001f41de6f455d6df74a669c6b21b6dda1bae644
1,0x002153708f11f2651215059eea30820ee4d49ff3


In [12]:
# Per-address result of the analyser's naive same-seed check.
df_matching_address['seed_same_naive'] = df_matching_address['address'].apply(tx_analyser.has_same_seed_naive)

In [13]:
# Per-address result of the analyser's (non-naive) same-seed check.
df_matching_address['seed_same'] = df_matching_address['address'].apply(tx_analyser.has_same_seed)

In [14]:
# An address is "seed suspicious" when the two seed checks disagree.
df_matching_address['seed_suspicious'] = df_matching_address['seed_same_naive'] != df_matching_address['seed_same']

In [15]:
# Low-activity flags from has_less_than_n_transactions, with n=5 and n=10;
# `args` forwards n as the second positional argument after the address.
df_matching_address['less_5_tx'] = df_matching_address['address'].apply(tx_analyser.has_less_than_n_transactions, args=(5,))
df_matching_address['less_10_tx'] = df_matching_address['address'].apply(tx_analyser.has_less_than_n_transactions, args=(10,))

In [16]:
# Per-address result of the analyser's "interacted with another contributor" check.
df_matching_address['interacted_other_ctbt'] = df_matching_address['address'].apply(tx_analyser.has_interacted_with_other_contributor)

In [17]:
# How many addresses have disagreeing seed checks (True) versus agreeing ones (False).
df_matching_address['seed_suspicious'].value_counts()

False    2428
True       35
Name: seed_suspicious, dtype: int64

In [18]:
# Keep only the rows flagged as seed suspicious (the column is already boolean).
df_matching_address.loc[df_matching_address['seed_suspicious']]

Unnamed: 0,address,seed_same_naive,seed_same,seed_suspicious,less_5_tx,less_10_tx,interacted_other_ctbt
14,0x0150c608a25176c08d9bc8b43d0dc64e4c9f4a58,False,True,True,False,False,False
57,0x061c405ccb27cf0ebfc76f41da09c2e3eba8c476,False,True,True,False,False,False
189,0x13969a736a633aab08b2d57881b0f4ec92ef9a86,False,True,True,False,True,False
266,0x1c491e2253abc092acadc5744a0623229e95b8a5,False,True,True,False,False,False
473,0x30b3bbdb92eeb77f0c3394bfaf26a44767ec1bd0,False,True,True,False,False,True
478,0x312e80bb497f5a399e01208c47360b531e34910b,False,True,True,False,False,False
498,0x32ff90bc3fa85d87e982d5b384a453bed65a799d,False,True,True,False,False,False
624,0x41b0b75802b25fbc14e0157f9341473843c28e1e,False,True,True,False,False,False
698,0x49371938108e13d0253de8f488de7c82c2735ee0,False,True,True,False,False,False
703,0x49f86794fe566dead8f8198729e5fdf37cb7f6ab,False,True,True,False,False,False


In [None]:
# Show a bounded preview instead of rendering the whole frame (thousands of rows).
df_matching_address.head(10)

In [19]:
# .loc slices by label and includes the end label, so this shows rows 0 through 10.
df_address['address'].loc[:10]

0     0x5c7291e18c3ecf30e14ccf1dcd646c0ca3309113
1     0x30aee7a692427143ba7e65efb7db35d360a0ac51
2     0xe43f6352cbbb43a286f77d7dbc9348fb9a4344c9
3     0xebacd448a23f63a156841d440778be89613be97b
4     0x4a3588812a9da7a2b1d3f5eba57c7169008f7c38
5     0xd2602c7bdfc9f413974e944280bbfae275d1b1b6
6     0xe73a198e7c07c418d7dcfd42f5e953074abbd125
7     0x9e184fa0b20ab043638c133b9825956ab37e1ee2
8     0xd854b808a6315d808bf66287550104b459e83375
9     0x6d6a5e2f37d6f0d75a66ebebb852adfcf2261137
10    0xe3f42515bc533f5a8072a10ff6275043f33be7ab
Name: address, dtype: object

In [21]:
# Similarity check for the first address listed in df_address.
# NOTE(review): in the recorded run this call ends with
# "TypeError: sequence item 1: expected str instance, float found" after several
# SettingWithCopyWarning messages raised inside the analyser — the fix belongs
# in TransactionAnalyser, not in this cell.
tx_analyser.has_transaction_similitude("0x5c7291e18c3ecf30e14ccf1dcd646c0ca3309113")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_address_transactions.sort_values('block_timestamp', ascending=True, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_other_address['lcs'] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_other_address.loc[add, 'lcs'] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing

TypeError: sequence item 1: expected str instance, float found

In [None]:
# NOTE(review): `a` is not defined in any visible cell and this cell was never
# executed — it looks like leftover scratch and would raise NameError on
# Restart & Run All. Remove it or define `a` first — TODO confirm.
a

In [None]:
# Quick trial of the optimised similarity check on labels 0..10 only.
# NOTE(review): the result is stored in `simi_100` although only 11 rows are used;
# the name is kept because a later cell assigns to the same variable.
simi_100 = df_address['address'].loc[:10].apply(tx_analyser.has_transaction_similitude_opti)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array_transactions_other = self.dict_add_value_string_tx.get(add)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  for add in df_other_address.index:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

KeyboardInterrupt: 

In [None]:
# Optimised similarity check on labels 0..100 (label-based slice, end included).
simi_100 = df_address['address'].loc[:100].apply(tx_analyser.has_transaction_similitude_opti)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array_transactions_other = self.dict_add_value_string_tx.get(add)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  for add in df_other_address.index:


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\DELL Admin\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\DELL Admin\AppData\Local\Temp\ipykernel_7100\1011365593.py", line 1, in <module>
    simi_100 = df_address.loc[:100, 'address'].apply(lambda x : tx_analyser.has_transaction_similitude_opti(x))
  File "c:\Python310\lib\site-packages\pandas\core\series.py", line 4771, in apply
    return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
  File "c:\Python310\lib\site-packages\pandas\core\apply.py", line 1105, in apply
    return self.apply_standard()
  File "c:\Python310\lib\site-packages\pandas\core\apply.py", line 1156, in apply_standard
    mapped = lib.map_infer(
  File "pandas\_libs\lib.pyx", line 2918, in pandas._libs.lib.map_infer
  File "C:\Users\DELL Admin\AppData\Local\Temp\ipykernel_7100\1011365593.py", line 1, in <lambda>
    simi_