# Notebook comparing the vectorized method and the apply method of sbscorer

In [1]:
import os
from pathlib import Path
import numpy as np
import requests
import pandas as pd
from sbdata.FlipsideApi import FlipsideApi
from sbutils import LoadData
from sblegos.TransactionAnalyser import TransactionAnalyser as txa

In [7]:
# Identifiers of the round and chain under study
FOLDER_NAME = 'community_round'
CHAIN = 'optimism'
CHAIN_ID = '10'
GRANT_ROUND_ID = '0x984e29dCB4286c2D9cbAA2c238AfDd8A191Eefbc'

# Folder layout: every data folder sits two levels above the working directory
current_dir = Path(os.getcwd())
PATH_TO_EXPORT = os.path.join(current_dir.parent.parent, 'tx_data', FOLDER_NAME)
DATA_DIR = os.path.join(current_dir.parent.parent, 'data-regen-rangers')
DATA_DIR_GITCOIN = os.path.join(current_dir.parent.parent, 'data-gitcoin')
PATH_TO_VOTES = os.path.join(DATA_DIR_GITCOIN, 'citizen-votes.csv')

# Flipside client; the API key is taken from the environment (KeyError if unset)
api_key = os.environ['FLIPSIDE_API_KEY']
flipside_api = FlipsideApi(api_key, max_address=400)

# Distinct voter addresses from the votes CSV, lower-cased as strings
# (presumably so they compare equal to the addresses in the transaction data — TODO confirm)
df_votes = pd.read_csv(PATH_TO_VOTES)
array_unique_address = np.char.lower(df_votes['voter'].unique().astype(str))
print(f'Number of unique voter: {len(array_unique_address)}')

Number of unique voter: 17023


In [3]:
# Load data: build the transaction DataFrame for the voter addresses.
# The loader is pointed at PATH_TO_EXPORT and asked for the CHAIN transactions of
# array_unique_address (presumably it reads previously exported files — TODO confirm).
data_loader = LoadData.LoadData(PATH_TO_EXPORT)
df_tx = data_loader.create_df_tx(CHAIN, array_unique_address)

In [4]:
# Sanity check: addresses found in only one of the voter list and df_tx.EOA
# (symmetric difference). An empty array means both hold the same set of addresses.
c = np.setxor1d(array_unique_address, df_tx.EOA.values)
c

array([], dtype=object)

In [6]:
# Addresses present both in df_tx.EOA and in the voter list (sorted, unique);
# `i` is what the transaction analyser receives as array_address in the next cell.
i = np.intersect1d(df_tx.EOA.unique(), array_unique_address)
i.shape

(17023,)

In [8]:
# Build the transaction analyser on df_tx, passing the matched addresses `i` as array_address.
tx_analyser = txa(df_tx, array_address=i)

In [10]:
# Compute the feature table with the non-vectorized (non-NumPy) method
# and preview its first two rows.
df_matching_address = tx_analyser.get_df_features()
df_matching_address.head(2)

Use the `%memit` magic (from the `memory_profiler` extension) to measure memory usage, or the `%prun` magic to profile the execution time and resource usage of your code.

In [None]:
# Placeholder column for the LCS results, defaulting to 0 for every address
df_matching_address['lcs'] = 0

# Rows flagged less_10_tx are the only ones sent through the LCS similarity search
is_less_10_tx = df_matching_address['less_10_tx'] == True

# Evaluated but not displayed: only the last expression of a cell is rendered
df_matching_address[is_less_10_tx].shape

# One similarity result per flagged address, computed address by address with apply
r = (
    df_matching_address
    .loc[is_less_10_tx, 'address']
    .apply(lambda address: tx_analyser.transaction_similitude_pylcs(address, minimum_sim_tx=3))
)

(280, 8)

In [None]:
def lcs_to_array(lcs):
    """Convert an LCS result table into a NumPy array.

    Parameters
    ----------
    lcs : pandas object exposing ``shape`` and ``reset_index()``
        Presumably the DataFrame returned by the LCS similarity search
        for one address (TODO confirm against the analyser).

    Returns
    -------
    list or numpy.ndarray
        An empty list when ``lcs`` has no rows; otherwise the rows of
        ``lcs`` with the index moved into the leading column(s).
    """
    has_rows = lcs.shape[0] != 0
    return np.asarray(lcs.reset_index()) if has_rows else []

def get_mean_score_lcs(lcs):
    """Average of the ``score`` values in an LCS result table.

    Parameters
    ----------
    lcs : pandas object exposing ``shape`` and ``reset_index()``
        Must provide ``score`` either as a column or as an index level,
        since the index is reset before the column is read.

    Returns
    -------
    int or float
        ``0`` when ``lcs`` has no rows, otherwise the mean of ``score``.
    """
    if lcs.shape[0] > 0:
        scores = lcs.reset_index()['score']
        return scores.mean()
    return 0
    
def get_max_score_lcs(lcs):
    """Largest ``score`` value in an LCS result table.

    Parameters
    ----------
    lcs : pandas object exposing ``shape`` and ``reset_index()``
        Must provide ``score`` either as a column or as an index level,
        since the index is reset before the column is read.

    Returns
    -------
    int or float
        ``0`` when ``lcs`` has no rows, otherwise the maximum of ``score``.
    """
    if lcs.shape[0] > 0:
        scores = lcs.reset_index()['score']
        return scores.max()
    return 0
    
# Summary of the LCS results per address. Every address gets a default first;
# only the rows flagged less_10_tx are then overwritten from `r`.
# The two score columns start as 0.0 (float) because they receive float scores:
# writing floats into an integer column through .loc relies on silent upcasting,
# which pandas has deprecated.
df_matching_address['cluster_size_lcs'] = 0
df_matching_address['mean_score_lcs'] = 0.0
df_matching_address['max_score_lcs'] = 0.0

# Build the row mask once instead of recomputing it for every assignment
is_less_10_tx = df_matching_address['less_10_tx'] == True

# `r` holds one LCS result per flagged address; assignments align on the index
df_matching_address.loc[is_less_10_tx, 'cluster_size_lcs'] = r.apply(len)
df_matching_address.loc[is_less_10_tx, 'mean_score_lcs'] = r.apply(get_mean_score_lcs)
df_matching_address.loc[is_less_10_tx, 'max_score_lcs'] = r.apply(get_max_score_lcs)

# Number of flagged addresses with at least one LCS match
print((df_matching_address.loc[is_less_10_tx, 'cluster_size_lcs'] > 0).sum())

In [None]:
# Display the rows flagged less_10_tx, including the LCS summary columns set in the previous cell.
df_matching_address[df_matching_address['less_10_tx'] == True]

Unnamed: 0,address,seed_same_naive,seed_same,seed_suspicious,less_5_tx,less_10_tx,interacted_other_ctbt,lcs,cluster_size_lcs,mean_score_lcs,max_score_lcs
3,0x008bb720841a00aba1447738b867d71cd32f7c4e,True,True,False,True,True,False,"[[0x65e1f863e632c9643d413bfc54a6627f6d43f186, ...",0,0.0,0.0
9,0x0208de8c57f26fe715d87e0687c5ec3a9292ceef,True,True,False,True,True,False,[],3,0.2,0.2
11,0x02c85a1a9b682eba44c4333a66d3ce64c71b0743,True,True,False,True,True,False,[],3,0.2,0.2
12,0x030164425b941813345c52bb67407197044e7ba7,True,True,False,True,True,False,"[[0x44f038d4ad8aa7b4b22b4dbfa2ecba873560758b, ...",1,0.2,0.2
13,0x032dc63c7822a2b4647bd857d386efb931aa0c18,True,True,False,True,True,False,"[[0x4593c72d9881355f479dfd82d5f4c208eb1cc916, ...",8,0.2,0.2
...,...,...,...,...,...,...,...,...,...,...,...
996,0xfb6035a69681996b965ec146361d265e11231172,True,True,False,True,True,False,,10,0.2,0.2
1001,0xfd291021c322965b1bfeb3c62ebd74aec5036fc3,True,True,False,False,True,False,,0,0.0,0.0
1003,0xfd91d221b0e2105a148fa727d959100bcc5d0175,True,True,False,False,True,False,,0,0.0,0.0
1006,0xfecadbe4f0aa97fe1d5cf7d043c0c477ee3cb646,True,True,False,True,True,False,,10,0.2,0.2


171