# Sybil exploration grant clima

In [69]:
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd

In [70]:
from legos import TransactionAnalyser
from utils import LoadData

In [71]:
current_dir = Path(os.getcwd())

### Initialisation of the data

#### Grant data initialisation

In [72]:
path_to_grants = "data/grants"
path_to_contributor_address = os.path.join(path_to_grants, "address")

In [73]:
full_path_add = os.path.join(current_dir, path_to_contributor_address)
files = os.listdir(full_path_add)
files

['df_contribution_address_CLIMATE.csv',
 'df_contribution_address_ETHEREUM.csv',
 'df_contribution_address_FANTOM.csv',
 'df_contribution_address_GR15.csv',
 'df_contribution_address_OSS.csv',
 'df_contribution_address_UNICEF.csv',
 'unique_ctbt_address.csv']

In [74]:
df_address = pd.read_csv(os.path.join(full_path_add, "df_contribution_address_CLIMATE.csv"))
list_address = df_address["address"].tolist()

The list_address is then used to load all the transactions from the grant data.

#### Data loader initialisation

In [75]:
path_to_parent = Path(current_dir).parent
path_to_tx = os.path.join(path_to_parent, 'transactions_full')
print(path_to_tx)

c:\Users\DELL Admin\Documents\ODC\DataBuilderHackathon\transactions_full


In [76]:
data_loader = LoadData.LoadData(path_to_tx)

The data loader is then used to load the data from the grant data. It takes some time to load the data. 3 minutes on my computer. 

In [77]:
df_tx = data_loader.create_df_tx('ethereum', list_address)

#### Sybil scorer TransactionAnalyser initialisation and exploration

In [78]:
tx_analyser = TransactionAnalyser.TransactionAnalyser(df_tx, df_address=df_address)

In [79]:
df_matching_address = pd.DataFrame(df_tx.EOA.unique(), columns=["address"])
df_matching_address.head(2)

Unnamed: 0,address
0,0x001f41de6f455d6df74a669c6b21b6dda1bae644
1,0x002153708f11f2651215059eea30820ee4d49ff3


### Load grant application

In [80]:
df_applications = pd.read_csv(os.path.join(path_to_grants, "df_application_normalized.csv"))

In [129]:
df_applications[df_applications.project_address == "0xe126b3E5d052f1F575828f61fEBA4f4f2603652a"]

Unnamed: 0,grant_id,project_active_grant,project_approved,project_address,project_title,project_website,project_decription,project_created_at,project_round,project_long_id,...,project_funding_source,project_team_size,live_on_other_chains,project_value,project_improvements_to_existing,project_github_twitter_credential,project_github_org_link,project_amount_received,project_contributor_count,project_contribution_count


In [95]:
df_climate = df_applications[df_applications.project_round == "CLIMATE"]

In [121]:
df_climate["contract_id"] = df_climate.grant_id.apply(lambda x :x.split('-')[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_climate["contract_id"] = df_climate.grant_id.apply(lambda x :x.split('-')[0])


In [122]:
df_climate["contract_id"].unique()

array(['0x0299d145933e3abc9bee2251589d080e0b12765e7e211e72e5209d0ac1889d33',
       '0x0b84bfa1e358e2c7816e5244258c16e4d6d06930fb4cf20f2d714ce35d330308',
       '0x0d85d55a968bc9929c4f7ddfef2e6008b4ad8b77be65f7c08e89eab7cb760cc6',
       '0x13fdb2715bc255356cf8f34ec8d40f3ffd0e2d38299b5533ee2c7772e2948e5b',
       '0x21cf1e3113b6bc01f3bc0517be60f2bcf495ba9e6ba9be416ec0fb863c8c1ec7',
       '0x2971da13feca9a170191e5ee5850beb57acbab9f5850e96b50f5d2ab15a351f2',
       '0x29ebfa45b1d708eda13c20e04a999053361a809a89c48041ffe67890329aefd1',
       '0x2e4e97b9d9c36923cf2c1062f16c6cb83259c1e0b9a10ac466bf20cec9589ae1',
       '0x329170e769ace967de0a1467dbeb2c3eaee2580099ccb9dc650452ab10216821',
       '0x34da3ca2039af745fdf37add512a3cb5fdd7b7b0a5c95fd74877742b534222b3',
       '0x3f9ed1884e631b7684ab9421a7ad3b4b532308268bac91bdd772d483f0363131',
       '0x42799cfce3b14376285e960875ef687cd7d1b32439a2ac3316304536f2c16509',
       '0x42c5410440da06b29d782ee691783aad21d76b0de61c1034432f816af89aa57f',

In [101]:
'0xD95A1969c41112cEE9A2c931E849bCef36a16F4C' in df_climate.project_long_id.values

False

In [91]:
df_applications.project_contract_id.apply(lambda x : x.str.split(':'))

0                                                    NaN
1                                                    NaN
2                                                    NaN
3                                                    NaN
4                                                    NaN
                              ...                       
1127     1:0x03506eD3f57892C85DB20C36846e9c808aFe9ef4:88
1128    10:0x8e1bD5Da87C14dd8e08F7ecc2aBf9D1d558ea174:39
1129    1:0x03506eD3f57892C85DB20C36846e9c808aFe9ef4:112
1130     1:0x03506eD3f57892C85DB20C36846e9c808aFe9ef4:96
1131    1:0x03506eD3f57892C85DB20C36846e9c808aFe9ef4:219
Name: project_contract_id, Length: 1132, dtype: object

In [82]:
df_applications.project_round.unique()

array(['GR15', 'FANTOM', 'UNICEF', 'CLIMATE', 'ETHEREUM', 'OSS'],
      dtype=object)