# Data preprocessing

## Libs & Constants

In [1]:
# libs
import pandas as pd

# constants
# Token symbol; rows are later filtered on their TokenSymbol column using it.
TOKEN_SYMBOL = "GRO"
# Address the transfers' From/To columns are matched against. Lower-cased here
# so it compares equal to lower-case address strings.
REDEMPTION_ADDRESS = "0x204d9de758217a39149767731a87bcc32427b6ef".lower()

## Datasets

In [2]:
# Read the transfers CSV into a DataFrame and preview its first five rows.
df_data = pd.read_csv(filepath_or_buffer="../data/redemption_transfers.csv")
df_data.head(n=5)

Unnamed: 0,Txhash,Blockno,UnixTimestamp,DateTime (UTC),From,To,TokenValue,USDValueDayOfTx,ContractAddress,TokenName,TokenSymbol
0,0x233bed8746971ffbeb34bef48a8ad84dd33bd030465f...,18229361,1695845099,2023-09-27 20:04:59,0x859df1b9bb101715b7c9bfc213378e383d216241,0x204d9de758217a39149767731a87bcc32427b6ef,1000.0,$290.15,0x3ec8798b81485a254928b70cda1cf0a2bb0b74d7,Gro DAO Token,GRO
1,0xe4c282ac2f79c790ef09dfa7487577f48ce6f14a7379...,18229833,1695850799,2023-09-27 21:39:59,0x01ab07010c2f4bf5971537669cc19d5d8cf320a0,0x204d9de758217a39149767731a87bcc32427b6ef,60.18642735397526,$17.46,0x3ec8798b81485a254928b70cda1cf0a2bb0b74d7,Gro DAO Token,GRO
2,0x1154ffc38fa7397327b7a40f2dee7edf165e06c97b44...,18229914,1695851795,2023-09-27 21:56:35,0x299741f68fd5b59ca177795d824f9ce9f6843bba,0x204d9de758217a39149767731a87bcc32427b6ef,63.0,$18.28,0x3ec8798b81485a254928b70cda1cf0a2bb0b74d7,Gro DAO Token,GRO
3,0x29e363c5f8fc9cdf9291f42599a7c707ec6f340d8b76...,18230181,1695854999,2023-09-27 22:49:59,0x31535eb65e75949d6e5eb7ee88006aff24b85633,0x204d9de758217a39149767731a87bcc32427b6ef,1740.433262095915,$504.99,0x3ec8798b81485a254928b70cda1cf0a2bb0b74d7,Gro DAO Token,GRO
4,0xbc4d31b3fd2591a8d156eba5db95caebf2b0ee4fea5b...,18230425,1695857939,2023-09-27 23:38:59,0xa57c927df5b836d203560a5b568af75831939eed,0x204d9de758217a39149767731a87bcc32427b6ef,5.500325140221783,$1.60,0x3ec8798b81485a254928b70cda1cf0a2bb0b74d7,Gro DAO Token,GRO


In [3]:
# Read the address -> user type table and preview its first five rows.
df_user_types = pd.read_csv(filepath_or_buffer="../data/users.csv")
df_user_types.head(n=5)

Unnamed: 0,Address,UserType
0,0xfa5e54667bf2e3536ee386672b57809ee182d979,Team
1,0xf59cc73cc03b0366c53f41144691fbd6f9027801,Team
2,0xf23b0e575ca65f26ba40eae794b9a8b903715cb7,Team
3,0xd332cb987c22edf44e35cd2128dd6b4ffc1437b8,Team
4,0xd0ec53a6144dee637052bf94b443fd1d49f45076,Team


## Data transformation

In [4]:
# Lower-case the user-type addresses so they match the lower-cased transfer
# addresses used as the merge key below.
df_user_types["Address"] = df_user_types["Address"].str.lower()

# Convert 'TokenValue' to float, dropping thousands separators. Casting to str
# first keeps this working when the column is already numeric (pandas inferred
# floats on load, or this cell is being re-run), where `.str` would raise.
df_data["TokenValue"] = (
    df_data["TokenValue"].astype(str).str.replace(",", "", regex=False).astype(float)
)

# REDEMPTION_ADDRESS is lower case, so lower-case both address columns before
# comparing; otherwise a mixed-case address would silently fail to match.
from_address = df_data["From"].str.lower()
to_address = df_data["To"].str.lower()
is_withdrawal = from_address == REDEMPTION_ADDRESS
is_deposit = to_address == REDEMPTION_ADDRESS

# Convert withdrawal values to negative. -abs() instead of a plain negation
# keeps the sign stable if this cell is executed more than once.
# NOTE(review): assumes raw transfer amounts are non-negative — confirm.
df_data.loc[is_withdrawal, "TokenValue"] = -df_data.loc[
    is_withdrawal, "TokenValue"
].abs()

# Unify address: take the sender for transfers into the redemption address and
# the recipient for everything else (vectorised, no row-wise apply).
df_data["Address"] = from_address.where(is_deposit, to_address)

# Merge transfers with user types; transfers without a match keep NaN UserType.
# NOTE(review): a duplicated Address in df_user_types would duplicate transfer
# rows here and inflate the sums — verify the table has unique addresses.
df_joined = df_data.merge(df_user_types, on="Address", how="left")

# Addresses that are not listed in the user types are `Community`.
# Assign the result back rather than calling fillna(inplace=True) on the column
# selection: that form is chained assignment, which warns on recent pandas and
# leaves df_joined unchanged under Copy-on-Write, so the groupby below would
# drop every NaN-UserType row.
df_joined["UserType"] = df_joined["UserType"].fillna("Community")

# Keep only rows whose TokenSymbol is TOKEN_SYMBOL (excludes claims).
df_gro = df_joined[df_joined["TokenSymbol"] == TOKEN_SYMBOL]

# Net token value per (address, user type): sum of the signed transfer values.
df_grouped = (
    df_gro.groupby(["Address", "UserType"])
    .agg(NetTokenValue=pd.NamedAgg(column="TokenValue", aggfunc="sum"))
    .reset_index()
)

df_grouped.head(10)

Unnamed: 0,Address,UserType,NetTokenValue
0,0x01ab07010c2f4bf5971537669cc19d5d8cf320a0,Community,60.186427
1,0x03387d5015f88aea995e790f18ef7ff9dfa0943c,Community,2579.522285
2,0x03a13b51afedaac32fd63092c0dc75e00536096d,Community,1239.12182
3,0x03ce77d68c5ef952d91ff53c336ffa260f43caa0,Community,616.493065
4,0x04106fdd34485c03794f112e1c71ec6706bbb506,Team,84507.37849
5,0x055c881dabe7cd8f16b630c52a755cdd9c56ecd9,Community,117172.069817
6,0x056c23c029da306b0f73f3e005f7a31bfad7af4f,Community,356.19
7,0x0598d2c365a5661032a91da03ed74e298ceea67b,Community,2453.204476
8,0x070172e1338c407096f7030007c1a865d5a05487,Owner,150000.0
9,0x071f78421ac4c71604955320f0745a084db75e1e,Community,46.478255


## Data dump

In [5]:
# Write the per-address totals to CSV, leaving out the DataFrame index.
df_grouped.to_csv(path_or_buf='../data/gro_data.csv', index=False)

## Data overview

In [6]:
# Grand total over all addresses. Series.sum() is vectorised and skips NaN,
# whereas the built-in sum() loops in Python and propagates NaN.
print(f'Total GRO redeemed: {df_grouped["NetTokenValue"].sum():,.2f}')

Total GRO redeemed: 34,691,611.64


In [7]:
# Split the per-address totals by user type.
grouped = df_grouped.groupby("UserType")

# Length of the longest user-type name, used to left-align the labels.
# default=0 avoids a ValueError from max() when there are no groups at all.
longest_name_length = max((len(name) for name in grouped.groups.keys()), default=0)

# One line per user type with the sum of its addresses' net token values.
for name, group in grouped:
    net_value = group["NetTokenValue"].sum()
    print(f"GRO from {name:<{longest_name_length}}: {net_value:,.2f}")

GRO from Community: 6,180,168.34
GRO from Investor : 9,918,134.95
GRO from Owner    : 16,768,436.66
GRO from Team     : 1,824,871.70
