In [4]:
import numpy as np
import pandas as pd

# Generate a synthetic table of AMM trades: every wallet address belongs to one
# of three behavioural clusters, and each cluster has its own distribution for
# how often an address trades and how much it trades.

# Seed for the random generator so that "Restart Kernel -> Run All" reproduces
# exactly the same dataset.
RANDOM_SEED = 42

# Exclusive upper bound for the block numbers drawn below (blocks fall in
# [1, n_transactions - 1]). Despite its name this is NOT the number of rows
# produced; the row count is the sum of the per-address trade counts.
n_transactions = 10000
n_addresses = 3000

# Not used anywhere in this cell; kept so that any other cell that reads these
# names keeps working.
mean_amt = 1000
std_amt = 500

# One zero-padded, 40-hex-digit pseudo address per wallet.
addresses = np.array(['0x{:040x}'.format(i) for i in range(n_addresses)])

# Cluster label (0, 1 or 2) for each address, assigned in contiguous runs.
cluster_sizes = [1850, 1000, 150]
if sum(cluster_sizes) != n_addresses:
    raise ValueError('cluster_sizes must sum to n_addresses')
cluster_labels = np.repeat(np.arange(len(cluster_sizes)), cluster_sizes)

# Per-cluster mean / standard deviation of the number of trades per address.
freq_means = [20, 50, 10]
freq_stds = [2, 16, 3]

# Per-cluster mean / standard deviation of the traded amount.
amt_means = [30, 1000, 2000]
amt_stds = [3, 80, 100]

rng = np.random.default_rng(RANDOM_SEED)

# Number of trades for every address. A normal draw can be negative (cluster 1
# has std 16 around a mean of 50), which used to crash when passed as a size,
# so clamp at zero before truncating to an integer count.
trade_counts = rng.normal(np.take(freq_means, cluster_labels),
                          np.take(freq_stds, cluster_labels))
trade_counts = np.clip(trade_counts, 0, None).astype(int)

# Expand to one entry per trade: the index of the owning address and its cluster.
trade_owner = np.repeat(np.arange(n_addresses), trade_counts)
trade_cluster = cluster_labels[trade_owner]

# Build the whole table in one go instead of concatenating 3000 small frames
# (quadratic, and it left 'blocknumber' with object dtype).
transactions = pd.DataFrame({
    'blocknumber': rng.integers(1, n_transactions, size=trade_owner.size),
    'wallet_address': addresses[trade_owner],
    'amount_traded': rng.normal(np.take(amt_means, trade_cluster),
                                np.take(amt_stds, trade_cluster)),
})

# Shuffle the rows (seeded) and reset the index.
transactions = transactions.iloc[rng.permutation(len(transactions))].reset_index(drop=True)

In [5]:
# Preview the generated transactions; the recorded output shows only the first
# and last rows with the middle elided.
transactions

Unnamed: 0,blocknumber,wallet_address,amount_traded
0,3201,0x00000000000000000000000000000000000000ea,28.281221
1,6256,0x0000000000000000000000000000000000000212,30.273289
2,6612,0x00000000000000000000000000000000000001f1,27.839600
3,4836,0x0000000000000000000000000000000000000aee,966.693829
4,6340,0x0000000000000000000000000000000000000707,31.922800
...,...,...,...
86099,6965,0x00000000000000000000000000000000000003df,29.079976
86100,9672,0x0000000000000000000000000000000000000ad9,989.427933
86101,3286,0x0000000000000000000000000000000000000326,32.409810
86102,2977,0x000000000000000000000000000000000000039a,28.788668


In [7]:
# Write the synthetic transactions to a CSV file in the working directory,
# leaving out the DataFrame's row index.
transactions.to_csv(path_or_buf='AMM_transactions.csv', index=False)