In [1]:
import grequests
import requests
import os
import json
import base64
import pandas as pd

API_ENDPOINT_TXS = "https://api.osmosis.interbloc.org/cosmos/tx/v1beta1/txs?events=tx.height={}&pagination.offset={}"
# RPC_ENDPOINT = "https://rpc.osmosis.interbloc.org:443" ## note: history on this node is pruned so it cannot fetch data at heights around `UPGRADE_HEIGHT`
BLOCK_TX_FILE_PATH = "data/block_tx/{}.json"
TX_RESULT_LIMIT = 100

UPGRADE_HEIGHT = 4707300
HALT_HEIGHT = 4713064

GAMM_MSG_PREFIX = "/osmosis.gamm.v1beta1"
UNPOOL_WHITELISTED_MSG = "/osmosis.superfluid.MsgUnPoolWhitelistedPool"
EXEC_MSG = "/cosmos.authz.v1beta1.MsgExec"

## 0. Download block data 

Download all tx responses for every block in [`UPGRADE_HEIGHT`, `HALT_HEIGHT`] to `data/block_tx/`


### Method 1. Get the downloaded data

Downloaded data already avaialble [here](fixme://) (**NOTE: UDPATE LINK!**)

Extract the archive in `data/block_tx/`


### Method 2. Download data in parallel

In [None]:
# TODO: FIXME! Account for pagination (see slow version - results are grouped by pages of 100 entries, seems to be unchangeable, using workaround with 'pagination.offset')
 
STEP = 5

for height in range(UPGRADE_HEIGHT, HALT_HEIGHT+1, STEP):

    urls = [API_ENDPOINT_TXS.format(h) for h in range(height, height + STEP) if not os.path.exists(BLOCK_TX_FILE_PATH.format(h)) and h <= HALT_HEIGHT]
    rs = (grequests.get(url, headers={"Accept": "application/json"}) for url in urls)
    responses = grequests.map(rs, size = STEP)

    for idx, response in enumerate(responses):
        curr_height = response.request.url.replace(API_ENDPOINT_TXS.format(""), "")

        with open(BLOCK_TX_FILE_PATH.format(curr_height), "w") as f:
            json.dump(response.json()["tx_responses"], f)


### Method 3. Slow serialized version

In [None]:
for height in range(UPGRADE_HEIGHT, HALT_HEIGHT+1):
    
    print(height, end="\r")
    
    if not os.path.exists(BLOCK_TX_FILE_PATH.format(height)):
        offset = 0
        response = requests.get(API_ENDPOINT_TXS.format(height, offset), headers={"Accept": "application/json"})
        response_json = response.json()
        total = int(response_json["pagination"]["total"])
        tx_responses = response_json["tx_responses"]
        while total > TX_RESULT_LIMIT:
            total -= TX_RESULT_LIMIT
            offset += TX_RESULT_LIMIT
            response = requests.get(API_ENDPOINT_TXS.format(height, offset), headers={"Accept": "application/json"})
            tx_responses.extend(response.json()["tx_responses"])
        with open(BLOCK_TX_FILE_PATH.format(height), "w") as f:
                json.dump(tx_responses, f)

## 1. Load raw txs into pandas DataFrame

### Method 1. Import the downloaded data and save DataFrame to `raw_tx.csv`

In [None]:
results = []

def filter_txs_data(dict):
    filtered_dict = {key: dict[key] for key in ["height", "txhash", "code", "timestamp", "tx", "logs"]}
    return filtered_dict


for height in range(UPGRADE_HEIGHT, HALT_HEIGHT+1):
    
    print("processing block:", height, "remaining:", "{:5d}".format(HALT_HEIGHT - height), end='\r')
    
    with open(BLOCK_TX_FILE_PATH.format(height)) as f:
        txs_data = json.load(f)

        if txs_data:
            txs = list(filter(filter_txs_data, txs_data)) 
            results += txs

print("\ncreating dataframe...")

raw_df = pd.DataFrame.from_records(results)
raw_df.to_csv("csv/raw_txs.csv", index=False)

print("done.")

### Method 2. Load DataFrame from `raw_txs.csv` (if previously generated)

In [3]:
import ast

raw_df = pd.read_csv("csv/raw_txs.csv")
raw_df.tx = raw_df.tx.apply(ast.literal_eval)
raw_df.logs = raw_df.logs.apply(ast.literal_eval)

## 2. Process txs data

In [81]:
df = raw_df.copy()

In [82]:
# Remove unsuccessful transactions
df = df[df["code"] == 0]
df.reset_index(inplace=True, drop=True)

# Remove unused columns
df.drop(columns=["raw_log", "events", "data", "info", "codespace", "gas_wanted", "gas_used", "code"], inplace=True)

# Expand txs (with logs)
df = df.join(pd.json_normalize(df.tx)[["body.messages"]]).drop(columns=["tx"])
df = df.explode(column=["body.messages", "logs"])
df.reset_index(inplace=True, drop=True)

# Extract events from logs
df = df.join(pd.json_normalize(df.logs)[["events"]]).drop(columns=["logs"])

df.head()

Unnamed: 0,height,txhash,timestamp,body.messages,events
0,4707300,AF8A1A668EAF07C57365EE2065A27E69FB6550C7300EC5...,2022-06-07T16:24:28Z,{'@type': '/osmosis.gamm.v1beta1.MsgJoinSwapEx...,"[{'type': 'coin_received', 'attributes': [{'ke..."
1,4707300,8FA019013E99D64980674B46F911553BA8E5340FB1B543...,2022-06-07T16:24:28Z,"{'@type': '/cosmos.gov.v1beta1.MsgVote', 'prop...","[{'type': 'message', 'attributes': [{'key': 'a..."
2,4707300,487E8829053D4B7DDE9B038F990486CE332691EBBD41BD...,2022-06-07T16:24:28Z,{'@type': '/osmosis.gamm.v1beta1.MsgJoinSwapEx...,"[{'type': 'coin_received', 'attributes': [{'ke..."
3,4707300,387FD1E9CA5C77908613720B0C004848B6BB38B10FDDD1...,2022-06-07T16:24:28Z,{'@type': '/cosmos.staking.v1beta1.MsgDelegate...,"[{'type': 'coin_received', 'attributes': [{'ke..."
4,4707300,A08E30D44F70ABDB675DC1E68738420CC454D991930AAC...,2022-06-07T16:24:28Z,{'@type': '/ibc.core.client.v1.MsgUpdateClient...,"[{'type': 'message', 'attributes': [{'key': 'a..."


In [83]:
# Expand messages (but execution data will come from events)
df = df.join(pd.json_normalize(df["body.messages"])[["@type", "sender", "grantee", "msgs"]])
df.drop(columns=["body.messages"], inplace=True)

# Filter messages
df = df[(df["@type"].str.contains(GAMM_MSG_PREFIX)) | (df["@type"] == UNPOOL_WHITELISTED_MSG) | (df["@type"] == EXEC_MSG)]
execs_msgs_if_gamm = df[df["@type"] == EXEC_MSG].msgs.apply(lambda c: True if [m for m in c if GAMM_MSG_PREFIX in m["@type"]] else False)
df.drop(execs_msgs_if_gamm[execs_msgs_if_gamm == False].index, axis=0, inplace=True)
df.reset_index(inplace=True, drop=True)

df.head()

Unnamed: 0,height,txhash,timestamp,events,@type,sender,grantee,msgs
0,4707300,AF8A1A668EAF07C57365EE2065A27E69FB6550C7300EC5...,2022-06-07T16:24:28Z,"[{'type': 'coin_received', 'attributes': [{'ke...",/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,osmo148uzfggn8recx64uwfjfyx5zctsu8xzlcpa5f2,,
1,4707300,487E8829053D4B7DDE9B038F990486CE332691EBBD41BD...,2022-06-07T16:24:28Z,"[{'type': 'coin_received', 'attributes': [{'ke...",/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,osmo144yjwr38rl5zn4hntwad0su3rk9scpz0g5muqr,,
2,4707300,D44A94B87395E34A543BF0FD3602087C5AAC0748B3F747...,2022-06-07T16:24:28Z,"[{'type': 'coin_received', 'attributes': [{'ke...",/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1zdzn7pfzfryva8nr6s85lctnuavmk2e0utr9ck,,
3,4707301,744CC419565E3B04491ABEA3C53750500BF05E2AB96E65...,2022-06-07T16:24:34Z,"[{'type': 'coin_received', 'attributes': [{'ke...",/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1m8fseyyr53jukpmpp4d9h9nva5mhqtg7dnnn67,,
4,4707301,1E0BC01D53C694E317576F8152D7EC8A899B711114FA35...,2022-06-07T16:24:34Z,"[{'type': 'coin_received', 'attributes': [{'ke...",/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo105ttc7sk7pa67dazgyph3z0dcv5x4et6pgphm9,,


In [85]:
# define routine to extract data from events
def process_msg_events(row):
    events = row[3]
    msg_type = row[4]
    sender = row[5]
    if "MsgExec" in msg_type:
        # we have one MsgJoinSwapExternAmountIn in the list of txs done via MsgExec, swap it for the underlying msg
        # we don't need generic code, as there's literally just one MsgExec with one MsgJoinSwapExternAmountIn inside in the entire txs list 
        msg_type = row[7][0]["@type"]
        sender = row[7][0]["sender"]

    if "MsgSwapExactAmountIn" in msg_type or "MsgSwapExactAmountOut" in msg_type:
        for event in events:
            if event["type"] == "token_swapped":
                pool_id_list = list()
                tokens_in_list = list()
                tokens_out_list = list()
                swaps = zip(*(iter(event["attributes"]),) * 5) # 5 entries per single "token_swapped" event
                for swap in swaps:
                    pool_id_list.append(swap[2]["value"])
                    tokens_in_list.append(swap[3]["value"])
                    tokens_out_list.append(swap[4]["value"])
                return msg_type, sender, pool_id_list, tokens_in_list, tokens_out_list
    elif "MsgJoinPool" in msg_type or "MsgJoinSwapExternAmountIn" in msg_type or "MsgJoinSwapShareAmountOut" in msg_type:
        for event in events:
            if event["type"] == "pool_joined":
                pool_id = event["attributes"][2]["value"]
                tokens_in = event["attributes"][3]["value"]
            elif event["type"] == "coinbase":
                shares_minted = event["attributes"][1]["value"]
        return msg_type, sender, pool_id, tokens_in, shares_minted
    elif "MsgExitPool" in msg_type or "MsgExitSwapExternAmountOut" in msg_type or "MsgExitSwapShareAmountIn" in msg_type or "MsgUnPoolWhitelistedPool" in msg_type:
        for event in events:
            if event["type"] == "pool_exited":
                pool_id = event["attributes"][2]["value"]
                tokens_out = event["attributes"][3]["value"]
            elif event["type"] == "burn":
                shares_burned = event["attributes"][1]["value"]
        return msg_type, sender, pool_id, shares_burned, tokens_out

In [86]:
# extract data from events
df["@type"], df["sender"], df["poolId"], df["tokensIn"], df["tokensOut"] = zip(*df.apply(process_msg_events, axis=1))
df.drop(columns=["events", "grantee", "msgs"], inplace=True)

df.head()

Unnamed: 0,height,txhash,timestamp,@type,sender,poolId,tokensIn,tokensOut
0,4707300,AF8A1A668EAF07C57365EE2065A27E69FB6550C7300EC5...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,osmo148uzfggn8recx64uwfjfyx5zctsu8xzlcpa5f2,722,1018369uosmo,2572033049108385259gamm/pool/722
1,4707300,487E8829053D4B7DDE9B038F990486CE332691EBBD41BD...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,osmo144yjwr38rl5zn4hntwad0su3rk9scpz0g5muqr,719,1185077ibc/A0CC0CF735BFB30E730C70019D4218A1244...,18561808163462387644gamm/pool/719
2,4707300,D44A94B87395E34A543BF0FD3602087C5AAC0748B3F747...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1zdzn7pfzfryva8nr6s85lctnuavmk2e0utr9ck,"[641, 678]",[29436000000ibc/67795E528DF67C5606FC20F824EA39...,"[791775171uosmo, 879220465ibc/D189335C6E4A68B5..."
3,4707301,744CC419565E3B04491ABEA3C53750500BF05E2AB96E65...,2022-06-07T16:24:34Z,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1m8fseyyr53jukpmpp4d9h9nva5mhqtg7dnnn67,[13],[49807ibc/A0CC0CF735BFB30E730C70019D4218A1244F...,[7632ibc/27394FB092D2ECCD56123C74F36E4C1F92600...
4,4707301,1E0BC01D53C694E317576F8152D7EC8A899B711114FA35...,2022-06-07T16:24:34Z,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo105ttc7sk7pa67dazgyph3z0dcv5x4et6pgphm9,"[678, 584]",[3500000ibc/D189335C6E4A68B513C10AB227BF1C1D38...,"[3139487uosmo, 2727518ibc/0954E1C28EB7AF5B72D2..."


In [88]:
# split swaps into separate rows (one row per poolId)
df = df.explode(column=["poolId", "tokensIn", "tokensOut"])
df.reset_index(inplace=True, drop=True)

df.head()

Unnamed: 0,height,txhash,timestamp,@type,sender,poolId,tokensIn,tokensOut
0,4707300,AF8A1A668EAF07C57365EE2065A27E69FB6550C7300EC5...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,osmo148uzfggn8recx64uwfjfyx5zctsu8xzlcpa5f2,722,1018369uosmo,2572033049108385259gamm/pool/722
1,4707300,487E8829053D4B7DDE9B038F990486CE332691EBBD41BD...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,osmo144yjwr38rl5zn4hntwad0su3rk9scpz0g5muqr,719,1185077ibc/A0CC0CF735BFB30E730C70019D4218A1244...,18561808163462387644gamm/pool/719
2,4707300,D44A94B87395E34A543BF0FD3602087C5AAC0748B3F747...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1zdzn7pfzfryva8nr6s85lctnuavmk2e0utr9ck,641,29436000000ibc/67795E528DF67C5606FC20F824EA39A...,791775171uosmo
3,4707300,D44A94B87395E34A543BF0FD3602087C5AAC0748B3F747...,2022-06-07T16:24:28Z,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1zdzn7pfzfryva8nr6s85lctnuavmk2e0utr9ck,678,791775171uosmo,879220465ibc/D189335C6E4A68B513C10AB227BF1C1D3...
4,4707301,744CC419565E3B04491ABEA3C53750500BF05E2AB96E65...,2022-06-07T16:24:34Z,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1m8fseyyr53jukpmpp4d9h9nva5mhqtg7dnnn67,13,49807ibc/A0CC0CF735BFB30E730C70019D4218A1244FF...,7632ibc/27394FB092D2ECCD56123C74F36E4C1F926001...


## 3. Save processed txs as `txs.csv`

In [89]:
df.to_csv("csv/txs.csv", index=None)

At this point the processed DataFrame contains the ordered sequence of `gamm` transactions with the relevant execution data (plus `MsgUnPoolWhitelistedPool` as it removes liquidity from pools, this is marginal and only implemented to perform consistency checks). 
We can now simulate running all transactions on the various pools, sequentially, starting from the state of pools at `UPGRADE_HEIGHT-1`.

For any transaction except `MsgJoinPool`, we can use the execution data to update the pools state *as is* (the bug was only present in `MaximalExactRatioJoin()` which can only be reached if `tokensIn.len() != 1` in `calcJoinPoolShares()`, only possible via a `MsgJoinPool`).

When a `MsgJoinPool` is encountered, we also compute the **correct** number of shares. Seemingly, exit pool transactions require also a recalculation of liquidity removed using corrected history.

### re-load previously saved `txs.csv`

skip to this part when reloading

In [None]:
df = pd.read_csv("csv/txs.csv")

## 4. Find wallets affected and their credit/debt towards the protocol

In this analysis, the debit accrued due to exploitation will be denominated in all assets exited from the pool(s), not accounting for successive swaps/transfers that finalize the exploit. The attack pattern usually ends up with a final swap that consolidates the gains to a single asset, but this final calculation can be done separately (and is likely already done somewhere by someone, at least for the bigger attackers).

#### define functions to query node for onchain data

In [90]:
# define functions to query onchain pool and address balance data at various heights
# credit to @george-aj (https://github.com/george-aj/osmosis-nitrogen-extra-gamm-analysis) for the starting point

import codecs
import protos.osmosis.gamm.pool_models.balancer.balancerPool_pb2
import protos.osmosis.gamm.v1beta1.query_pb2 as query_gamms
import protos.cosmos.bank.v1beta1.query_pb2 as query_bank
from google.protobuf.json_format import MessageToDict


def _send_abci_query(request_msg, path, response_msg, height):
    """Encode and send pre-filled protobuf msg to RPC endpoint."""
    # Some queries have no data to pass.
    if request_msg:
        request_msg = codecs.encode(request_msg.SerializeToString(), 'hex')
        request_msg = str(request_msg, 'utf-8')

    req = {
        "jsonrpc": "2.0",
        "id": "1",
        "method": "abci_query",
        "params": {
            "height": str(height),
            "path": path,
            "data": request_msg
        }
    }
    req = json.dumps(req)
    resp = requests.post(RPC_ENDPOINT, req, timeout=5).json()
    if 'result' not in resp:
        print(resp)
    response = resp['result']['response']['value']
    if not response:
        print(resp)
    response = base64.b64decode(response)
    result = response_msg()
    result.ParseFromString(response)
    result = MessageToDict(result)
    return result


def get_onchain_pool_data(height):
    request_msg = query_gamms.QueryPoolsRequest()
    request_msg.pagination.limit = 10000
    response_msg = query_gamms.QueryPoolsResponse
    done = False
    while not done:
        try:
            pool_data = _send_abci_query(request_msg=request_msg,
                                        path="/osmosis.gamm.v1beta1.Query/Pools",
                                        response_msg=response_msg,
                                        height=height)
            done = True
        except Exception as e:
            print(e)
            print("error while fetching pools data for height {}. Retrying...".format(height))

    pool_map = {}
    for pool in pool_data.get('pools'):
        pool_map.update({int(pool.get('id')): pool})

    return pool_map


def get_onchain_balance(height, address, denom):
    request_msg = query_bank.QueryBalanceRequest()
    request_msg.address = address
    request_msg.denom = denom
    response_msg = query_bank.QueryBalanceResponse
    done = False
    while not done:
        try:
            balance = _send_abci_query(request_msg=request_msg,
                                        path="/cosmos.bank.v1beta1.Query/Balance",
                                        response_msg=response_msg,
                                        height=height)
            done = True
        except Exception as e:
            print(e)
            print("error while fetching balance data for address {} at height {}. Retrying...".format(address, height))

    return int(balance["balance"]["amount"])

#### define functions to perform join/exit computations

In [115]:
# define functions to compute correct amounts when doing join/exit
# uses `jigu.core` (https://jigu.terra.money/docs/data/sdk.html#decimal-numbers) for a better approximation of `sdk.Dec`
# the logic for join/exit is copied 1~=1 from Osmosis

from decimal import Decimal
from jigu.core import Dec
from math import ceil
from copy import deepcopy


def pool_balance_of_denom(denom, pool):
    pool_assets = pool["poolAssets"]
    for pool_asset in pool_assets:
        if pool_asset["token"]["denom"] == denom:
            return Dec(pool_asset["token"]["amount"])
    raise Exception("cannot find denom {} in pool {}".format(denom, pool["id"]))


def pool_asset_of_denom(denom, pool):
    pool_assets = pool["poolAssets"]
    for pool_asset in pool_assets:
        if pool_asset["token"]["denom"] == denom:
            return pool_asset
    raise Exception("cannot find denom {} in pool {}".format(denom, pool["id"]))


def solve_constant_func_invariant(amt_x_before, amt_x_after, amt_x_weight, amt_y_before, amt_y_weight):
	weight_ratio = Dec(amt_x_weight) / Dec(amt_y_weight)
	y = Dec(amt_x_before) / Dec(amt_x_after)
	return Dec(amt_y_before) * (Dec(1) - Dec(Decimal(str(y)) ** Decimal(str(weight_ratio))))


# defines logic to compute the correct amount of shares when performing a join
def correct_join_pool(tokens_in, pool):
    # MaximalExactRatioJoin
    min_share_ratio = Dec(10**100)
    max_share_ratio = Dec(0)
    coin_share_ratios = dict()
    for coin in tokens_in:
        amt_in = Dec(coin["amount"])
        share_ratio = amt_in / pool_balance_of_denom(coin["denom"], pool)
        if share_ratio < min_share_ratio:
            min_share_ratio = share_ratio
        if share_ratio > max_share_ratio:
            max_share_ratio = share_ratio
        coin_share_ratios[coin["denom"]] = share_ratio
    num_shares = int((min_share_ratio * Dec(pool["totalShares"]["amount"])))
    rem_coins = list()
    added_coins = list()
    if min_share_ratio != max_share_ratio:
        for coin in tokens_in:
            if coin_share_ratios[coin["denom"]] == min_share_ratio:
                added_coins.append(coin)
                continue
        used_amt = ceil(min_share_ratio * pool_balance_of_denom(coin["denom"], pool))
        new_amt = int(coin["amount"]) - int(used_amt)
        added_coins.append({"denom": coin["denom"], "amount": used_amt})
        if new_amt > 0:
            rem_coins.append({"denom": coin["denom"], "amount": new_amt})

    updated_pool = deepcopy(pool)
    updated_pool["totalShares"]["amount"] = int(updated_pool["totalShares"]["amount"]) +  num_shares
    for coin in added_coins:
        for pool_asset in updated_pool["poolAssets"]:
            if pool_asset["token"]["denom"] == coin["denom"]:
                pool_asset["token"]["amount"] = int(pool_asset["token"]["amount"]) + int(coin["amount"])

    # calcJoinSingleAssetTokensIn
    for coin in rem_coins:
        pool_asset = pool_asset_of_denom(coin["denom"], updated_pool)
        pool_balance = Dec(pool_asset["token"]["amount"])
        normalized_weight = Dec(pool_asset["weight"]) / Dec(pool["totalWeight"])
        swap_fee = pool["poolParams"]["swapFee"]
        swap_fee = Dec("0." + "0" * (18 - len(swap_fee)) + swap_fee)
        token_amt_after_fee = Dec(coin["amount"]) * (Dec(1) - (Dec(1) - normalized_weight) * swap_fee)
        new_shares = -solve_constant_func_invariant(pool_balance + token_amt_after_fee, pool_balance, normalized_weight, updated_pool["totalShares"]["amount"], 1)
        num_shares += int(new_shares)

    return num_shares


# compute liquidity removed as a result of burning the provided amount of shares. Used to compute shares on corrected pool history.
def burn_shares(pool, shares_burned_amt):
    shares_burned_amt = Dec(shares_burned_amt)
    exit_fee = pool["poolParams"]["exitFee"]
    exit_fee = Dec("0." + "0" * (18 - len(exit_fee)) + exit_fee)
    if exit_fee > 0:
        shares = shares_burned_amt * (Dec(1) - exit_fee)
    else:
        shares = shares_burned_amt
    share_out_ratio = shares / Dec(pool["totalShares"]["amount"])
    tokens_out = list()
    for pool_asset in pool["poolAssets"]:
        amt_out = int(share_out_ratio * Dec(pool_asset["token"]["amount"]))
        tokens_out.append({"denom": pool_asset["token"]["denom"], "amount": amt_out}) 
    return tokens_out

#### define helper functions to handle coins and pool assets

In [129]:
# quick and dirty function to split a normalized coin string into denom and amount
def parse_token_str(token_str):
    denom_index_start = token_str.find(next(filter(str.isalpha, token_str)))
    amount, denom = int(token_str[:denom_index_start]), token_str[denom_index_start:]
    return {"amount": amount, "denom": denom}


# another quick and dirty function to perform `sdk.Coins.AddCoins()`
def sub_coins(a, b):
    tolerance = 99 # small tolerance for subtraction. Values could slightly diverge due to approximation.
    res = list()
    for coinb in b:
        for coina in a:
            if coina["denom"] == coinb["denom"]:
                if int(coina["amount"]) + tolerance < int(coinb["amount"]):
                    raise Exception("would result in coin with negative amount -- {} -- {}".format(a, b))
                amt = int(coina["amount"]) - int(coinb["amount"])
                amt = 0 if amt < 0 else amt # clamp (< tolerance) negative values to 0
                if amt != 0:
                    # only add entry if amount is not 0
                    res.append({"denom": coina["denom"], "amount": amt})
                break
        else:
            raise Exception("Unable to find denom {} in primary coin list".format(coinb["denom"]))

    return res


# update the pool assets using the provided lists of tokens in and tokens out
def update_pool_assets(tokens_in, tokens_out, pool):
    pool_assets = pool["poolAssets"]
    for token_in in tokens_in:
        for pool_asset in pool_assets:
            if pool_asset["token"]["denom"] == token_in["denom"]:
               pool_asset["token"]["amount"] = str(int(pool_asset["token"]["amount"]) + int(token_in["amount"]))
               break
        else:
            raise Exception("cannot find asset {} in pool {}".format(token_in["denom"], pool["id"]))
    for token_out in tokens_out:
        for pool_asset in pool_assets:
            if pool_asset["token"]["denom"] == token_out["denom"]:
               pool_asset["token"]["amount"] = str(int(pool_asset["token"]["amount"]) - int(token_out["amount"]))
               break
        else:
            raise Exception("cannot find asset {} in pool {}".format(token_out["denom"], pool["id"]))

#### define main txs processing logic

In [130]:
# apply a tx on both "proper" and "corrupted" pools. The "corrupted pools" data only use real block data (also for joins) and is used for consistency checks with onchain data
def apply_tx(tx, proper_pools, corrupted_pools, affected_wallets):
    msg_type = tx["@type"]
    tx_height = int(tx["height"])
    
    if "MsgSwapExactAmountIn" in msg_type or "MsgSwapExactAmountOut" in msg_type:
        # just update pool balances using execution data
        pool_id = int(tx["poolId"])
        tokens_in = [parse_token_str(tx["tokensIn"])] # can only be one token
        tokens_out = [parse_token_str(tx["tokensOut"])] # can only be one token
        update_pool_assets(tokens_in, tokens_out, proper_pools[pool_id])
        update_pool_assets(tokens_in, tokens_out, corrupted_pools[pool_id])

    elif "MsgJoinSwapExternAmountIn" in msg_type or "MsgJoinSwapShareAmountOut" in msg_type:
        # these joins are not bugged, we can just use execution data
        pool_id = int(tx["poolId"])
        tokens_in = [parse_token_str(tx["tokensIn"])] # can only be one token
        update_pool_assets(tokens_in, [], proper_pools[pool_id])
        update_pool_assets(tokens_in, [], corrupted_pools[pool_id])
        shares_added = parse_token_str(tx["tokensOut"])
        if proper_pools[pool_id]["totalShares"]["denom"] != shares_added["denom"]:
           raise Exception("invalid shares minted for tx type {} hash {} height".format(tx["@type"], tx["txhash"], tx_height)) 
        proper_pools[pool_id]["totalShares"]["amount"] = str(int(proper_pools[pool_id]["totalShares"]["amount"]) + int(shares_added["amount"]))
        corrupted_pools[pool_id]["totalShares"]["amount"] = str(int(corrupted_pools[pool_id]["totalShares"]["amount"]) + int(shares_added["amount"]))
    
    elif "MsgExitSwapExternAmountOut" in msg_type or "MsgExitSwapShareAmountIn" in msg_type:
        raise Exception("not implemented - should not be needed")

    elif "MsgExitPool" in msg_type or "MsgUnPoolWhitelistedPool" in msg_type:
        addr = tx["sender"]
        pool_id = int(tx["poolId"])
        tokens_out = [parse_token_str(t) for t in tx["tokensOut"].split(",")] 
        shares_burned = parse_token_str(tx["tokensIn"])
        if proper_pools[pool_id]["totalShares"]["denom"] != shares_burned["denom"]:
           raise Exception("invalid shares minted for tx type {} hash {} height".format(tx["@type"], tx["txhash"], tx_height)) 
        if addr in affected_wallets and pool_id in affected_wallets[addr] and "shares_inflated" in affected_wallets[addr][pool_id]:
            only_valid_shares = False
            # if entry is in `affected_wallets`, the account performed a join earlier
            shares_inflated_amt = affected_wallets[addr][pool_id]["shares_inflated"]
            shares_valid_amt = affected_wallets[addr][pool_id]["shares_valid"]
            if shares_burned["amount"] > shares_valid_amt + shares_inflated_amt:
                # burning more shares than what was minted post-upgrade, hence using shares created prior to the upgrade.
                # we check the outstanding shares balance of the account at the previous height (querying onchain data) to see if the account
                # had enough shares (minus the inflated) to cover for the exit.
                # In such a case, we use valid shares and leave the inflated shres in the outstanding balance.
                # The user will just have a LP shares debt (that will have to be burned) but not a liquidity debt.
                # We do this only here so we limit to querying for balance only if strictly needed.
                shares_balance_amt = get_onchain_balance(tx_height-1, addr, shares_burned["denom"])
                if shares_burned["amount"] > shares_balance_amt:
                    raise Exception("this should not happen")
                shares_valid_amt = shares_balance_amt - shares_inflated_amt 
                if shares_valid_amt > shares_burned["amount"]:
                    # clamp to a max of shares_burned["amount"]
                    shares_valid_amt = shares_burned["amount"]
            if shares_burned["amount"] >= shares_valid_amt:
                # update outstanding shares in `affected_wallets`
                affected_wallets[addr][pool_id]["shares_inflated"] = shares_inflated_amt + shares_valid_amt - shares_burned["amount"]
                affected_wallets[addr][pool_id]["shares_valid"] = 0
            else:
                # in case the shares burned are less than the valid shares available, use those first (if inflated shares are never claimed,
                # they can simply be burned)
                only_valid_shares = True
                shares_valid_amt = shares_burned["amount"]
                affected_wallets[addr][pool_id]["shares_valid"] -= shares_valid_amt
            # simulate burning correct number of shares
            correct_tokens_out = burn_shares(proper_pools[pool_id], shares_valid_amt)
            if only_valid_shares:
                # the tx is using only valid shares, so we will compute a refund.
                # If the wallet claims the inflated shares in the future, the net will be a debt and not a credit towards the pool
                diff = sub_coins(correct_tokens_out, tokens_out)
                if diff: 
                   affected_wallets[addr][pool_id]["tokens_owed"] = diff 
            else:
                # add entry for stolen amount
                try:
                    affected_wallets[addr][pool_id]["tokens_stolen"] = sub_coins(tokens_out, correct_tokens_out)
                except:
                    # negative amounts? print out exception, so we can investigate
                    print("found exception for tx: {}, exploit exit results in less coin than what owed [{}, {}]".format(tx.txhash, correct_tokens_out, shares_valid_amt))
                    # we assume no tokens stolen, but give no credit
        else:
            # this exit does not happen after a v9 join, so the account is possibly due a refund
            shares_valid_amt = shares_burned["amount"]
            correct_tokens_out = burn_shares(proper_pools[pool_id], shares_valid_amt)
            diff = sub_coins(correct_tokens_out, tokens_out)
            if diff:
                if addr not in affected_wallets:
                    affected_wallets[addr] = {pool_id: {}}
                elif pool_id not in affected_wallets[addr]:
                    affected_wallets[addr][pool_id] = {}
                affected_wallets[addr][pool_id]["tokens_owed"] = diff
        # update corrected pool with valid data
        update_pool_assets([], correct_tokens_out, proper_pools[pool_id])
        proper_pools[pool_id]["totalShares"]["amount"] = str(int(proper_pools[pool_id]["totalShares"]["amount"]) - int(shares_valid_amt))
        # update pool used for consistency check with execution data
        update_pool_assets([], tokens_out, corrupted_pools[pool_id])
        corrupted_pools[pool_id]["totalShares"]["amount"] = str(int(corrupted_pools[pool_id]["totalShares"]["amount"]) - int(shares_burned["amount"]))

    elif "MsgJoinPool" in msg_type:
        addr = tx["sender"]
        pool_id = int(tx["poolId"])
        tokens_in = [parse_token_str(t) for t in tx["tokensIn"].split(",")]
        shares_added = parse_token_str(tx["tokensOut"])
        if proper_pools[pool_id]["totalShares"]["denom"] != shares_added["denom"]:
           raise Exception("invalid shares minted for tx type {} hash {} height".format(tx["@type"], tx["txhash"], tx["height"]))
        # update correct pool data with proper shares amount, and use exec data to update consistency-check pool
        corrupted_pools[pool_id]["totalShares"]["amount"] = str(int(corrupted_pools[pool_id]["totalShares"]["amount"]) + int(shares_added["amount"]))
        correct_shares_amt = correct_join_pool(tokens_in, proper_pools[pool_id])
        proper_pools[pool_id]["totalShares"]["amount"] = str(int(proper_pools[pool_id]["totalShares"]["amount"]) + int(correct_shares_amt))
        update_pool_assets(tokens_in, [], proper_pools[pool_id])
        update_pool_assets(tokens_in, [], corrupted_pools[pool_id])
        # add entry for wallet-pool to track creation of inflated shares
        if addr not in affected_wallets:
            affected_wallets[addr] = {pool_id: {"shares_valid": 0, "shares_inflated": 0}}
        elif pool_id not in affected_wallets[addr]:
            affected_wallets[addr][pool_id] = {"shares_valid": 0, "shares_inflated": 0}
        elif "shares_valid" not in affected_wallets[addr][pool_id]:
            affected_wallets[addr][pool_id]["shares_valid"] = 0
            affected_wallets[addr][pool_id]["shares_inflated"] = 0  
        affected_wallets[addr][pool_id]["shares_valid"] += correct_shares_amt
        affected_wallets[addr][pool_id]["shares_inflated"] += shares_added["amount"] - correct_shares_amt

### run analysis

this is done sequentially, row-by-row. It can take a while...

In [131]:
# query initial pools data (before upgrade takes effect)
initial_pools_data = get_onchain_pool_data(UPGRADE_HEIGHT-1)
# update swap fees on pool 1 (happened at update time)
initial_pools_data[1]["poolParams"]["swapFee"] = "2000000000000000"

corrected_pools_history = list()
affected_wallets = dict()
proper_pools = initial_pools_data
corrupted_pools = deepcopy(initial_pools_data) # this is to compare with onchain data for consistency
## print("processing block:", UPGRADE_HEIGHT, "remaining:", "{:5d}".format(HALT_HEIGHT - UPGRADE_HEIGHT), end='\r')
last_index = len(df) - 1
for index, tx in df.iterrows():
    # apply tx to update data structures
    apply_tx(tx, proper_pools, corrupted_pools, affected_wallets)
    # get netx tx height
    curr_height = int(tx["height"])
    if index == last_index:
        print("\ndone")
        continue # skip rest of the code as it would not be valid for the last element
    ## next_height = int(df.loc[index+1]["height"])
    ## if curr_height < next_height: # disabled to speedup computation
        # next tx is in a new block, finalize pools for this height
        ## print("processing block:", next_height, "remaining:", "{:5d}".format(HALT_HEIGHT - next_height), end='\r')
        ## corrected_pools_history.append(deepcopy(proper_pools))
        # perform sanity check of "corrupted" pools state against onchain data
        ## onchain_pools_data = get_onchain_pool_data(curr_height)
        ## if onchain_pools_data != corrupted_pools:
        ##     raise Exception("inconsistency found between simulated and onchain data for pools at height {}".format(curr_height))

found exception for tx: 4BD39A0E0382439126CF97003C0B0FD9EA5EA3E3C3615122E97FA326BE4923E2, exploit exit results in less coin than what owed [[{'denom': 'ibc/EA1D43981D5C9A1C4AAEA9C23BB1D4FA126BA9BC7020A25E0AE4AA841EA25DC5', 'amount': 32022329348614615}, {'denom': 'uosmo', 'amount': 50939375}], 4178386097894532289]


Exception: would result in coin with negative amount -- [{'denom': 'ibc/EA1D43981D5C9A1C4AAEA9C23BB1D4FA126BA9BC7020A25E0AE4AA841EA25DC5', 'amount': 32022329348614615}, {'denom': 'uosmo', 'amount': 50939375}] -- [{'amount': 32014949074096627, 'denom': 'ibc/EA1D43981D5C9A1C4AAEA9C23BB1D4FA126BA9BC7020A25E0AE4AA841EA25DC5'}, {'amount': 50951139, 'denom': 'uosmo'}]

In [124]:
tx

True

In [114]:
addr = tx["sender"]
pool_id = int(tx["poolId"])
tokens_out = [parse_token_str(t) for t in tx["tokensOut"].split(",")]
shares_burned = parse_token_str(tx["tokensIn"])
shares_valid_amt = shares_burned["amount"]
correct_tokens_out = burn_shares(proper_pools[pool_id], 2856057366418290176)
print(correct_tokens_out)
print("--")
print(tokens_out)

[{'denom': 'ibc/EA1D43981D5C9A1C4AAEA9C23BB1D4FA126BA9BC7020A25E0AE4AA841EA25DC5', 'amount': 21888261994759431}, {'denom': 'uosmo', 'amount': 34818653}]
--
[{'amount': 32014949074096627, 'denom': 'ibc/EA1D43981D5C9A1C4AAEA9C23BB1D4FA126BA9BC7020A25E0AE4AA841EA25DC5'}, {'amount': 50951139, 'denom': 'uosmo'}]


In [None]:
diff_items = {k: onchain_pools_data[k] for k in onchain_pools_data if k in corrupted_pools and onchain_pools_data[k] != corrupted_pools[k]}
diff_items.keys()

In [None]:
onchain_pools_data[712]

In [None]:
corrupted_pools[712]

## 5. Process analysis retults

In [99]:
# generate pandas DataFrame
addrpool_df = pd.DataFrame.from_dict([[addr, pool_id, pool_data] for addr, addr_pools in affected_wallets.items() for pool_id, pool_data in addr_pools.items()])
addrpool_df.columns = ["address", "pool_id", "pool_data"]

# expand pool data
addrpool_df = addrpool_df.join(pd.json_normalize(addrpool_df["pool_data"])[["shares_inflated", "tokens_stolen", "tokens_owed"]]).drop(columns=["pool_data"])

addrpool_df.head()

Unnamed: 0,address,pool_id,shares_inflated,tokens_stolen,tokens_owed
0,osmo1lj72jml6hw635cek9hep62u3hvfwng9e2e3r09,606,726942355452044093187,,
1,osmo1rsks3gfv2rcf3atj8gs09yyhrftyn7j5vuu9v7,712,2501715123556921241,,
2,osmo1jxpztalf3skjftyrlf3tpd3c94g2nqkanr7c3a,648,0,,
3,osmo1vtmpjl927zgwa8ht6mt75zsfyx75xdqpcu4atn,1,1108487209230434290,,
4,osmo1z98eg2ztdp2glyla62629nrlvczg8s7f8sgpm5,597,0,[{'denom': 'ibc/FE2CD1E6828EC0FAB8AF39BAC45BC2...,


In [100]:
def token_to_str(tokens_list):
    if type(tokens_list) is not list:
        return ""
    return ",".join([str(token["amount"]) + token["denom"] for token in tokens_list])

def net_difference_tokens(row):
    stolen = row[3]
    owed = row[4]
    if type(stolen) is list and type(owed) is list:
        # we assume stolen > owed
        stolen = sub_coins(stolen, owed)
        owed = ""
    return stolen, owed

# compute net difference if entries in both `tokens_stolen` and `tokens_owned`
addrpool_df["tokens_stolen"], addrpool_df["tokens_owed"] = zip(*addrpool_df.apply(net_difference_tokens, axis=1))

# convert tokens list into strings
addrpool_df.tokens_owed = addrpool_df.tokens_owed.apply(token_to_str)
addrpool_df.tokens_stolen = addrpool_df.tokens_stolen.apply(token_to_str)

addrpool_df.head()

Unnamed: 0,address,pool_id,shares_inflated,tokens_stolen,tokens_owed
0,osmo1lj72jml6hw635cek9hep62u3hvfwng9e2e3r09,606,726942355452044093187,,
1,osmo1rsks3gfv2rcf3atj8gs09yyhrftyn7j5vuu9v7,712,2501715123556921241,,
2,osmo1jxpztalf3skjftyrlf3tpd3c94g2nqkanr7c3a,648,0,,
3,osmo1vtmpjl927zgwa8ht6mt75zsfyx75xdqpcu4atn,1,1108487209230434290,,
4,osmo1z98eg2ztdp2glyla62629nrlvczg8s7f8sgpm5,597,0,252939555ibc/FE2CD1E6828EC0FAB8AF39BAC45BC25B9...,


## 6. Save wallets info as `wallets-pools.csv`

In [101]:
addrpool_df.to_csv("csv/wallets-pools.csv", index=None)

## 7. APPENDIX

### transactions occurrences

In [95]:
df.groupby("@type").size().reset_index(name="number_of_txs")

Unnamed: 0,@type,number_of_txs
0,/osmosis.gamm.v1beta1.MsgExitPool,832
1,/osmosis.gamm.v1beta1.MsgJoinPool,1972
2,/osmosis.gamm.v1beta1.MsgJoinSwapExternAmountIn,2018
3,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,39187
4,/osmosis.gamm.v1beta1.MsgSwapExactAmountOut,455
5,/osmosis.superfluid.MsgUnPoolWhitelistedPool,19


### group transactions by sender and sort by number of txs

In [96]:
(df.groupby(["@type", "sender"])
    .size()
    .reset_index(name="number_of_txs")
    .sort_values(by=["number_of_txs"], ascending=False))

Unnamed: 0,@type,sender,number_of_txs
7263,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1lyncm90mfw9fp9xpdnxgu6sjkh8egnzsqm3l5m,2727
6393,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1h9ac2c382h4hyadx3mlqsgc8wus53dn67kl9p9,1780
5864,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1f6k7kcs2jcfpkyr86vw3hdqtz6775ekfahjm7u,1526
8086,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo1qmnw9h2fd0h2sqsq2h937n0c4kjz4jlqf3x9w4,1487
3964,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo15vdjje8009ly9rudapxjcht5h9vshc7hw3a7zy,1044
...,...,...,...
3272,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo12wkfsyfc6et0ep7850fwcdwkxvsxv7nuyzeaav,1
3271,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo12whmqnezsqeupmfjfvshtfa77j29wwltntcnnh,1
3270,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo12wh0vevlqct7m8wvahzr9pf05xq34ye7rwngkv,1
3267,/osmosis.gamm.v1beta1.MsgSwapExactAmountIn,osmo12vn7u0wmkf67epmr427hdka76wvk7gejky9n8y,1


### find wallets-by-pools that did either join or exit. Count occurrences of joins/exits per pool

In [97]:
import math

exit_joins_df = (df[(df["@type"].str.contains("MsgJoinPool")) | (df["@type"].str.contains("MsgExitPool"))]
    .groupby(["@type", "sender", "poolId"])
    .size()
    .reset_index(name="number_of_txs")
    .sort_values(by=["number_of_txs"], ascending=False))
exit_joins_df = exit_joins_df[exit_joins_df["@type"].str.contains("MsgJoinPool")].merge(exit_joins_df[exit_joins_df["@type"].str.contains("MsgExitPool")], on=["sender", "poolId"], how="outer")
exit_joins_df.drop(columns=["@type_x", "@type_y"], inplace=True)
exit_joins_df.columns = ["address", "poolId", "joins", "exits"]

exit_joins_df.joins = exit_joins_df.joins.apply(lambda v: 0 if math.isnan(v) else int(v))
exit_joins_df.exits = exit_joins_df.exits.apply(lambda v: 0 if math.isnan(v) else int(v))

exit_joins_df

Unnamed: 0,address,poolId,joins,exits
0,osmo18qx59wy8s3ytax3e0akna934e86mw776vlzjtq,1,45,47
1,osmo1yglld3aary7lnrrn2dz7la84kmnmen4kpsxzay,712,43,42
2,osmo1hq8tlgq0kqz9e56532zghdhz7g8gtjymdltqer,678,28,28
3,osmo1jfxcl8ja3nnfjduqemptknz2j6nk6502zp3rte,561,25,25
4,osmo18qx59wy8s3ytax3e0akna934e86mw776vlzjtq,712,19,17
...,...,...,...,...
2139,osmo1cww8ylq7ge7ps8776qvugasd6yks3c7u6wqyht,561,0,1
2140,osmo1cwux6ehfyg98zj7x3vgr7gz33l2kye3n42su5t,1,0,1
2141,osmo1cwj3vqxk6p7e6ntxt8925m3l9d6l0jr554d3e7,1,0,1
2142,osmo1crykcdaxm3u4flnjxq0kmyye7uqy9tcesac6u2,651,0,1
