In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('final_rwa_data.csv')
df

Unnamed: 0,date,protocol,tvl,chain,usdc_mcap,usdt_mcap,dai_mcap,total_stablecoin_mcap,treasury_yield,m2_supply,tvl_share_by_chain,asset_type
0,2020-10-17,centrifuge,0,borrowed,1.472977e+10,2.711217e+09,3.389384e+08,1.777992e+10,0.78,18978.9,0.000000,Diversified
1,2020-10-17,centrifuge,20165,Ethereum,1.472977e+10,2.711217e+09,3.389384e+08,1.777992e+10,0.78,18978.9,100.000000,Diversified
2,2020-10-17,centrifuge,20165,All,1.472977e+10,2.711217e+09,3.389384e+08,1.777992e+10,0.78,18978.9,100.000000,Diversified
3,2020-10-17,centrifuge,0,Ethereum-borrowed,1.472977e+10,2.711217e+09,3.389384e+08,1.777992e+10,0.78,18978.9,0.000000,Diversified
4,2020-10-18,centrifuge,0,borrowed,1.472977e+10,2.733321e+09,3.551186e+08,1.781821e+10,0.78,18978.9,0.000000,Diversified
...,...,...,...,...,...,...,...,...,...,...,...,...
59756,2025-11-25,ribbon-lend,55,borrowed,1.845206e+11,7.431023e+10,4.844847e+08,2.593154e+11,4.04,22298.1,0.000003,Private Credit
59757,2025-11-25,centrifuge,0,Arbitrum,1.845206e+11,7.431023e+10,4.844847e+08,2.593154e+11,4.04,22298.1,0.000000,Diversified
59758,2025-11-25,centrifuge,49966677,Plume Mainnet,1.845206e+11,7.431023e+10,4.844847e+08,2.593154e+11,4.04,22298.1,100.000000,Diversified
59759,2025-11-25,swarm-markets,63337,All,1.845206e+11,7.431023e+10,4.844847e+08,2.593154e+11,4.04,22298.1,0.001092,Diversified


In [3]:
# Parse date and sort
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date")

# For overall protocol-level TVL we use chain == "All"
df_all = df[df["chain"] == "All"].copy()

macro_cols = [
    "usdc_mcap",
    "usdt_mcap",
    "dai_mcap",
    "total_stablecoin_mcap",
    "treasury_yield",
    "m2_supply",
]

# Sanity check
print("Date range:", df["date"].min(), "->", df["date"].max())
print("Protocols:", df["protocol"].unique())
print("Asset types:", df["asset_type"].unique())
print("Chains:", df["chain"].unique())

Date range: 2020-10-17 00:00:00 -> 2025-11-25 00:00:00
Protocols: ['centrifuge' 'truefi' 'maple-finance' 'goldfinch' 'swarm-markets'
 'credix' 'polytrade' 'ribbon-lend' 'ondo-finance']
Asset types: ['Diversified' 'Private Credit' 'Treasury']
Chains: ['borrowed' 'Ethereum' 'All' 'Ethereum-borrowed' 'Ethereum-staking'
 'staking' 'Polygon' 'Solana' 'Solana-borrowed' 'Mantle' 'Sui' 'Aptos'
 'Arbitrum' 'Arbitrum-borrowed' 'Noble' 'Base' 'XRPL' 'BSC' 'Avalanche'
 'Plume Mainnet' 'Stellar']


In [4]:
# -------------------------------------------------------------------
# 1) Macro Liquidity vs RWA TVL (for multi-line chart)
#    - RWA TVL (sum over all protocols, chain == "All")
#    - Stablecoin caps
#    - Treasury yield, M2
# Output: viz1_macro_vs_rwa.csv
# -------------------------------------------------------------------

# RWA TVL per day (sum over all protocols, chain == "All")
rwa_tvl_daily = (
    df_all
    .groupby("date")["tvl"]
    .sum()
    .rename("rwa_tvl")
)

# Macro series per day (they are duplicated across rows, so take first)
macro_daily = (
    df_all
    .groupby("date")[macro_cols]
    .first()
)

viz1 = (
    pd.concat([rwa_tvl_daily, macro_daily], axis=1)
    .reset_index()
    .sort_values("date")
)

# Export for D3
viz1.to_csv("viz1_macro_vs_rwa.csv", index=False)

# D3 expectation:
# - Each row = one day
# - Columns: date, rwa_tvl, usdc_mcap, usdt_mcap, dai_mcap,
#            total_stablecoin_mcap, treasury_yield, m2_supply
# - Use date as x, others as separate series (multi-line chart / dual-axis)



In [10]:
viz1.head()

Unnamed: 0,date,rwa_tvl,usdc_mcap,usdt_mcap,dai_mcap,total_stablecoin_mcap,treasury_yield,m2_supply
0,2020-10-17,20165,14729770000.0,2711217000.0,338938397.0,17779920000.0,0.78,18978.9
1,2020-10-18,20179,14729770000.0,2733321000.0,355118575.0,17818210000.0,0.78,18978.9
2,2020-10-19,20163,14879770000.0,2742216000.0,330837573.0,17952820000.0,0.78,18978.9
3,2020-10-20,20135,15179770000.0,2742213000.0,330594649.0,18252580000.0,0.81,18978.9
4,2020-10-21,27818,15179770000.0,2809284000.0,331803319.0,18320850000.0,0.83,18978.9


In [5]:
# -------------------------------------------------------------------
# 2) Capital Allocation by Asset Type (Treasury vs Private Credit vs Diversified)
#    - chain == "All"
#    - For each date & asset_type: TVL and share of total RWA TVL that day
# Output: viz2_asset_type_shares.csv
# -------------------------------------------------------------------

viz2 = (
    df_all
    .groupby(["date", "asset_type"])["tvl"]
    .sum()
    .reset_index()
    .sort_values(["date", "asset_type"])
)

# Total RWA TVL per date (chain == "All")
total_by_date = viz2.groupby("date")["tvl"].transform("sum")
viz2["share"] = viz2["tvl"] / total_by_date

viz2.to_csv("viz2_asset_type_shares.csv", index=False)

# D3 expectation:
# - Tidy/long format:
#   date, asset_type, tvl, share
# - For a 100% stacked area chart:
#   - x: date
#   - y: share
#   - color: asset_type
# - Or use tvl directly for stacked area of absolute TVL

In [11]:
viz2.head()

Unnamed: 0,date,asset_type,tvl,share
0,2020-10-17,Diversified,20165,1.0
1,2020-10-18,Diversified,20179,1.0
2,2020-10-19,Diversified,20163,1.0
3,2020-10-20,Diversified,20135,1.0
4,2020-10-21,Diversified,27818,1.0


In [6]:


# -------------------------------------------------------------------
# 3) Chain Allocation Over Time
#    - Pure execution chains (Ethereum, Solana, Polygon, etc.)
#    - Exclude synthetic categories: "All", "borrowed", "staking", "X-borrowed", "X-staking"
#    - For each date & chain: TVL and share within tracked chains that day
# Output: viz3_chain_allocation.csv
# -------------------------------------------------------------------

all_chains = sorted(df["chain"].unique())
pure_chains = [
    c for c in all_chains
    if "-" not in c and c not in ["All", "borrowed", "staking"]
]

df_chain = df[df["chain"].isin(pure_chains)].copy()

viz3 = (
    df_chain
    .groupby(["date", "chain"])["tvl"]
    .sum()
    .reset_index()
    .sort_values(["date", "chain"])
)

# Share within chains per day (just among these pure chains)
chain_total_by_date = viz3.groupby("date")["tvl"].transform("sum")
viz3["share_within_chains"] = viz3["tvl"] / chain_total_by_date

viz3.to_csv("viz3_chain_allocation.csv", index=False)

# D3 expectation:
# - Tidy/long format:
#   date, chain, tvl, share_within_chains
# - For area chart or stacked area:
#   - x: date
#   - y: tvl or share_within_chains
#   - color: chain
# - Add legend + ability to toggle chains for readability


In [12]:
viz3.head()

Unnamed: 0,date,chain,tvl,share_within_chains
0,2020-10-17,Ethereum,20165,1.0
1,2020-10-18,Ethereum,20179,1.0
2,2020-10-19,Ethereum,20163,1.0
3,2020-10-20,Ethereum,20135,1.0
4,2020-10-21,Ethereum,27818,1.0


In [7]:

# -------------------------------------------------------------------
# 4) Protocol Concentration
#    A) Full time series by protocol (for lines or sparklines)
#    B) Snapshot on latest date (for a bar chart of concentration)
# Output:
#   viz4_protocol_timeseries.csv
#   viz4_protocol_latest.csv
# -------------------------------------------------------------------

# A) Time series: TVL by protocol, chain == "All"
viz4_ts = (
    df_all
    .groupby(["date", "protocol"])["tvl"]
    .sum()
    .reset_index()
    .sort_values(["date", "protocol"])
)

viz4_ts.to_csv("viz4_protocol_timeseries.csv", index=False)

# B) Snapshot at latest date available
latest_date = viz4_ts["date"].max()
viz4_latest = viz4_ts[viz4_ts["date"] == latest_date].copy()

total_latest_tvl = viz4_latest["tvl"].sum()
viz4_latest["share_of_rwa"] = viz4_latest["tvl"] / total_latest_tvl

viz4_latest.to_csv("viz4_protocol_latest.csv", index=False)

print("Latest snapshot date for protocols:", latest_date.date())

# D3 expectation:
# - Timeseries CSV:
#   date, protocol, tvl
#   -> line chart per protocol (with color = protocol)
# - Latest CSV:
#   date, protocol, tvl, share_of_rwa
#   -> single-day bar chart sorted by tvl or share_of_rwa

Latest snapshot date for protocols: 2025-11-25


In [17]:
viz4_ts.head()

Unnamed: 0,date,protocol,tvl
0,2020-10-17,centrifuge,20165
1,2020-10-18,centrifuge,20179
2,2020-10-19,centrifuge,20163
3,2020-10-20,centrifuge,20135
4,2020-10-21,centrifuge,27818


In [9]:
# =====================================================================
# Viz 5 – Yield vs RWA Asset Composition (dual-axis time series)
# Creates: viz5_yield_timeseries.csv
# =====================================================================

df_viz5 = df.copy()

# Use only rows that have asset_type + tvl values
df_viz5 = df_viz5[df_viz5["tvl"] > 0].copy()

# Total TVL per date (used to compute share)
total_per_day = df_viz5.groupby("date")["tvl"].sum().rename("total_tvl")
df_viz5 = df_viz5.merge(total_per_day, on="date")

# Share of RWA capital per asset type per day
df_viz5["share_of_rwa"] = df_viz5["tvl"] / df_viz5["total_tvl"]

# The D3 visualization expects the yield column to be called "yield_10y"
df_viz5 = df_viz5.rename(columns={"treasury_yield": "yield_10y"})

# Final minimal structure required for Viz5
viz5 = df_viz5[[
    "date",
    "yield_10y",
    "asset_type",
    "share_of_rwa",
    "tvl"
]].sort_values(["date", "asset_type"])

viz5.to_csv("viz5_yield_timeseries.csv", index=False)

viz5.head()


Unnamed: 0,date,yield_10y,asset_type,share_of_rwa,tvl
0,2020-10-17,0.78,Diversified,0.5,20165
1,2020-10-17,0.78,Diversified,0.5,20165
2,2020-10-18,0.78,Diversified,0.5,20179
3,2020-10-18,0.78,Diversified,0.5,20179
4,2020-10-19,0.78,Diversified,0.5,20163
