# Crypto Exchange Analysis

This notebook reads 4 different tables generated by the data pipelines on Airflow and performs some analysis to extract insights.

**Note**: Operations will be filtering a specific date partition for sample purposes, the date can be changed on the code if desired.

In [37]:
#imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.float_format', '{:,.2f}'.format)

## Collecting each table

Data is partitioned by ingestion date used on the date column, here the whole data will be read so the pandas dataframe will contain all data generated throught the time the pipeline was active

In [38]:
df_exchanges = pd.read_parquet('/home/jovyan/data/crypto_ingestion/processed/exchanges/')
df_shared_markets = pd.read_parquet('/home/jovyan/data/crypto_ingestion/processed/shared_markets/')
df_market_30day_volume =  pd.read_parquet('/home/jovyan/data/crypto_ingestion/processed/market_30day_volume/')
df_exchange_30day_volume =  pd.read_parquet('/home/jovyan/data/crypto_ingestion/processed/exchange_30day_volume/')

In [51]:
df_exchanges

Unnamed: 0,exchange_id,exchange_name,year_established,country,trust_score,trust_score_rank,date
0,binance,Binance,2017,Cayman Islands,10,1,2025-08-10
1,gate,Gate,2013,Panama,10,2,2025-08-10
2,bybit_spot,Bybit,2018,British Virgin Islands,10,3,2025-08-10
3,bitget,Bitget,2018,Seychelles,10,4,2025-08-10
4,mxc,MEXC,2018,Seychelles,10,5,2025-08-10
5,okex,OKX,2017,Seychelles,10,6,2025-08-10
6,gdax,Coinbase Exchange,2012,United States,10,7,2025-08-10
7,kraken,Kraken,2011,United States,10,8,2025-08-10
8,huobi,HTX,2013,Seychelles,9,9,2025-08-10
9,crypto_com,Crypto.com Exchange,2019,Malta,9,10,2025-08-10


In [52]:
df_shared_markets

Unnamed: 0,exchange_id,market_id,base,target,name,date
0,okex,ETH_USD,ETH,USD,ETH/USD,2025-08-10
1,okex,BTC_USD,BTC,USD,BTC/USD,2025-08-10
2,okex,XRP_USD,XRP,USD,XRP/USD,2025-08-10
3,gdax,ETH_USD,ETH,USD,ETH/USD,2025-08-10
4,gdax,BTC_USD,BTC,USD,BTC/USD,2025-08-10
5,gdax,XRP_USD,XRP,USD,XRP/USD,2025-08-10
6,kraken,ETH_USD,ETH,USD,ETH/USD,2025-08-10
7,kraken,XRP_USD,XRP,USD,XRP/USD,2025-08-10
8,crypto_com,ETH_USD,ETH,USD,ETH/USD,2025-08-10
9,crypto_com,BTC_USD,BTC,USD,BTC/USD,2025-08-10


In [53]:
df_market_30day_volume

Unnamed: 0,market_id,timestamp,volume,date
0,ETH_USD,2025-07-12 00:00:00,42455550664.55,2025-08-10
1,ETH_USD,2025-07-13 00:00:00,17472135946.60,2025-08-10
2,ETH_USD,2025-07-14 00:00:00,18605172049.09,2025-08-10
3,ETH_USD,2025-07-15 00:00:00,41336421998.04,2025-08-10
4,ETH_USD,2025-07-16 00:00:00,43416230283.77,2025-08-10
...,...,...,...,...
336,XRP_USD,2025-08-07 00:00:00,4082166604.03,2025-08-10
337,XRP_USD,2025-08-08 00:00:00,7757095979.62,2025-08-10
338,XRP_USD,2025-08-09 00:00:00,8889928900.44,2025-08-10
339,XRP_USD,2025-08-10 00:00:00,4765475379.04,2025-08-10


In [54]:
df_exchange_30day_volume

Unnamed: 0,exchange_id,timestamp,volume,date
0,binance,2025-07-12 10:20:00,213596.66,2025-08-10
1,binance,2025-07-13 10:20:00,120523.79,2025-08-10
2,binance,2025-07-14 10:20:00,213346.00,2025-08-10
3,binance,2025-07-15 10:20:00,240031.75,2025-08-10
4,binance,2025-07-16 10:20:00,224760.32,2025-08-10
...,...,...,...,...
295,crypto_com,2025-08-06 10:20:00,28695.61,2025-08-10
296,crypto_com,2025-08-07 10:20:00,20168.57,2025-08-10
297,crypto_com,2025-08-08 10:20:00,30413.26,2025-08-10
298,crypto_com,2025-08-09 10:20:00,21560.63,2025-08-10


## Top 10 exchanges by BTC Volume on 2025-08-10

In [39]:
df_exchange_30day_volume_filtered = df_exchange_30day_volume[df_exchange_30day_volume['date'] == '2025-08-10']
total_volume = df_exchange_30day_volume.groupby('exchange_id')['volume'].sum().reset_index()
top_exchanges = total_volume.nlargest(10, 'volume')

top_exchanges

Unnamed: 0,exchange_id,volume
0,binance,6252200.19
4,gate,1284729.19
8,mxc,1277680.32
1,bitget,1183793.84
2,bybit_spot,1132948.65
9,okex,954503.45
5,gdax,905818.44
6,huobi,850791.79
3,crypto_com,839265.18
7,kraken,386911.23


## Trust Score vs BTC Volume 30 day on 2025-08-10

In [40]:
df_exchange_30day_volume_filtered = df_exchange_30day_volume[df_exchange_30day_volume['date'] == '2025-08-10']
df_exchanges_filtered = df_exchanges[df_exchanges['date'] == '2025-08-10']

df_volume_exchange_merged = df_exchange_30day_volume_filtered.merge(df_exchanges_filtered, on="exchange_id")
df_volume_exchange_merged_grouped = df_volume_exchange_merged.groupby(["exchange_name", "trust_score"], as_index=False)["volume"].sum()
df_volume_exchange_merged_grouped.sort_values(by=["trust_score", "volume"], ascending=[False, False])

Unnamed: 0,exchange_name,trust_score,volume
0,Binance,10,6252200.19
5,Gate,10,1284729.19
8,MEXC,10,1277680.32
1,Bitget,10,1183793.84
2,Bybit,10,1132948.65
9,OKX,10,954503.45
3,Coinbase Exchange,10,905818.44
7,Kraken,10,386911.23
6,HTX,9,850791.79
4,Crypto.com Exchange,9,839265.18


### Market concentration by exchange considering 30 day shared market volume on 2025-08-10

In [50]:
df_shared_markets_filtered = df_shared_markets[df_shared_markets['date'] == '2025-08-10']
df_market_30day_volume_filtered = df_market_30day_volume[df_market_30day_volume['date'] == '2025-08-10']
df_exchanges_filtered = df_exchanges[df_exchanges['date'] == '2025-08-10']

df_volume_market_merged = df_market_30day_volume_filtered.merge(df_shared_markets_filtered, on="market_id")\
.groupby(["exchange_id", "base", "target"], as_index=False)["volume"].sum()

total_per_exchange = df_volume_market_merged.groupby("exchange_id")["volume"].sum().rename("total_exchange_volume")
df_volume_market_merged = df_volume_market_merged.merge(total_per_exchange, on="exchange_id")
df_volume_market_merged["pct_of_exchange_volume"] = round((df_volume_market_merged["volume"] / df_volume_market_merged["total_exchange_volume"]) * 100, 2)

df_volume_market_merged = df_volume_market_merged.merge(df_exchanges_filtered[["exchange_id", "exchange_name"]], on="exchange_id")
df3_top_pct = df_volume_market_merged.sort_values(["exchange_id", "pct_of_exchange_volume",], ascending=[True,False])

df3_top_pct


Unnamed: 0,exchange_id,base,target,volume,total_exchange_volume,pct_of_exchange_volume,exchange_name
1,crypto_com,ETH,USD,4643464983077.32,9721790876268.64,47.76,Crypto.com Exchange
0,crypto_com,BTC,USD,4099607715899.54,9721790876268.64,42.17,Crypto.com Exchange
2,crypto_com,XRP,USD,978718177291.78,9721790876268.64,10.07,Crypto.com Exchange
4,gdax,ETH,USD,4643464983077.32,9721790876268.64,47.76,Coinbase Exchange
3,gdax,BTC,USD,4099607715899.54,9721790876268.64,42.17,Coinbase Exchange
5,gdax,XRP,USD,978718177291.78,9721790876268.64,10.07,Coinbase Exchange
6,kraken,ETH,USD,4643464983077.32,5622183160369.09,82.59,Kraken
7,kraken,XRP,USD,978718177291.78,5622183160369.09,17.41,Kraken
9,okex,ETH,USD,4643464983077.32,9721790876268.64,47.76,OKX
8,okex,BTC,USD,4099607715899.54,9721790876268.64,42.17,OKX
