In [None]:
import pandas as pd
import os
from pathlib import Path

# Transaction types and asset tickers that are retained from the raw export.
ACTIONS = ["BUY", "SELL"]
ASSETS = ["BTC", "ETH", "SOL"]

# The "data" folder lives three directory levels above the kernel's current
# working directory.
DATA_DIR = Path.cwd().parent.parent.parent / "data"

In [7]:
# Per-asset dates (YYYY-MM-DD) on which short-term trades were made; trades
# falling on these dates should not be included in the cleaned data.
EXEMPTIONS = {
    "BTC": [
        "2024-04-14",
        "2024-04-15",
        "2024-04-16",
    ],
    "ETH": [
        "2024-03-14",
        "2024-04-14",
        "2024-04-23",
    ],
    "SOL": [
        # NOTE(review): this is the only 2023 date in the table - confirm the year.
        "2023-03-04",
        "2024-05-22",
    ],
}

In [8]:
# Load the raw Coinbase export from the data directory.
raw_csv_path = DATA_DIR / "coinbase_raw.csv"
raw_df = pd.read_csv(raw_csv_path)

In [11]:

# Normalise the raw Coinbase export into the cleaned trade schema.
#
# Work on a copy: the original code aliased raw_df (so the derived columns
# were written into the raw frame) and then assigned into filtered slices,
# which triggers pandas' SettingWithCopyWarning.
coinbase_df = raw_df.copy()

coinbase_df["action"] = coinbase_df["Transaction Type"].str.upper()
coinbase_df["asset"] = coinbase_df["Asset"]

# Keep only the transaction types and tickers configured in ACTIONS / ASSETS.
is_wanted = coinbase_df["action"].isin(ACTIONS) & coinbase_df["asset"].isin(ASSETS)
coinbase_df = coinbase_df.loc[is_wanted].copy()


def _strip_dollar(column):
    """Remove literal "$" characters from a string column.

    regex=False is passed explicitly: as a regular expression "$" is the
    end-of-string anchor, and the default of `regex` differs across pandas
    versions.
    """
    return column.str.replace("$", "", regex=False)


coinbase_df["id"] = coinbase_df["ID"].map(lambda raw_id: f"coinbase-{raw_id}")
coinbase_df["platform"] = "coinbase"

# Timestamps are parsed one by one and stored as strings, so the first ten
# characters of "date" are the calendar day.
coinbase_df["date"] = coinbase_df["Timestamp"].map(pd.to_datetime).astype(str)
coinbase_df["quantity"] = coinbase_df["Quantity Transacted"].astype(float).abs()
coinbase_df["price"] = _strip_dollar(coinbase_df["Price at Transaction"]).astype(float).abs()
# fees and cost are converted to float as well, so every monetary column has
# the same numeric dtype (previously these two were left as strings).
coinbase_df["fees"] = _strip_dollar(coinbase_df["Fees and/or Spread"]).astype(float)
coinbase_df["cost"] = _strip_dollar(
    coinbase_df["Total (inclusive of fees and/or spread)"]
).astype(float)
# quantity and price are already floats here; no further casting is needed.
coinbase_df["value"] = coinbase_df["quantity"] * coinbase_df["price"]

# Drop rows without a price and rows that moved no quantity. The trailing
# copy keeps later column assignments from writing into a slice.
has_price = coinbase_df["price"].notna()
nonzero_quantity = coinbase_df["quantity"] != 0
coinbase_df = coinbase_df.loc[has_price & nonzero_quantity].copy()

def exempt_trade(row):
    """Tell whether a trade falls on an exempted date for its asset.

    Parameters
    ----------
    row : mapping-like
        Must provide "asset" and "date"; only the first ten characters of
        "date" are compared against the exemption table.

    Returns
    -------
    bool
        True when the asset has an entry in EXEMPTIONS and the leading ten
        characters of the date appear in that entry, otherwise False.
    """
    ticker = row["asset"]
    day = row["date"][:10]
    if ticker not in EXEMPTIONS:
        return False
    return day in EXEMPTIONS[ticker]

# Mark every trade that lands on an exempted date for its asset.
coinbase_df["exempt"] = coinbase_df.apply(exempt_trade, axis=1)

# Columns kept in the cleaned output, in their final order.
output_columns = [
    "id",
    "platform",
    "date",
    "action",
    "asset",
    "price",
    "quantity",
    "fees",
    "cost",
    "value",
]

# Drop exempt trades, keep the output columns, then order by date and asset
# with a fresh 0..n-1 index.
coinbase_df = (
    coinbase_df.loc[~coinbase_df["exempt"], output_columns]
    .sort_values(["date", "asset"])
    .reset_index(drop=True)
)

coinbase_df.head()

Unnamed: 0,id,platform,date,action,asset,price,quantity,fees,cost,value
0,coinbase-60b1a4765a49f30001783fa2,coinbase,2021-05-29 02:18:30+00:00,BUY,BTC,36348.265,0.013489,9.698253415,500.0,490.301747
1,coinbase-60b1a4c7d4ca9f0001c317d9,coinbase,2021-05-29 02:19:51+00:00,BUY,ETH,2510.495,0.195198,9.9559953108,500.0,490.044005
2,coinbase-60d2b69a60ad5c00010f6a62,coinbase,2021-06-23 04:20:42+00:00,BUY,SOL,29.801344,3.233423,3.63966398607658,100.0,96.360336
3,coinbase-6170124955f9cb000193eed6,coinbase,2021-10-20 12:57:45+00:00,SELL,BTC,64298.42,0.007451,7.11,470.14,479.084955
4,coinbase-6170127ace47b2000176d0c9,coinbase,2021-10-20 12:58:34+00:00,SELL,SOL,160.594,0.741466,2.99,115.5,119.075047


In [10]:
# Persist the cleaned trades next to the raw export, without the index column.
clean_csv_path = DATA_DIR / "coinbase_clean.csv"
coinbase_df.to_csv(clean_csv_path, index=False)