# Analysis of the returns of bitcoin

## Setup

In [None]:
from matplotlib import font_manager
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Chart theme: seaborn "darkgrid" with faint grid lines, then matplotlib's dark background style
theme_overrides = {"grid.alpha": 0.33}
sns.set_theme(style="darkgrid", rc=theme_overrides)
plt.style.use("dark_background")

# Helper that exports the current figure as a PNG file
def save_chart_as_png(filename: str) -> None:
    """Save the current matplotlib figure to ``../images/<filename>.png``.

    Args:
        filename: Base name of the output file, without the ``.png`` extension.
    """
    savefig_options = {
        "format": "png",
        "dpi": 300,
        "orientation": "landscape",
        "bbox_inches": "tight",
    }
    plt.savefig(f"../images/{filename}.png", **savefig_options)

In [None]:
# Get all dfs
def get_df(csv_basename: str) -> pd.DataFrame:
    """Read ``../data/<csv_basename>.csv`` into a DataFrame indexed by its ``date`` column.

    Args:
        csv_basename: File name of the CSV without the ``.csv`` extension.

    Returns:
        The CSV contents, with the ``date`` column as index and date parsing enabled.
    """
    csv_path = f"../data/{csv_basename}.csv"
    return pd.read_csv(csv_path, parse_dates=True, index_col="date")

# Load one DataFrame per asset, in the same order as the CSV basenames below
df_btc, df_sp500, df_us10y, df_au, df_wti, df_chf = (
    get_df(csv_basename)
    for csv_basename in ("BTC", "SP500", "US10Y", "AU", "WTI", "CHF")
)

## Returns of bitcoin across time 💰

In [None]:
# Daily simple and log returns, plus 90-row and 365-row rolling means of the log returns
btc_price = df_btc["price"]
df_btc["price_change"] = btc_price.pct_change()
df_btc["price_change_log"] = np.log(btc_price / btc_price.shift(1))
for ma_suffix, ma_window in (("90d", 90), ("1y", 365)):
    df_btc[f"price_change_log_{ma_suffix}_ma"] = df_btc["price_change_log"].rolling(window=ma_window).mean()

In [None]:
plt.figure(figsize=(10, 6))

# Draw each moving average of the daily log returns as its own line
for ma_column, ma_label, ma_color, ma_width in (
    ("price_change_log_90d_ma", "Moving Average (90-days)", "#00f8ff", 0.75),
    ("price_change_log_1y_ma", "Moving Average (1-year)", "#ff5b00", 1),
):
    sns.lineplot(data=df_btc, x=df_btc.index, y=ma_column, label=ma_label, color=ma_color, linewidth=ma_width)

# Dashed reference line at a return of zero
plt.axhline(y=0, linestyle="--", linewidth=2/3, color="red")

plt.title("Bitcoin Daily Returns Over Time")
plt.xlabel(None)
plt.ylabel(None)

save_chart_as_png("2_BTC_returns")

In [None]:
# Row of the day with the highest daily return
best_day = df_btc["price_change"].idxmax()
df_btc.loc[[best_day]]

In [None]:
# Row of the day with the lowest daily return
worst_day = df_btc["price_change"].idxmin()
df_btc.loc[[worst_day]]

In [None]:
# Total cumulative return: compound every daily return (equivalent to the price appreciation since the first entry)
df_btc["price_change"].add(1).prod() - 1

In [None]:
# Compound annual growth rate (CAGR) between the first and the last row
first_day, last_day = df_btc.index[0], df_btc.index[-1]
start_value = df_btc["price"].iloc[0]
end_value = df_btc["price"].iloc[-1]
# Elapsed time in years, using 365.25 days per year
n = (last_day - first_day).days / 365.25

(end_value / start_value) ** (1 / n) - 1

**Key takeaways:**
- ...

### Comparison of returns with other assets

In [None]:
# Resample every df to weekly means so bitcoin (which trades every day) can be compared with the other assets.
# The other dfs are first trimmed so they do not start before the first bitcoin entry.
btc_first_date = df_btc.index.min()
df_btc_w = df_btc[["price"]].resample("W").mean()
df_sp500_w, df_us10y_w, df_au_w, df_wti_w, df_chf_w = (
    asset_df.loc[asset_df.index >= btc_first_date].resample("W").mean()
    for asset_df in (df_sp500, df_us10y, df_au, df_wti, df_chf)
)

In [None]:
# Add weekly returns and their 52-week moving average to each weekly df
weekly_frames = (df_btc_w, df_sp500_w, df_us10y_w, df_wti_w, df_au_w, df_chf_w)
for df in weekly_frames:
    # Arithmetic returns of the first column; log returns are avoided since the oil data includes non-positive prices
    weekly_returns = df.iloc[:, 0].pct_change()
    df["returns"] = weekly_returns
    # 1-year (52-week) moving average of the weekly returns
    df["returns_1y_ma"] = weekly_returns.rolling(window=52).mean()

In [None]:
plt.figure(figsize=(10, 6))

# One line per asset: (weekly df, legend label, line color), drawn in this order
for asset_df, asset_label, asset_color in (
    (df_btc_w, "Bitcoin", "#f7931a"),
    (df_sp500_w, "S&P 500", "lime"),
    (df_us10y_w, "US 10-year yield", "deepskyblue"),
    (df_au_w, "Gold futures", "gold"),
    (df_wti_w, "Crude oil futures", "crimson"),
    (df_chf_w, "USD/CHF", "mediumorchid"),
):
    sns.lineplot(data=asset_df, x=asset_df.index, y="returns_1y_ma", label=asset_label, color=asset_color, linewidth=1)

# Dashed reference line at a return of zero
plt.axhline(y=0, linestyle="--", linewidth=2/3, color="red")

# Fixed y-range; anything outside it is clipped from view
plt.ylim([-0.035, 0.14])

plt.title("Comparison of 1-Year Average Weekly Returns of Bitcoin with Other Assets Over Time")
plt.xlabel(None)
plt.ylabel(None)

save_chart_as_png("2_BTC_returns_comparison")

In [None]:
# Create table with average weekly returns of each asset for different timeframes
def get_timeframes_avg_returns(df: pd.DataFrame) -> dict[str, float]:
    """Average the ``returns`` column over the whole df and over three year ranges.

    Args:
        df: DataFrame with a datetime index and a ``returns`` column.

    Returns:
        Mapping from timeframe label to the mean return, rounded to 4 decimals.
        The year ranges are: before 2016, 2016 to 2019 inclusive, and 2020 onwards.
    """
    years = df.index.year
    returns = df["returns"]
    timeframe_masks = {
        "2011-2016 avg": years < 2016,
        "2016-2020 avg": (years >= 2016) & (years < 2020),
        "2020-2024 avg": years >= 2020,
    }

    averages = {"Total avg weekly returns": round(returns.mean(), 4)}
    for timeframe_label, in_timeframe in timeframe_masks.items():
        averages[timeframe_label] = round(returns[in_timeframe].mean(), 4)
    return averages

# One row per asset, one column per timeframe
weekly_df_by_asset = {
    "Bitcoin": df_btc_w,
    "S&P 500": df_sp500_w,
    "US 10-year yield": df_us10y_w,
    "Gold futures": df_au_w,
    "Crude oil futures": df_wti_w,
    "USD/CHF": df_chf_w,
}
df_avg_returns = pd.DataFrame({
    asset_name: get_timeframes_avg_returns(asset_df)
    for asset_name, asset_df in weekly_df_by_asset.items()
}).T

# Show the assets sorted by the first column, highest first
first_column = df_avg_returns.columns[0]
df_avg_returns.sort_values(by=first_column, ascending=False)

**Key takeaways:**
- ...

### Distribution of returns

In [None]:
sym_limit = 0.15
# Keep only the rows whose daily log return lies within [-sym_limit, sym_limit]
within_limit = df_btc["price_change_log"].between(-sym_limit, sym_limit)
df_btc_lim = df_btc[within_limit]

plt.figure(figsize=(10, 6))

# Histogram with 0.01-wide bins spanning the symmetric range
symmetric_range = (-sym_limit, sym_limit)
sns.histplot(df_btc_lim["price_change_log"], binwidth=0.01, binrange=symmetric_range, color="turquoise", edgecolor="white", alpha=3/4)

plt.xlim(*symmetric_range)

plt.title("Distribution of Bitcoin Daily Returns")
plt.xlabel("Price Change")
plt.ylabel("Frequency")

save_chart_as_png("2_BTC_returns_dist")

In [None]:
# One-row table of summary statistics of the daily log returns, each rounded to 4 decimals
btc_log_returns = df_btc["price_change_log"]
pd.DataFrame({
    stat_label: [round(compute_stat(), 4)]
    for stat_label, compute_stat in (
        ("Average return", btc_log_returns.mean),
        ("Median return", btc_log_returns.median),
        ("Standard deviation", btc_log_returns.std),
        ("Min return", btc_log_returns.min),
        ("Max return", btc_log_returns.max),
        ("Skewness", btc_log_returns.skew),
        ("Kurtosis", btc_log_returns.kurt),
    )
})

**Key takeaways:**
- ...

### Distribution of yearly rolling returns

In [None]:
# Yearly rolling returns: sum of the daily log returns over each 365-row window (incomplete windows are dropped)
rolling_365d_sum = df_btc["price_change_log"].rolling(window=365).sum()
btc_yearly_rolling_returns = rolling_365d_sum.dropna()

In [None]:
plt.figure(figsize=(10, 6))

# The same range is used for the histogram bins and for the x-axis limits
x_range = (-2, 6)
sns.histplot(btc_yearly_rolling_returns, binwidth=1/3, binrange=x_range, color="mediumpurple", edgecolor="white", alpha=3/4)

plt.xlim(*x_range)

plt.title("Distribution of Bitcoin Yearly Rolling Returns")
plt.xlabel("Price Change")
plt.ylabel("Frequency")

save_chart_as_png("2_BTC_returns_y_rol_dist")

In [None]:
# One-row table of summary statistics of the yearly rolling returns, each rounded to 4 decimals
pd.DataFrame({
    stat_label: [round(compute_stat(), 4)]
    for stat_label, compute_stat in (
        ("Average return", btc_yearly_rolling_returns.mean),
        ("Median return", btc_yearly_rolling_returns.median),
        ("Standard deviation", btc_yearly_rolling_returns.std),
        ("Min return", btc_yearly_rolling_returns.min),
        ("Max return", btc_yearly_rolling_returns.max),
        ("Skewness", btc_yearly_rolling_returns.skew),
        ("Kurtosis", btc_yearly_rolling_returns.kurt),
    )
})

**Key takeaways:**
- ...

## Year-over-year returns across time 📊

In [None]:
# Year-over-year (YoY) returns
# Per calendar year, keep the first and the last price of that year
price_by_year = df_btc.groupby(df_btc.index.year)["price"]
df_btc_yearly = price_by_year.agg(first_price="first", last_price="last")
# YoY return = change from the year's first price to its last price, relative to the first price
year_first_price = df_btc_yearly["first_price"]
year_last_price = df_btc_yearly["last_price"]
df_btc_yearly["price_change"] = (year_last_price - year_first_price) / year_first_price

In [None]:
# Bar chart of the yearly returns: positive and negative years are drawn by two separate
# barplot calls so that each group can use its own color gradient.
plt.figure(figsize=(10, 6))

# Create a custom palette for positive and negative returns
palette_greens = LinearSegmentedColormap.from_list("positive",["#d4c334", "#40d434"])
palette_reds = LinearSegmentedColormap.from_list("negative", ["#d43438", "#d47034"])

# Get one barplot for the positive and another for the negative returns while making sure they are in the correct order
# (both calls pass the full yearly index as `order`, so every year keeps its own slot on the x-axis)
ax = sns.barplot(data=df_btc_yearly[df_btc_yearly["price_change"] >= 0], x="date", y="price_change", order=df_btc_yearly.index, palette=palette_greens, hue="price_change", legend=False)
ax = sns.barplot(data=df_btc_yearly[df_btc_yearly["price_change"] < 0], x="date", y="price_change", order=df_btc_yearly.index, palette=palette_reds, hue="price_change", legend=False)

# Symmetric log scale, so negative returns can share the axis with very large positive ones
plt.yscale("symlog")
plt.ylim(-1, 80)

plt.yticks([-1, 0, 1, 10, 50])

# Show tick values as plain integers
plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{int(x)}"))

# Label each bar with its value
font_properties = font_manager.FontProperties(family="sans-serif", weight="bold", size=8)
for container in ax.containers:
    ax.bar_label(container, fmt="%.2f", padding=2.5, fontproperties=font_properties)

plt.title("Bitcoin Year-over-Year Returns Over Time")
plt.xlabel(None)
plt.ylabel(None)

save_chart_as_png("2_BTC_YoY_returns")

In [None]:
# Row of the year with the highest YoY return
best_year = df_btc_yearly["price_change"].idxmax()
df_btc_yearly.loc[[best_year]]

In [None]:
# Row of the year with the lowest YoY return
worst_year = df_btc_yearly["price_change"].idxmin()
df_btc_yearly.loc[[worst_year]]

In [None]:
# One-row table of summary statistics of the YoY returns, each rounded to 4 decimals
yoy_returns = df_btc_yearly["price_change"]
pd.DataFrame({
    "Average YoY return": [round(yoy_returns.mean(), 4)],
    "Median YoY return": [round(yoy_returns.median(), 4)],
    "Standard deviation": [round(yoy_returns.std(), 4)],
    "Min YoY return": [round(yoy_returns.min(), 4)],
    "Max YoY return": [round(yoy_returns.max(), 4)],
    # Compound all yearly returns; subtract 1 BEFORE rounding so the displayed value
    # is really rounded to 4 decimals (rounding first leaves floating-point noise behind)
    "Cumulative product YoY return": [round((1 + yoy_returns).cumprod().iloc[-1] - 1, 4)],
})

**Key takeaways:**
- ...

### Positive returns phases

In [None]:
# Add positive returns phase column
def _positive_returns_phase(year):
    """Return the phase number whose year range contains `year`, or None if no phase does."""
    phase_year_ranges = {
        1: (2010, 2013),
        2: (2015, 2017),
        3: (2019, 2021),
        4: (2023, 2024),
    }
    for phase, (first_year, last_year) in phase_year_ranges.items():
        if first_year <= year <= last_year:
            return phase
    return None


# Nullable integer dtype, so years outside every phase stay missing
df_btc_yearly["positive_returns_phase"] = df_btc_yearly.index.map(_positive_returns_phase).astype("Int64")

In [None]:
# Get total return, max, min, average, median, standard deviation and cumulative product YoY return per positive returns phase
# Every aggregation receives the phase's "price_change" values (x), indexed by year; results are rounded to 2 decimals.
df_btc_yearly.groupby("positive_returns_phase").agg(
    # First and last year of the phase, formatted as "<min>-<max>"
    timeframe=("price_change", lambda x: f"{x.index.min()}-{x.index.max()}"),
    # Change from the first price of the phase's first year to the last price of its last year (looked up in df_btc_yearly)
    total_return=("price_change", lambda x: round((df_btc_yearly.loc[x.index[-1]]["last_price"] - df_btc_yearly.loc[x.index[0]]["first_price"]) / df_btc_yearly.loc[x.index[0]]["first_price"], 2)),
    avg_yoy_return=("price_change", lambda x: round(x.mean(), 2)),
    median_yoy_return=("price_change", lambda x: round(x.median(), 2)),
    std_yoy_return=("price_change", lambda x: round(x.std(), 2)),
    max_yoy_return=("price_change", lambda x: round(x.max(), 2)),
    min_yoy_return=("price_change", lambda x: round(x.min(), 2)),
    # Compounded yearly returns of the phase
    cumprod_yoy_return=("price_change", lambda x: round((1 + x).cumprod().iloc[-1] - 1, 2))
).T

**Key takeaways:**
- ...

## Returns before vs since 2014 ⏳

In [None]:
# Split the data into independent copies: years before 2014, and 2014 onwards
btc_years = df_btc.index.year
df_btc_b2014 = df_btc[btc_years < 2014].copy()
df_btc_s2014 = df_btc[btc_years >= 2014].copy()

In [None]:
# Number the rows of each df from 1, as a "day_number" column
for timeframe_df in (df_btc_b2014, df_btc_s2014):
    timeframe_df["day_number"] = range(1, len(timeframe_df) + 1)

In [None]:
# Set the first price change of each df to 0, so that its base 100 price starts exactly at 100
for timeframe_df in (df_btc_b2014, df_btc_s2014):
    first_row_label = timeframe_df.index[0]
    timeframe_df.loc[first_row_label, "price_change"] = 0

In [None]:
# Base 100 price: compounded daily returns of each df, scaled by 100
for timeframe_df in (df_btc_b2014, df_btc_s2014):
    growth_factor = (1 + timeframe_df["price_change"]).cumprod()
    timeframe_df["base_100_price"] = growth_factor * 100

In [None]:
plt.figure(figsize=(10, 6))

# Both timeframes are plotted against their own day number, so they start at the same x position
sns.lineplot(data=df_btc_b2014, x="day_number", y="base_100_price", color="#ff5b00", linewidth=0.75, label="Before 2014")
sns.lineplot(data=df_btc_s2014, x="day_number", y="base_100_price", color="#00f8ff", linewidth=0.75, label="Since 2014")

plt.yscale("log")
plt.xlim(0, 4000)


def _format_base_100_tick(x, _):
    """Format a y tick: plain number below 1000, then thousands ("K"), then millions ("M")."""
    if x < 1000 and float(x).is_integer():
        return f"{int(x)}"
    if x < 1:
        return f"{x:.1f}"
    if x < 1000:
        # Non-integer value in [1, 1000): previously fell through to the "K" branch and rendered as "0K"
        return f"{x:g}"
    if x < 1_000_000:
        return f"{int(x / 1000)}K"
    return f"{int(x / 1_000_000)}M"


plt.gca().yaxis.set_major_formatter(FuncFormatter(_format_base_100_tick))

plt.title("Bitcoin Price (Base 100) Over Days: Before vs Since 2014")
plt.xlabel(None)
plt.ylabel(None)

plt.legend()

save_chart_as_png("2_BTC_price_2014")

In [None]:
# Create table with basic stats of both timeframes and the ratio
def calculate_stats(df: pd.DataFrame) -> dict[str, float]:
    """Summarise one timeframe: row count, final base 100 price and cumulative return.

    Args:
        df: DataFrame with ``price``, ``base_100_price`` and ``price_change`` columns.

    Returns:
        Mapping from stat label to its value; the two float stats are rounded to 4 decimals.
    """
    day_count = df["price"].count()
    final_base_100_price = df["base_100_price"].iloc[-1]
    cumulative_return = df["price_change"].add(1).prod() - 1
    return {
        "Number of days": day_count,
        "Final price (base 100)": round(final_base_100_price, 4),
        "Cumulative returns": round(cumulative_return, 4),
    }

# One column per timeframe, one row per stat
df_tf_stats = pd.DataFrame({
    timeframe_label: calculate_stats(timeframe_df)
    for timeframe_label, timeframe_df in (("Before 2014", df_btc_b2014), ("Since 2014", df_btc_s2014))
})

before_over_since = df_tf_stats["Before 2014"] / df_tf_stats["Since 2014"]
df_tf_stats["Ratio (before / since)"] = before_over_since.round(4)


# Format output to remove scientific notation and make more readable
def _format_stat_cell(value):
    """Render numbers with thousands separators and 2 decimals; leave any other value untouched."""
    if isinstance(value, (int, float)):
        return f"{float(value):,.2f}"
    return value


df_tf_stats = df_tf_stats.map(_format_stat_cell)

df_tf_stats.T

**Key takeaways:**
- ...

### Distribution of returns (before vs since 2014)

In [None]:
sym_limit = 0.15
# Keep only the rows whose daily log return lies within [-sym_limit, sym_limit]
within_limit = df_btc["price_change_log"].between(-sym_limit, sym_limit)
df_btc_lim = df_btc[within_limit]

plt.figure(figsize=(10, 6))

# One histogram per timeframe, normalised to probabilities so both are comparable despite different row counts
limited_years = df_btc_lim.index.year
for in_timeframe, bar_color, bar_alpha, timeframe_label in (
    (limited_years < 2014, "#ff5b00", 3/4, "Before 2014"),
    (limited_years >= 2014, "#00f8ff", 2/3, "Since 2014"),
):
    sns.histplot(df_btc_lim[in_timeframe]["price_change_log"], stat="probability", binwidth=0.01, binrange=(-sym_limit, sym_limit), color=bar_color, alpha=bar_alpha, label=timeframe_label)

plt.xlim(-sym_limit, sym_limit)

plt.title("Distribution of Bitcoin Daily Returns")
plt.xlabel("Price Change")
plt.ylabel("Probability")
plt.legend()

save_chart_as_png("2_BTC_returns_dist_2014")

In [None]:
# Create table with stats of both timeframes and the ratio
def calculate_stats(df: pd.DataFrame) -> dict[str, float]:
    """Compute summary statistics of the ``price_change_log`` column.

    Args:
        df: DataFrame with a ``price_change_log`` column.

    Returns:
        Mapping from stat label to its value, rounded to 4 decimals.
    """
    log_returns = df["price_change_log"]
    # Stat label -> name of the Series method that computes it
    stat_methods = {
        "Average return": "mean",
        "Median return": "median",
        "Standard deviation": "std",
        "Min return": "min",
        "Max return": "max",
        "Skewness": "skew",
        "Kurtosis": "kurt",
    }
    return {
        stat_label: round(getattr(log_returns, method_name)(), 4)
        for stat_label, method_name in stat_methods.items()
    }

# One column per timeframe, one row per stat
df_tf_stats = pd.DataFrame({
    timeframe_label: calculate_stats(timeframe_df)
    for timeframe_label, timeframe_df in (("Before 2014", df_btc_b2014), ("Since 2014", df_btc_s2014))
})

before_over_since = df_tf_stats["Before 2014"] / df_tf_stats["Since 2014"]
df_tf_stats["Ratio (before / since)"] = before_over_since.round(4)
df_tf_stats.T

**Key takeaways:**
- ...

## Price vs returns 🌀

In [None]:
# 90-row rolling mean of the price
btc_price_rolling_90d = df_btc["price"].rolling(window=90)
df_btc["price_90d_ma"] = btc_price_rolling_90d.mean()

In [None]:
plt.figure(figsize=(10, 6))

# Scatter of the 90-day average price against the 90-day average daily log return, with a fitted regression line
sns.regplot(data=df_btc, x="price_90d_ma", y="price_change_log_90d_ma",
            scatter_kws={"alpha": 0.7, "linewidths": 0.2, "color": "#59b5ec"},
            line_kws={"color": "#ec8f59"})

plt.xscale("log")
plt.xlim(0.05, 100_000)


def _format_price_tick(x, _):
    """Format an x tick: plain number below 1000, then thousands ("K"), then millions ("M")."""
    if x < 1000 and float(x).is_integer():
        return f"{int(x)}"
    if x < 1:
        return f"{x:.1f}"
    if x < 1000:
        # Non-integer value in [1, 1000): previously fell through to the "K" branch and rendered as "0K"
        return f"{x:g}"
    if x < 1_000_000:
        return f"{int(x / 1000)}K"
    # Previously rendered as e.g. "1000K"
    return f"{int(x / 1_000_000)}M"


plt.gca().xaxis.set_major_formatter(FuncFormatter(_format_price_tick))

plt.title("Price vs Returns of Bitcoin")
plt.xlabel("Average Price (90-day Window)")
plt.ylabel("Average Daily Returns (90-day Window)")

save_chart_as_png("2_BTC_price_vs_returns")

In [None]:
# Pearson correlation coefficient between the 90-day average price and the 90-day average daily log returns, rounded to 2 decimals
df_btc["price_90d_ma"].corr(df_btc["price_change_log_90d_ma"]).round(2)

**Key takeaways:**
- ...