# Analysis of the returns of bitcoin

## Setup

In [None]:
from pathlib import Path

import matplotlib.colorbar as mpl_cbar
import matplotlib.colors as mpl_cols
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import pandas as pd
import seaborn as sns

In [None]:
# Load the bitcoin price data, parsing the "date" column and using it as the index
btc_csv_path = "../data/BTC.csv"
df_btc = pd.read_csv(
    btc_csv_path,
    index_col="date",
    parse_dates=True,
)

In [None]:
# Chart theme: seaborn dark grid with faint grid lines, followed by matplotlib's dark background style
grid_rc = {"grid.alpha": 0.33}
sns.set_theme(style="darkgrid", rc=grid_rc)
plt.style.use("dark_background")

# Save chart as png function
def save_chart_as_png(filename: str) -> None:
    """Save the current matplotlib figure as ``../images/<filename>.png``.

    The target directory is created when it does not exist yet, so the first
    chart saved on a fresh checkout does not fail with ``FileNotFoundError``.

    Args:
        filename: Name of the image file, without the ``.png`` extension.
    """
    output_path = Path("../images") / f"{filename}.png"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    plt.savefig(
        output_path,
        format="png",
        dpi=300,
        orientation="landscape",
        bbox_inches="tight",
    )

In [None]:
# Get OHLC average as price
ohlc_columns = ["open", "high", "low", "close"]
missing_ohlc_columns = [column for column in ohlc_columns if column not in df_btc.columns]

if not missing_ohlc_columns:
    # Replace the four OHLC columns by their row-wise mean (no in-place mutation)
    df_btc = df_btc.assign(price=df_btc[ohlc_columns].mean(axis=1)).drop(columns=ohlc_columns)
elif "price" not in df_btc.columns:
    # Neither the raw OHLC columns nor an already computed price: the input is unusable
    raise KeyError(f"Missing OHLC columns: {missing_ohlc_columns}")
# Otherwise the OHLC columns were already replaced by "price": re-running this cell is a no-op

## Returns of bitcoin across time

In [None]:
# Daily percentage change of the price, plus its 90-day and 365-day rolling means
btc_daily_change = df_btc["price"].pct_change()
df_btc["price_change"] = btc_daily_change
for ma_column, ma_window in (("price_change_90d_ma", 90), ("price_change_1y_ma", 365)):
    df_btc[ma_column] = btc_daily_change.rolling(window=ma_window).mean()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# (column, legend label, color, line width) of each moving-average line
moving_average_lines = [
    ("price_change_90d_ma", "Moving average (90-days)", "#00f8ff", 0.75),
    ("price_change_1y_ma", "Moving average (1-year)", "#ff5b00", 1),
]
for ma_column, ma_label, ma_color, ma_width in moving_average_lines:
    sns.lineplot(data=df_btc, x=df_btc.index, y=ma_column, label=ma_label, color=ma_color, linewidth=ma_width, ax=ax)

# Dashed reference line at a return of zero
ax.axhline(y=0, color="red", linewidth=1, linestyle="--")

ax.set_title("Daily returns of bitcoin across time")
ax.set_xlabel("")
ax.set_ylabel("")

save_chart_as_png("3_BTC_returns")

In [None]:
# Highest daily return: show the full row of the day with the largest price change
best_day = df_btc["price_change"].idxmax()
df_btc.loc[[best_day]]

In [None]:
# Lowest daily return: show the full row of the day with the smallest price change
worst_day = df_btc["price_change"].idxmin()
df_btc.loc[[worst_day]]

In [None]:
# Total cumulative returns: compounding every daily change (which is the exact same as price appreciation since first entry)
daily_growth_factors = df_btc["price_change"].add(1)
daily_growth_factors.prod() - 1

In [None]:
# Compound annual growth rate (CAGR)
btc_prices = df_btc["price"]
start_value, end_value = btc_prices.iloc[0], btc_prices.iloc[-1]

# Length of the sample in years, counting 365.25 days per year
covered_period = df_btc.index[-1] - df_btc.index[0]
n = covered_period.days / 365.25

(end_value / start_value) ** (1 / n) - 1

**Key takeaways:**
- ...

## Distribution of returns

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Restrict the histogram to daily changes within ±sym_limit (bounds included)
sym_limit = 0.15
within_limit = df_btc["price_change"].between(-sym_limit, sym_limit)
df_btc_lim = df_btc[within_limit]

sns.histplot(
    df_btc_lim["price_change"],
    binwidth=0.01,
    binrange=(-sym_limit, sym_limit),
    color="lime",
    edgecolor="white",
    alpha=0.75,
    ax=ax,
)

ax.set_xlim(-sym_limit, sym_limit)

ax.set_title("Distribution of the daily returns of bitcoin")
ax.set_xlabel("Price change")
ax.set_ylabel("Frequency")

save_chart_as_png("3_BTC_returns_dist")

In [None]:
# One-row table with descriptive statistics of the daily returns, rounded to 4 decimals
btc_returns = df_btc["price_change"]

# Column label -> name of the pandas Series method that computes it
daily_stat_methods = {
    "Average return": "mean",
    "Median return": "median",
    "Standard deviation": "std",
    "Min return": "min",
    "Max return": "max",
    "Skewness": "skew",
    "Kurtosis": "kurt",
}

pd.DataFrame({
    label: [round(getattr(btc_returns, method_name)(), 4)]
    for label, method_name in daily_stat_methods.items()
})

**Key takeaways:**
- ...

## Volatility vs returns (90-day rolling values)

In [None]:
# Get 90-day rolling volatility with the standard deviation method (for consistency)
returns_90d_window = df_btc["price_change"].rolling(window=90)
df_btc["volatility_90d"] = returns_90d_window.std()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Normalize years to map colors; the same norm is shared by the scatter and the colorbar
norm = mpl_cols.Normalize(vmin=df_btc.index.year.min(), vmax=df_btc.index.year.max())
cmap = plt.cm.viridis_r

sns.scatterplot(
    data=df_btc,
    x="volatility_90d",
    y="price_change_90d_ma",
    hue=df_btc.index.year,
    hue_norm=norm,  # guarantees the point colors match the colorbar below
    palette=cmap,
    alpha=0.7,
    linewidth=0.2,
    legend=False,
    ax=ax,
)
# Dashed reference line at an average return of zero
ax.axhline(y=0, color="red", linewidth=1, linestyle="--")

ax.set_title("Volatility vs returns of bitcoin")
ax.set_xlabel("Volatility (90-day window)")
ax.set_ylabel("Average daily returns (90-day window)")

# Create the colorbar (replaces the legend, which is disabled above)
cax, _ = mpl_cbar.make_axes(ax, location="right", pad=0.01)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
cbar = fig.colorbar(sm, cax=cax)
cbar.set_label("Year")

save_chart_as_png("3_BTC_vlt_vs_returns")

In [None]:
# Pearson correlation coefficient between volatility and returns
volatility_returns_corr = df_btc["volatility_90d"].corr(df_btc["price_change_90d_ma"])
# Builtin round() works on any float, whereas the scalar .round() method only exists on numpy floats
round(volatility_returns_corr, 2)

**Key takeaways:**
- ...

## Year-over-year returns across time

In [None]:
# Get YoY returns
# Yearly frame with the first and last price found within each calendar year
yearly_prices = df_btc.groupby(df_btc.index.year)["price"]
df_btc_yearly = yearly_prices.agg(first_price="first", last_price="last")

# Return of each year: change from its first to its last price, relative to the first price
yearly_price_delta = df_btc_yearly["last_price"].sub(df_btc_yearly["first_price"])
df_btc_yearly["price_change"] = yearly_price_delta.div(df_btc_yearly["first_price"])

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

sns.barplot(data=df_btc_yearly, x=df_btc_yearly.index, y="price_change", color="#f7931a", ax=ax)

ax.set_title("Year-over-year returns of bitcoin across time")
ax.set_xlabel("")
ax.set_ylabel("")

# Symmetric log scale so that negative bars and very large positive bars fit on one axis
ax.set_yscale("symlog")
ax.set_ylim(-1.25, 75)

# Show the fixed tick positions as plain integers
ax.yaxis.set_major_formatter(FuncFormatter(lambda tick, _pos: f"{int(tick)}"))
ax.set_yticks([-1, 0, 1, 5, 10, 50])

save_chart_as_png("3_BTC_YoY_returns")

In [None]:
# Highest YoY return: show the row of the year with the largest price change
best_year = df_btc_yearly["price_change"].idxmax()
df_btc_yearly.loc[[best_year]]

In [None]:
# Lowest YoY return: show the row of the year with the smallest price change
worst_year = df_btc_yearly["price_change"].idxmin()
df_btc_yearly.loc[[worst_year]]

In [None]:
# Create table with YoY returns stats (one row, every value rounded to 4 decimals)
yoy_returns = df_btc_yearly["price_change"]

pd.DataFrame({
    "Average YoY return": [round(yoy_returns.mean(), 4)],
    "Median YoY return": [round(yoy_returns.median(), 4)],
    "Standard deviation": [round(yoy_returns.std(), 4)],
    "Min YoY return": [round(yoy_returns.min(), 4)],
    "Max YoY return": [round(yoy_returns.max(), 4)],
    # Compound the yearly returns, subtract 1, and round last so the displayed
    # value is really limited to 4 decimals (rounding before the subtraction is not)
    "Cumulative product YoY return": [round((1 + yoy_returns).prod() - 1, 4)],
})

**Key takeaways:**
- ...

In [None]:
# Add positive returns phase column
# Phase number -> inclusive (first_year, last_year) bounds; years outside every range get no phase
phase_year_bounds = {
    1: (2010, 2013),
    2: (2015, 2017),
    3: (2019, 2021),
    4: (2023, 2024),
}


def get_positive_returns_phase(year):
    """Return the number of the phase whose year range contains ``year``, or None."""
    for phase, (first_year, last_year) in phase_year_bounds.items():
        if first_year <= year <= last_year:
            return phase
    return None


# Nullable integer dtype keeps the phase numbers as integers despite the missing values
df_btc_yearly["positive_returns_phase"] = df_btc_yearly.index.map(get_positive_returns_phase).astype("Int64")

In [None]:
# Get total return, max, min, average, median, standard deviation and cumulative product YoY return per positive returns phase
# Each lambda receives the "price_change" values of one phase, indexed by year; the result is
# transposed so that phases become columns and statistics become rows.
# Years without a phase are left out (pandas groupby drops missing group keys by default).
df_btc_yearly.groupby("positive_returns_phase").agg(
    # First and last year of the phase, formatted as "<first>-<last>"
    timeframe=("price_change", lambda x: f"{x.index.min()}-{x.index.max()}"),
    # Change from the first_price of the phase's first year to the last_price of its last year,
    # looked up in df_btc_yearly through the group's year labels
    total_return=("price_change", lambda x: round((df_btc_yearly.loc[x.index[-1]]["last_price"] - df_btc_yearly.loc[x.index[0]]["first_price"]) / df_btc_yearly.loc[x.index[0]]["first_price"], 2)),
    avg_yoy_return=("price_change", lambda x: round(x.mean(), 2)),
    median_yoy_return=("price_change", lambda x: round(x.median(), 2)),
    std_yoy_return=("price_change", lambda x: round(x.std(), 2)),
    max_yoy_return=("price_change", lambda x: round(x.max(), 2)),
    min_yoy_return=("price_change", lambda x: round(x.min(), 2)),
    # Compounded per-year returns; may differ from total_return because each yearly return
    # is measured from that year's own first price
    cumprod_yoy_return=("price_change", lambda x: round((1 + x).cumprod().iloc[-1] - 1, 2))
).T

**Key takeaways:**
- ...

## Before 2014 vs since 2014

In [None]:
# Split the data into two independent copies: years before 2014, and 2014 onwards
index_years = df_btc.index.year
df_btc_b2014 = df_btc.loc[index_years < 2014].copy()
df_btc_s2014 = df_btc.loc[index_years >= 2014].copy()

In [None]:
# Number the rows of each timeframe from 1, so both can share a "days elapsed" axis
for df_period in (df_btc_b2014, df_btc_s2014):
    df_period["day_number"] = range(1, len(df_period) + 1)

In [None]:
# Force the first price change of each timeframe to 0, so compounding starts from a neutral factor
for df_period in (df_btc_b2014, df_btc_s2014):
    first_label = df_period.index[0]
    df_period.loc[first_label, "price_change"] = 0

In [None]:
# Base 100 price of each timeframe: compounded daily changes scaled by 100
for df_period in (df_btc_b2014, df_btc_s2014):
    df_period["base_100_price"] = df_period["price_change"].add(1).cumprod().mul(100)

In [None]:
def format_base_100_tick(value: float, _pos: int) -> str:
    """Format a y-axis tick of the base-100 chart as a compact number.

    Values below 1,000 are shown as they are, thousands get a "K" suffix and
    anything from one million up gets an "M" suffix. The ``g`` format keeps
    fractions (e.g. 0.01 or 2.5K) instead of truncating them to 0.

    Args:
        value: Tick position in data units.
        _pos: Tick index supplied by matplotlib (unused).

    Returns:
        The tick label.
    """
    if value < 1_000:
        return f"{value:g}"
    if value < 1_000_000:
        return f"{value / 1_000:g}K"
    return f"{value / 1_000_000:g}M"


fig, ax = plt.subplots(figsize=(10, 6))

sns.lineplot(data=df_btc_b2014, x="day_number", y="base_100_price", color="red", linewidth=0.75, label="Before 2014", ax=ax)
sns.lineplot(data=df_btc_s2014, x="day_number", y="base_100_price", color="cyan", linewidth=0.75, label="Since 2014", ax=ax)

ax.set_title("Price of bitcoin (base 100) over days: before vs since 2014")
ax.set_xlabel("")
ax.set_ylabel("")
ax.legend()

# The formatter must be set after the scale: changing the scale resets the axis formatter
ax.set_yscale("log")

ax.set_xlim(0, 4000)
ax.yaxis.set_major_formatter(FuncFormatter(format_base_100_tick))

save_chart_as_png("3_BTC_price_2014")

In [None]:
# Create table with basic stats of both timeframes and the ratio
def calculate_basic_stats(df: pd.DataFrame) -> dict[str, float]:
    """Return the basic size and performance figures of one timeframe.

    Named differently from the ``calculate_stats`` helper defined further down
    the notebook, so that neither definition shadows the other.

    Args:
        df: Frame with ``price``, ``base_100_price`` and ``price_change`` columns.

    Returns:
        The number of non-null prices, the last base-100 price and the
        compounded return, the latter two rounded to 4 decimals.
    """
    return {
        "Number of days": df["price"].count(),
        # Select the column first: avoids building a whole row just to read one value
        "Final price (base 100)": round(df["base_100_price"].iloc[-1], 4),
        "Cumulative returns": round((df["price_change"] + 1).prod() - 1, 4),
    }

df_tf_stats = pd.DataFrame({
    "Before 2014": calculate_basic_stats(df_btc_b2014),
    "Since 2014": calculate_basic_stats(df_btc_s2014)
})

df_tf_stats["Ratio (before / since)"] = (df_tf_stats["Before 2014"] / df_tf_stats["Since 2014"]).round(4)

# Format output to remove scientific notation and make more readable
df_tf_stats = df_tf_stats.map(lambda x: f"{float(x):,.2f}" if isinstance(x, (int, float)) else x)

df_tf_stats.T

**Key takeaways:**
- ...

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Restrict both histograms to daily changes within ±sym_limit (bounds included)
sym_limit = 0.15
within_limit = df_btc["price_change"].between(-sym_limit, sym_limit)
df_btc_lim = df_btc[within_limit]

# Probabilities instead of counts, so the two timeframes of different length are comparable
lim_years = df_btc_lim.index.year
sns.histplot(df_btc_lim[lim_years < 2014]["price_change"], stat="probability", binwidth=0.01, binrange=(-sym_limit, sym_limit), color="blue", alpha=0.75, label="Before 2014", ax=ax)
sns.histplot(df_btc_lim[lim_years >= 2014]["price_change"], stat="probability", binwidth=0.01, binrange=(-sym_limit, sym_limit), color="green", alpha=0.66, label="Since 2014", ax=ax)

ax.set_xlim(-sym_limit, sym_limit)

ax.set_title("Distribution of the daily returns of bitcoin")
ax.set_xlabel("Price change")
ax.set_ylabel("Probability")
ax.legend()

save_chart_as_png("3_BTC_returns_dist_2014")

In [None]:
# Create table with stats of both timeframes and the ratio
def calculate_stats(df: pd.DataFrame) -> dict[str, float]:
    """Summarize the ``price_change`` column of ``df`` with descriptive statistics.

    Args:
        df: Frame containing a ``price_change`` column.

    Returns:
        Mapping from statistic label to its value, rounded to 4 decimals.
    """
    returns = df["price_change"]
    # Statistic label -> name of the pandas Series method that computes it
    stat_methods = {
        "Average return": "mean",
        "Median return": "median",
        "Standard deviation": "std",
        "Min return": "min",
        "Max return": "max",
        "Skewness": "skew",
        "Kurtosis": "kurt",
    }
    return {
        label: round(getattr(returns, method_name)(), 4)
        for label, method_name in stat_methods.items()
    }

# One column of statistics per timeframe.
# Note: an earlier cell forces the first price_change of both frames to 0, and that value is included here.
timeframe_frames = {"Before 2014": df_btc_b2014, "Since 2014": df_btc_s2014}
df_tf_stats = pd.DataFrame({
    timeframe_label: calculate_stats(timeframe_df)
    for timeframe_label, timeframe_df in timeframe_frames.items()
})

# Ratio of each statistic between the two timeframes, rounded to 4 decimals
before_since_ratio = df_tf_stats["Before 2014"].div(df_tf_stats["Since 2014"])
df_tf_stats["Ratio (before / since)"] = before_since_ratio.round(4)
df_tf_stats.T

**Key takeaways:**
- ...

## Risk-adjusted performance ⚖️

### Sharpe ratio

**Key takeaways:**
- ...