To validate Bitcoin price data retrieved from the CoinGecko API, we compare it with data from Yahoo Finance. Since different platforms may aggregate price data from varying sources and at different intervals, this comparison helps ensure the reliability and consistency of the CoinGecko data for further analysis. By aligning and visualizing the two data sources, we can detect any significant discrepancies and gain confidence in the accuracy of the time-series data used for further calculations.

In [5]:
import requests
import pandas as pd
import plotly.express as px
import yfinance as yf

def get_coingecko_btc_prices(days, timeout=10):
    """Fetch Bitcoin/USD prices from the CoinGecko market-chart endpoint.

    Args:
        days: Lookback window, forwarded as the ``days`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame with a ``datetime`` column (parsed from the millisecond
        timestamps in the response) and a ``price`` column.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart"
    params = {
        "vs_currency": "usd",
        "days": days,
    }
    # Without a timeout, requests may block indefinitely on a stalled connection.
    response = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP errors here instead of as an opaque KeyError on "prices" below.
    response.raise_for_status()
    data = response.json()
    df = pd.DataFrame(data["prices"], columns=["timestamp", "price"])
    df["datetime"] = pd.to_datetime(df["timestamp"], unit="ms")
    return df[["datetime", "price"]]

def get_yahoo_btc_prices(days):
    """Fetch hourly BTC-USD closing prices from Yahoo Finance.

    Args:
        days: Lookback window in days; only 1, 7 and 30 are mapped, any
            other value raises ``KeyError``.

    Returns:
        DataFrame with columns ``datetime`` and ``price``, where ``price``
        holds the hourly ``Close`` values.
    """
    # Supported windows and the period strings handed to yfinance.
    yahoo_periods = {1: "1d", 7: "7d", 30: "30d"}

    ticker = yf.Ticker("BTC-USD")
    history = ticker.history(period=yahoo_periods[days], interval="1h")

    # Move the timestamp index into a regular column and keep only the close.
    closes = history.reset_index()[["Datetime", "Close"]]
    return closes.rename(columns={"Datetime": "datetime", "Close": "price"})

def align_to_hour(df):
    """Bucket rows by hour and average each bucket.

    Args:
        df: DataFrame with a datetime-typed ``datetime`` column; all other
            columns are averaged per hour.

    Returns:
        A new DataFrame with one row per hour (``datetime`` floored to the
        hour) holding the mean of the remaining columns. The input frame is
        left unmodified.
    """
    # assign() builds a new frame, so the caller's "datetime" column is not
    # overwritten with the floored values.
    floored = df.assign(datetime=df["datetime"].dt.floor("h"))
    return floored.groupby("datetime").mean().reset_index()

def plot_prices(cg_df, yf_df, label):
    """Draw both BTC price series on one interactive line chart.

    Args:
        cg_df: CoinGecko prices with ``datetime`` and ``price`` columns.
        yf_df: Yahoo Finance prices with the same columns.
        label: Text appended to the chart title in parentheses.
    """
    # Tag each frame with its origin; assign() leaves the inputs untouched.
    tagged_frames = [
        cg_df.assign(source="CoinGecko"),
        yf_df.assign(source="Yahoo Finance"),
    ]
    long_df = pd.concat(tagged_frames)

    axis_labels = {"price": "Price (USD)", "datetime": "Time"}
    chart = px.line(
        long_df,
        x="datetime",
        y="price",
        color="source",
        title=f"BTC Price Comparison: CoinGecko vs Yahoo ({label})",
        markers=True,
        labels=axis_labels,
    )
    chart.update_layout(
        xaxis_title="Time",
        yaxis_title="Price (USD)",
        legend_title="Data Source",
        hovermode="x unified",
    )
    chart.show()

def compare_prices_for_days(days, label):
    """Fetch, align and plot BTC prices from CoinGecko and Yahoo Finance.

    Args:
        days: Lookback window in days, passed to both fetchers.
        label: Human-readable window description used in the chart title.
    """
    cg_df = get_coingecko_btc_prices(days)
    yf_df = get_yahoo_btc_prices(days)

    # CoinGecko timestamps are parsed from epoch milliseconds into naive
    # datetimes. tz_localize(None) alone keeps the local wall-clock time, so
    # convert tz-aware Yahoo timestamps to UTC first; otherwise a non-UTC
    # timezone would shift the two series against each other.
    yf_times = yf_df["datetime"]
    if yf_times.dt.tz is not None:
        yf_times = yf_times.dt.tz_convert("UTC")
    yf_df["datetime"] = yf_times.dt.tz_localize(None)

    cg_df = align_to_hour(cg_df)
    yf_df = align_to_hour(yf_df)

    plot_prices(cg_df, yf_df, label)

def main():
    """Plot the BTC comparison for the 1-, 7- and 30-day windows."""
    windows = (
        (1, "Last 24 Hours"),
        (7, "Last 7 Days"),
        (30, "Last 30 Days"),
    )
    for window_days, window_label in windows:
        compare_prices_for_days(window_days, window_label)

# Run the comparison when executed as a script or notebook cell.
if __name__ == "__main__":
    main()

We run the same comparison for Solana price data as well.

In [4]:
def get_coingecko_solana_prices(days, timeout=10):
    """Fetch Solana/USD prices from the CoinGecko market-chart endpoint.

    Args:
        days: Lookback window, forwarded as the ``days`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame with a ``datetime`` column (parsed from the millisecond
        timestamps in the response) and a ``price`` column.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://api.coingecko.com/api/v3/coins/solana/market_chart"
    params = {
        "vs_currency": "usd",
        "days": days,
    }
    # Without a timeout, requests may block indefinitely on a stalled connection.
    response = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP errors here instead of as an opaque KeyError on "prices" below.
    response.raise_for_status()
    data = response.json()
    df = pd.DataFrame(data["prices"], columns=["timestamp", "price"])
    df["datetime"] = pd.to_datetime(df["timestamp"], unit="ms")
    return df[["datetime", "price"]]

def get_yahoo_solana_prices(days):
    """Fetch hourly SOL-USD closing prices from Yahoo Finance.

    Args:
        days: Lookback window in days; only 1, 7 and 30 are mapped, any
            other value raises ``KeyError``.

    Returns:
        DataFrame with columns ``datetime`` and ``price``, where ``price``
        holds the hourly ``Close`` values.
    """
    # Supported windows and the period strings handed to yfinance.
    yahoo_periods = {1: "1d", 7: "7d", 30: "30d"}

    ticker = yf.Ticker("SOL-USD")
    history = ticker.history(period=yahoo_periods[days], interval="1h")

    # Move the timestamp index into a regular column and keep only the close.
    closes = history.reset_index()[["Datetime", "Close"]]
    return closes.rename(columns={"Datetime": "datetime", "Close": "price"})

def align_to_hour(df):
    """Bucket rows by hour and average each bucket.

    Args:
        df: DataFrame with a datetime-typed ``datetime`` column; all other
            columns are averaged per hour.

    Returns:
        A new DataFrame with one row per hour (``datetime`` floored to the
        hour) holding the mean of the remaining columns. The input frame is
        left unmodified.
    """
    # Work on a copy so the caller's "datetime" column is not overwritten
    # with the floored values.
    hourly = df.copy()
    hourly["datetime"] = hourly["datetime"].dt.floor("h")
    return hourly.groupby("datetime").mean().reset_index()

def plot_prices(cg_df, yf_df, label):
    """Draw both Solana price series on one interactive line chart.

    Args:
        cg_df: CoinGecko prices with ``datetime`` and ``price`` columns.
        yf_df: Yahoo Finance prices with the same columns.
        label: Text appended to the chart title in parentheses.
    """
    # Label every row with its data source before stacking the two frames;
    # assign() returns new frames, so the inputs stay untouched.
    coingecko_rows = cg_df.assign(source="CoinGecko")
    yahoo_rows = yf_df.assign(source="Yahoo Finance")
    stacked = pd.concat([coingecko_rows, yahoo_rows])

    chart_title = f"Solana Price Comparison: CoinGecko vs Yahoo ({label})"
    chart = px.line(
        stacked,
        x="datetime",
        y="price",
        color="source",
        title=chart_title,
        markers=True,
        labels={"price": "Price (USD)", "datetime": "Time"},
    )

    layout_options = {
        "xaxis_title": "Time",
        "yaxis_title": "Price (USD)",
        "legend_title": "Data Source",
        "hovermode": "x unified",
    }
    chart.update_layout(**layout_options)

    chart.show()

def compare_prices_for_days(days, label):
    """Fetch, align and plot Solana prices from CoinGecko and Yahoo Finance.

    Args:
        days: Lookback window in days, passed to both fetchers.
        label: Human-readable window description used in the chart title.
    """
    cg_df = get_coingecko_solana_prices(days)
    yf_df = get_yahoo_solana_prices(days)

    # CoinGecko timestamps are parsed from epoch milliseconds into naive
    # datetimes. tz_localize(None) alone keeps the local wall-clock time, so
    # convert tz-aware Yahoo timestamps to UTC first; otherwise a non-UTC
    # timezone would shift the two series against each other.
    yf_times = yf_df["datetime"]
    if yf_times.dt.tz is not None:
        yf_times = yf_times.dt.tz_convert("UTC")
    yf_df["datetime"] = yf_times.dt.tz_localize(None)

    cg_df = align_to_hour(cg_df)
    yf_df = align_to_hour(yf_df)

    plot_prices(cg_df, yf_df, label)

def main():
    """Plot the Solana comparison for the 1-, 7- and 30-day windows."""
    labels_by_days = {
        1: "Last 24 Hours",
        7: "Last 7 Days",
        30: "Last 30 Days",
    }
    # Dicts preserve insertion order, so the charts appear shortest window first.
    for window_days, window_label in labels_by_days.items():
        compare_prices_for_days(window_days, window_label)

# Run the comparison when executed as a script or notebook cell.
if __name__ == "__main__":
    main()
