In [1]:
# Explicit %pip magic: the bare `pip ...` form only works through IPython
# automagic, which is applied to single-line cells and can be switched off.
%pip install dash

Note: you may need to restart the kernel to use updated packages.


In [2]:
import time
from datetime import datetime, timedelta

import dash
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
import xgboost as xgb
from dash import dcc, html, Input, Output
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

In [3]:
# The single Dash application object; the callback and layout further down
# attach to it. The browser-tab title is set directly at construction time.
app = dash.Dash(__name__, title="Crypto Price Prediction")

In [4]:
def fetch_top_cryptocurrencies(currency="usd", top_n=30):
    """Return the leading coins by market capitalisation from CoinGecko.

    Args:
        currency: Quote currency sent as ``vs_currency`` (default ``"usd"``).
        top_n: Number of coins requested via ``per_page``; only page 1 is
            fetched.

    Returns:
        A list of ``{'id': ..., 'name': ...}`` dicts, in the order the API
        returned them (the request asks for ``market_cap_desc``).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        ValueError: If the response body is not a JSON list.
    """
    url = "https://api.coingecko.com/api/v3/coins/markets"
    params = {
        "vs_currency": currency,
        "order": "market_cap_desc",
        "per_page": top_n,
        "page": 1,
    }
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, params=params, timeout=30)
    # Surface error statuses here; otherwise the error payload (a dict) would
    # reach the comprehension below and fail with an unrelated TypeError.
    response.raise_for_status()
    data = response.json()
    if not isinstance(data, list):
        raise ValueError(f"Unexpected response from {url}: {data!r}")
    return [{'id': crypto['id'], 'name': crypto['name']} for crypto in data]

In [5]:
def fetch_historical_data(crypto_id, start_date, end_date, currency="usd"):
    """Fetch price and volume history for one coin from CoinGecko.

    Args:
        crypto_id: Coin id interpolated into the request URL
            (e.g. ``"bitcoin"``).
        start_date: Range start as a ``YYYY-MM-DD`` string.
        end_date: Range end as a ``YYYY-MM-DD`` string.
        currency: Quote currency sent as ``vs_currency``.

    Returns:
        A DataFrame with the columns ``date``, ``price`` and ``volume``, or
        ``None`` when the response contains no ``prices`` entry. ``volume`` is
        NaN for rows that have no matching volume sample.
    """
    url = f"https://api.coingecko.com/api/v3/coins/{crypto_id}/market_chart/range"
    # strptime returns naive datetimes, so .timestamp() interprets both bounds
    # in the local timezone of the machine running the notebook.
    start_timestamp = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp())
    end_timestamp = int(datetime.strptime(end_date, "%Y-%m-%d").timestamp())
    params = {"vs_currency": currency, "from": start_timestamp, "to": end_timestamp}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()
    if 'prices' not in data:
        return None
    prices = data['prices']
    volumes = data.get('total_volumes') or []
    df = pd.DataFrame(prices, columns=['timestamp', 'price'])
    if len(volumes) == len(prices):
        # Usual case: both series have one sample per price row.
        df['volume'] = [v[1] for v in volumes]
    else:
        # Series of different length cannot be paired by position; align on
        # the timestamp instead and leave unmatched rows as NaN.
        volume_by_timestamp = {v[0]: v[1] for v in volumes}
        df['volume'] = df['timestamp'].map(volume_by_timestamp)
    # Timestamps are epoch milliseconds; keep only the calendar date.
    df['date'] = pd.to_datetime(df['timestamp'], unit='ms').dt.date
    return df[['date', 'price', 'volume']]

In [6]:
# Ask CoinGecko for the current market-cap leaders. The resulting list of
# {'id', 'name'} dicts feeds both the bulk download and the dropdown options.
print("Fetching the top 30 cryptocurrencies...")
cryptos = fetch_top_cryptocurrencies(currency="usd", top_n=30)
print(f"Top 30 Cryptocurrencies: {cryptos}")

Fetching the top 30 cryptocurrencies...
Top 30 Cryptocurrencies: [{'id': 'bitcoin', 'name': 'Bitcoin'}, {'id': 'ethereum', 'name': 'Ethereum'}, {'id': 'tether', 'name': 'Tether'}, {'id': 'ripple', 'name': 'XRP'}, {'id': 'solana', 'name': 'Solana'}, {'id': 'binancecoin', 'name': 'BNB'}, {'id': 'dogecoin', 'name': 'Dogecoin'}, {'id': 'cardano', 'name': 'Cardano'}, {'id': 'usd-coin', 'name': 'USDC'}, {'id': 'staked-ether', 'name': 'Lido Staked Ether'}, {'id': 'tron', 'name': 'TRON'}, {'id': 'avalanche-2', 'name': 'Avalanche'}, {'id': 'shiba-inu', 'name': 'Shiba Inu'}, {'id': 'the-open-network', 'name': 'Toncoin'}, {'id': 'wrapped-steth', 'name': 'Wrapped stETH'}, {'id': 'polkadot', 'name': 'Polkadot'}, {'id': 'chainlink', 'name': 'Chainlink'}, {'id': 'stellar', 'name': 'Stellar'}, {'id': 'wrapped-bitcoin', 'name': 'Wrapped Bitcoin'}, {'id': 'bitcoin-cash', 'name': 'Bitcoin Cash'}, {'id': 'hedera-hashgraph', 'name': 'Hedera'}, {'id': 'sui', 'name': 'Sui'}, {'id': 'weth', 'name': 'WETH'}, {

In [7]:
# Download window: from 180 days (6 * 30) ago up to today, formatted as the
# YYYY-MM-DD strings that fetch_historical_data parses.
_DATE_FORMAT = "%Y-%m-%d"
_window_end = datetime.today()
end_date = _window_end.strftime(_DATE_FORMAT)
start_date = (_window_end - timedelta(days=6 * 30)).strftime(_DATE_FORMAT)

In [8]:
# Download the price/volume history of every coin, keyed by display name.
# fetch_historical_data returns None whenever the response has no 'prices'
# entry — presumably what a rate-limited reply looks like — so an empty result
# is retried after a growing pause, and coins that still come back empty are
# reported instead of being dropped silently.
max_attempts = 3
historical_data = {}
skipped_cryptos = []
for crypto in cryptos:
    print(f"Fetching historical data for {crypto['name']}...")
    for attempt in range(1, max_attempts + 1):
        data = fetch_historical_data(crypto['id'], start_date, end_date)
        if data is not None and len(data) > 0:
            historical_data[crypto['name']] = data
            break
        if attempt < max_attempts:
            time.sleep(5 * attempt)  # back off before retrying: 5 s, then 10 s
    else:
        # Every attempt came back empty.
        skipped_cryptos.append(crypto['name'])
    time.sleep(1)  # short pause between coins to go easy on the API

if skipped_cryptos:
    print(f"No historical data for {len(skipped_cryptos)} cryptocurrencies: {skipped_cryptos}")

Fetching historical data for Bitcoin...
Fetching historical data for Ethereum...
Fetching historical data for Tether...
Fetching historical data for XRP...
Fetching historical data for Solana...
Fetching historical data for BNB...
Fetching historical data for Dogecoin...
Fetching historical data for Cardano...
Fetching historical data for USDC...
Fetching historical data for Lido Staked Ether...
Fetching historical data for TRON...
Fetching historical data for Avalanche...
Fetching historical data for Shiba Inu...
Fetching historical data for Toncoin...
Fetching historical data for Wrapped stETH...
Fetching historical data for Polkadot...
Fetching historical data for Chainlink...
Fetching historical data for Stellar...
Fetching historical data for Wrapped Bitcoin...
Fetching historical data for Bitcoin Cash...
Fetching historical data for Hedera...
Fetching historical data for Sui...
Fetching historical data for WETH...
Fetching historical data for Litecoin...
Fetching historical data 

In [9]:
# Show each downloaded frame under its coin name for a quick visual check.
print("\nHistorical Data:")
for coin_label, coin_frame in historical_data.items():
    print(f"\n{coin_label}:", coin_frame, sep="\n")


Historical Data:

Bitcoin:
           date         price        volume
0    2024-06-08  69325.362388  1.859750e+10
1    2024-06-09  69315.104123  1.068830e+10
2    2024-06-10  69654.160738  1.046104e+10
3    2024-06-11  69493.177609  1.933587e+10
4    2024-06-12  67329.152327  3.851304e+10
..          ...           ...           ...
175  2024-11-30  97453.247345  7.467003e+10
176  2024-12-01  96513.142347  4.358002e+10
177  2024-12-02  97311.707191  4.914769e+10
178  2024-12-03  95833.136230  1.010199e+11
179  2024-12-04  96031.630978  8.793575e+10

[180 rows x 3 columns]

Ethereum:
           date        price        volume
0    2024-06-08  3679.376652  1.619902e+10
1    2024-06-09  3683.025380  7.575769e+09
2    2024-06-10  3705.899884  6.301031e+09
3    2024-06-11  3666.827665  8.786881e+09
4    2024-06-12  3498.555954  1.962193e+10
..          ...          ...           ...
175  2024-11-30  3598.193321  2.880063e+10
176  2024-12-01  3709.909439  3.196200e+10
177  2024-12-02  3708.

In [10]:
# Stack the per-coin frames into one long table with a 'crypto' label column.
# The frames are labelled with .assign(), which returns copies, so the frames
# stored in historical_data stay unmodified and this cell can be re-run safely.
# A single concat replaces growing df_all inside the loop, which re-copied all
# accumulated rows on every iteration.
labelled_frames = [
    data.assign(crypto=crypto) for crypto, data in historical_data.items()
]
# pd.concat raises on an empty list, so keep the empty-frame result explicit.
df_all = pd.concat(labelled_frames) if labelled_frames else pd.DataFrame()

In [11]:
# Convert dates to datetimes and order the rows chronologically within each coin.
df_all = (
    df_all
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by=['crypto', 'date'])
)
# Lag-1 price feature, shifted inside each coin's group so a coin's first row
# never picks up another coin's price. Rows containing NaN (including each
# coin's first row, which has no previous day) are dropped.
df_all = df_all.assign(
    price_previous_day=df_all.groupby('crypto')['price'].shift(1)
).dropna()
print(df_all)

          date         price        volume   crypto  price_previous_day
1   2024-06-09  69315.104123  1.068830e+10  Bitcoin        69325.362388
2   2024-06-10  69654.160738  1.046104e+10  Bitcoin        69315.104123
3   2024-06-11  69493.177609  1.933587e+10  Bitcoin        69654.160738
4   2024-06-12  67329.152327  3.851304e+10  Bitcoin        69493.177609
5   2024-06-13  68224.519675  3.602781e+10  Bitcoin        67329.152327
..         ...           ...           ...      ...                 ...
175 2024-11-30      1.808961  1.412099e+10      XRP            1.532175
176 2024-12-01      1.939413  1.811823e+10      XRP            1.808961
177 2024-12-02      2.286340  1.843537e+10      XRP            1.939413
178 2024-12-03      2.708847  5.382000e+10      XRP            2.286340
179 2024-12-04      2.531924  3.795709e+10      XRP            2.708847

[716 rows x 5 columns]


In [12]:
# Features: the previous day's price plus the same row's volume.
# Target: the same row's price.
feature_columns = ['price_previous_day', 'volume']
X = df_all[feature_columns]
y = df_all['price']

In [13]:
# Random 80/20 split with a fixed seed for repeatability.
# NOTE(review): the rows are time-ordered per coin, so a shuffled split lets
# the model train on days that come after some of the test days — confirm this
# is acceptable for the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
# (StandardScaler is imported with the other dependencies in the import cell.)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# Base estimator handed to the grid search: an XGBoost regressor with a
# squared-error objective; all other hyper-parameters come from the grid.
model = xgb.XGBRegressor(
    objective='reg:squarederror',
)

In [16]:
# Hyper-parameter search space: 4 * 4 * 3 * 3 * 3 * 3 = 1296 combinations.
param_grid = dict(
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    max_depth=[3, 5, 7, 9],
    min_child_weight=[1, 3, 5],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.6, 0.8, 1.0],
    n_estimators=[500, 1000, 1500],
)


In [17]:
# Exhaustive search over param_grid with 3-fold cross-validation, ranked by
# negative MSE (higher is better) and run on all available cores.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=0,
)

In [18]:
# Run the search on the scaled training split (the expensive step).
grid_search.fit(X=X_train_scaled, y=y_train)

In [19]:
# Score the best candidate from the grid search on the held-out split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

# Both errors are in the target's units (prices pooled across all coins).
mae, rmse = (
    mean_absolute_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")


Mean Absolute Error (MAE): 832.3424650807615
Root Mean Squared Error (RMSE): 3074.7327574080664


In [20]:
@app.callback(
    [Output("price-graph", "figure"),
     Output("model-performance-metrics", "children")],
    [Input("crypto-dropdown", "value")]
)
def update_graph(selected_crypto):
    """Refit a per-coin model and rebuild the chart and the metrics panel.

    Runs whenever the ``crypto-dropdown`` value changes: downloads the last
    180 days of data for the coin, fits an XGBoost regressor on the previous
    day's price and the volume, and plots actual prices, the model's
    predictions over that window and a 30-day forecast.

    Args:
        selected_crypto: Coin id chosen in the dropdown, or ``None`` when the
            dropdown has no value.

    Returns:
        A ``(figure, children)`` pair for the ``price-graph`` figure and the
        ``model-performance-metrics`` container. When there is nothing to
        plot the figure is empty and ``children`` is a plain-text message;
        otherwise ``children`` is a ``dcc.Markdown`` component.
    """
    # A dropdown without a value delivers None; skip the pointless API call.
    if not selected_crypto:
        return go.Figure(), "Select a cryptocurrency to see its price prediction."

    start_date = (datetime.today() - timedelta(days=180)).strftime("%Y-%m-%d")
    end_date = datetime.today().strftime("%Y-%m-%d")

    data = fetch_historical_data(selected_crypto, start_date, end_date)

    if data is None or data.empty:
        return go.Figure(), f"No data available for the selected cryptocurrency ({selected_crypto})."

    # Rows without a price or volume cannot be used for fitting or scoring.
    data = data.dropna(subset=['price', 'volume']).reset_index(drop=True)

    # train_test_split needs enough rows to leave both splits non-empty.
    min_rows = 5
    if len(data) < min_rows:
        return go.Figure(), f"Not enough historical data to train a model for ({selected_crypto})."

    feature_columns = ['price_previous_day', 'volume']

    # Lag-1 price; the first row has no previous day and is back-filled.
    # (.bfill() replaces the deprecated fillna(method='bfill') spelling.)
    data['price_previous_day'] = data['price'].shift(1).bfill()
    X = data[feature_columns]
    y = data['price']

    # NOTE(review): this is a shuffled split of time-ordered rows, so the test
    # rows are interleaved with the training rows — confirm that is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = xgb.XGBRegressor(objective='reg:squarederror')
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    # Predictions over the whole window (training and test rows alike).
    data['predicted_price'] = model.predict(scaler.transform(X))

    # Recursive forecast: each step feeds the previous prediction back in as
    # 'price_previous_day'. Feeding the same last observed row for every
    # future day would produce one repeated value. Future volume is unknown,
    # so the last observed volume is held constant.
    forecast_days = 30
    last_observed_date = pd.to_datetime(data['date'].iloc[-1])
    future_dates = pd.date_range(
        last_observed_date + timedelta(days=1), periods=forecast_days
    ).strftime('%Y-%m-%d')
    last_volume = data['volume'].iloc[-1]
    previous_price = data['price'].iloc[-1]
    future_prices = []
    for _ in range(forecast_days):
        step_features = pd.DataFrame(
            [[previous_price, last_volume]], columns=feature_columns
        )
        previous_price = float(model.predict(scaler.transform(step_features))[0])
        future_prices.append(previous_price)
    future_data = pd.DataFrame({
        'date': future_dates,
        'predicted_price': future_prices,
    })

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data['price'],
        mode='lines+markers',
        name='Actual Prices',
        line=dict(color='blue', width=2),
        hovertemplate="Date: %{x}<br>Actual Price: %{y}<extra></extra>"
    ))

    fig.add_trace(go.Scatter(
        x=data['date'],
        y=data['predicted_price'],
        mode='lines+markers',
        name='Predicted Prices (Historical)',
        line=dict(color='orange', width=2, dash='dash'),
        hovertemplate="Date: %{x}<br>Predicted Price: %{y}<extra></extra>"
    ))

    fig.add_trace(go.Scatter(
        x=future_data['date'],
        y=future_data['predicted_price'],
        mode='lines+markers',
        name='Predicted Prices (Future)',
        line=dict(color='green', width=2, dash='dot'),
        hovertemplate="Date: %{x}<br>Predicted Price: %{y}<extra></extra>"
    ))

    fig.update_layout(
        title=f"Actual vs Predicted Prices for {selected_crypto.capitalize()}",
        xaxis_title="Date",
        yaxis_title="Price (USD)",
        legend_title="Legend",
        xaxis=dict(showgrid=True, tickangle=45),
        yaxis=dict(showgrid=True, zeroline=True),
        template="plotly_white",
        hovermode="x unified",
        xaxis_rangeslider_visible=True
    )

    # The text uses Markdown markup, so render it through dcc.Markdown; as a
    # plain string inside the Div the asterisks would be shown literally.
    metrics_text = "\n".join([
        f"**Model Performance Metrics for {selected_crypto.capitalize()}**",
        "",
        f"- Mean Absolute Error (MAE): {mae:.2f}",
        f"- Root Mean Squared Error (RMSE): {rmse:.2f}",
    ])
    # Reset white-space so the container's 'pre-wrap' style does not turn the
    # newlines between the rendered elements into blank lines.
    metrics_panel = dcc.Markdown(metrics_text, style={"whiteSpace": "normal"})

    return fig, metrics_panel


if __name__ == "__main__":
    # Page layout: heading, coin selector, price chart and the metrics panel
    # that the update_graph callback fills in.
    app.layout = html.Div([
        html.H1("Crypto Price Prediction"),
        dcc.Dropdown(
            id="crypto-dropdown",
            options=[{"label": c['name'], "value": c['id']} for c in cryptos],
            # Preselect the first coin; start without a selection if the coin
            # list came back empty rather than failing with an IndexError.
            value=cryptos[0]['id'] if cryptos else None,
            placeholder="Select Cryptocurrency",
        ),
        dcc.Graph(id="price-graph"),
        html.Div(id="model-performance-metrics", style={"whiteSpace": "pre-wrap"})
    ])
    # app.run is the supported way to start the dev server; recent Dash
    # releases no longer provide run_server.
    app.run(debug=True)

In [21]:
%pwd

'/Users/anand'

In [44]:
%pwd

'/Users/anand'