Pipeline example:

In [3]:
import os
import pandas as pd
import numpy as np

# Build a small synthetic OHLCV dataset so the rest of the notebook can run
# without the real 'cryptodata.csv'.

# Fixed seed: the dummy data (and everything derived from it downstream) is
# identical on every Restart & Run All instead of changing on each execution.
DUMMY_SEED = 42
rng = np.random.default_rng(DUMMY_SEED)

# Create the directory if it doesn't exist
os.makedirs('data/raw', exist_ok=True)

# Create a dummy cryptodata.csv file
# date_range already returns a DatetimeIndex, so no extra to_datetime is needed.
dates = pd.date_range(start='2020-01-01', periods=100)
symbols = ['BTC', 'ETH']
columns = ['date', 'symbol', 'open', 'high', 'low', 'close', 'volume', 'market_cap']

frames = []
for symbol in symbols:
    n_rows = len(dates)
    open_price = rng.uniform(100, 1000, size=n_rows)
    # High is 1-5% above the open and low is 1-5% below it, so low < open < high.
    high_price = open_price * rng.uniform(1.01, 1.05, size=n_rows)
    low_price = open_price * rng.uniform(0.95, 0.99, size=n_rows)
    # Close is drawn element-wise inside each row's own [low, high) interval.
    close_price = rng.uniform(low_price, high_price)
    volume = rng.uniform(100000, 10000000, size=n_rows)
    market_cap = close_price * rng.uniform(1e9, 1e11, size=n_rows)
    frames.append(pd.DataFrame({
        'date': dates,
        'symbol': symbol,
        'open': open_price,
        'high': high_price,
        'low': low_price,
        'close': close_price,
        'volume': volume,
        'market_cap': market_cap,
    }))

# Rows are grouped by symbol (all BTC days, then all ETH days), each in date order.
df_dummy = pd.concat(frames, ignore_index=True)[columns]
df_dummy.to_csv('data/raw/cryptodata.csv', index=False)

print("Dummy 'cryptodata.csv' created at 'data/raw/cryptodata.csv'")

Dummy 'cryptodata.csv' created at 'data/raw/cryptodata.csv'


Time series cross validation and hyperparameter tuning

In [5]:
# Assuming load_data, basic_clean, prepare_dataset functions are defined in a previous cell
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Define default arguments for notebook execution based on the pipeline_example.py script
input_path = 'data/raw/cryptodata.csv'
symbol_to_model = None # Set to 'BTC' or 'ETH' if you want to model a single symbol

# Load and clean data
df = load_data(input_path)
df = basic_clean(df)

# Prepare dataset with feature engineering
if symbol_to_model:
    df_sym = prepare_dataset(df, symbol=symbol_to_model)
else:
    df_sym = prepare_dataset(df)

# The split below is positional, so it is only a split "by date" if the rows are
# in chronological order. NOTE(review): prepare_dataset is defined elsewhere and
# its row order is not visible here; with several symbols the rows may be grouped
# by symbol, which would put later dates in the training slice. A stable sort on
# 'date' makes the ordering explicit and is a no-op when rows are already sorted.
if 'date' in df_sym.columns:
    df_sym = df_sym.sort_values('date', kind='stable')

# Define features and target: every column except identifiers, the target itself
# and the raw price / market-cap columns is used as a model input.
non_feature_cols = {'date', 'symbol', 'target_vol', 'close', 'open', 'high', 'low', 'market_cap'}
features = [c for c in df_sym.columns if c not in non_feature_cols]
X = df_sym[features].copy()
y = df_sym['target_vol'].values

# Train-test split by date (80/20): first 80% of rows train, last 20% test
cutoff = int(len(df_sym)*0.8)
X_train, X_test = X.iloc[:cutoff], X.iloc[cutoff:]
y_train, y_test = y[:cutoff], y[cutoff:]

# Scale features; the scaler is fitted on the training slice only so that
# test-set statistics do not leak into training.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

print("Data prepared and scaled. X_train_s and y_train are now defined.")

Data prepared and scaled. X_train_s and y_train are now defined.


Simple Streamlit app for local deployment

In [None]:
# deploy_streamlit.py
import streamlit as st
import pandas as pd
import joblib
import numpy as np
from datetime import datetime

st.title("Crypto Volatility Predictor (Local)")


@st.cache_resource
def _load_artifacts(path):
    """Load the saved training artifacts from ``path``, cached across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    avoids re-reading the model file each time.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only point this at model files you produced yourself.
    return joblib.load(path)


# The artifact is a dict holding the fitted model, the fitted scaler and the
# ordered list of feature names the model expects.
model_art = _load_artifacts('models/trained_rf.pkl')
model = model_art['model']
scaler = model_art['scaler']
features = model_art['features']

uploaded = st.file_uploader("Upload CSV (date,symbol,open,high,low,close,volume,market_cap)", type=['csv'])
if uploaded is not None:
    # A malformed file or a missing 'date' column makes read_csv raise a
    # ValueError subclass; show it as a message instead of a traceback.
    try:
        df = pd.read_csv(uploaded, parse_dates=['date'])
    except ValueError as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()

    # Only the columns this script reads directly are required here.
    missing_cols = [c for c in ('symbol', 'close') if c not in df.columns]
    if missing_cols:
        st.error("Uploaded CSV is missing required column(s): " + ", ".join(missing_cols))
        st.stop()

    # dropna keeps sorted() from failing on a mix of strings and NaN.
    symbol = st.selectbox("Symbol", options=sorted(df['symbol'].dropna().unique()))
    df_s = df[df['symbol']==symbol].sort_values('date').reset_index(drop=True)
    if df_s.empty:
        st.error("The uploaded CSV contains no rows for the selected symbol.")
        st.stop()

    # create features quickly using same functions (you can import them)
    # simplified: we assume features are prepared for latest row
    # For demo: compute last row features
    st.write("Preparing latest features...")
    # (import feature code or replicate minimal transforms)
    # For brevity in app, calculate some features:
    df_s['log_ret'] = np.log(df_s['close']).diff()
    # 7-row rolling std of log returns, scaled by sqrt(365) to annualize
    df_s['vol_7'] = df_s['log_ret'].rolling(7).std() * np.sqrt(365)

    # Build feature vector for most recent available day
    latest = df_s.iloc[-1]

    # Features the model expects but this script cannot derive keep the 0.0
    # fallback, but the user is told so the prediction is not trusted blindly.
    absent_feats = [f for f in features if f not in df_s.columns]
    if absent_feats:
        st.warning(
            "These model features are not available from the upload and were "
            "filled with 0.0: " + ", ".join(map(str, absent_feats))
        )
    feat_vec = [latest[f] if f in df_s.columns else 0.0 for f in features]

    # Rolling features are NaN until enough history exists (vol_7 needs 8 rows).
    nan_feats = [f for f, value in zip(features, feat_vec) if pd.isna(value)]
    if nan_feats:
        st.error(
            "Not enough history to compute feature(s): "
            + ", ".join(map(str, nan_feats))
            + ". Upload more rows for this symbol."
        )
        st.stop()

    X = scaler.transform([feat_vec])
    pred = model.predict(X)[0]
    st.metric("Predicted next-day vol (annualized)", f"{pred:.6f}")

In [8]:
!pip install streamlit joblib

Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m67.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0
