In [3]:
pip install yfinance

In [5]:
pip install --upgrade numpy


In [4]:
# LIVE MODEL SCORER - 30 DAYS DATA, PRICE-ONLY VERSION
import os
from datetime import datetime, timedelta
from io import BytesIO

import joblib
import numpy as np
import pandas as pd
import yfinance as yf
from azure.storage.filedatalake import DataLakeServiceClient

# -------------------------------
# 1. Load model from Azure Data Lake
# -------------------------------
# The storage account key is read from the environment rather than embedded in
# the notebook, so it never ends up in version control or shared exports.
# NOTE: any key that was previously committed in this notebook should be
# treated as compromised and rotated in the Azure portal.
ACCOUNT_URL = "https://ds562team9datalake.dfs.core.windows.net"
MODEL_FILE_SYSTEM = "gold"
MODEL_PATH = "model/best_hgb_model.pkl"

storage_key = os.environ.get("AZURE_STORAGE_ACCOUNT_KEY")
if not storage_key:
    raise RuntimeError(
        "Set the AZURE_STORAGE_ACCOUNT_KEY environment variable before running "
        "this notebook; it is required to download the model from the data lake."
    )

service_client = DataLakeServiceClient(
    account_url=ACCOUNT_URL,
    credential=storage_key,
)

fs_client = service_client.get_file_system_client(MODEL_FILE_SYSTEM)
file_client = fs_client.get_file_client(MODEL_PATH)

# Download the serialized model fully into memory.
stream = file_client.download_file().readall()

# SECURITY: joblib.load unpickles the payload, which can execute arbitrary code.
# Only load blobs from a container whose writers are trusted.
model = joblib.load(BytesIO(stream))

# -------------------------------
# 2. Get last 30 days BTC-USD data from yfinance
# -------------------------------
end_date = datetime.today()
start_date = end_date - timedelta(days=30)

# NOTE(review): yfinance documents `end` as exclusive, so today's (partial)
# candle is not part of the result — confirm this is the intended window.
df_price = yf.download(
    "BTC-USD",
    start=start_date.strftime('%Y-%m-%d'),
    end=end_date.strftime('%Y-%m-%d'),
    interval="1d",
)

# Fail early with a clear message instead of an obscure error further down
# (e.g. inside model.predict) when the download returned nothing.
if df_price.empty:
    raise RuntimeError(
        "yfinance returned no BTC-USD rows for the requested window; "
        "check network access and the date range."
    )

# Recent yfinance releases return MultiIndex columns (field, ticker) even for a
# single ticker. Keep only the field level so that, after the rename below,
# df_price["close_price"] is a Series rather than a one-column DataFrame.
if isinstance(df_price.columns, pd.MultiIndex):
    df_price.columns = df_price.columns.get_level_values(0)

# Clean & format: expose the date index as a column and use snake_case names.
df_price = (
    df_price
      .reset_index()
      .rename(columns={
         "Date":  "date",
         "Open":  "open_price",
         "High":  "high_price",
         "Low":   "low_price",
         "Close": "close_price",
         "Volume":"volume"
      })
)
# Reduce timestamps to plain calendar dates.
df_price["date"] = pd.to_datetime(df_price["date"]).dt.date

# -------------------------------
# 3. Feature Engineering
# -------------------------------
# Work on a copy so the downloaded price frame stays untouched.
df = df_price.copy()
close = df["close_price"]

# Daily return and its 3-day rolling standard deviation.
df["return_1d"] = close.pct_change()
df["volatility_3d"] = df["return_1d"].rolling(3).std()

# Simple moving averages of the close over 5 and 10 rows.
for window in (5, 10):
    df[f"sma_{window}"] = close.rolling(window).mean()

# RSI 14: ratio of the 14-row mean gain to the 14-row mean loss, mapped to 0-100.
delta = close.diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.rolling(14).mean()
avg_loss = loss.rolling(14).mean()
rs = avg_gain / avg_loss
df["rsi_14"] = 100 - (100 / (1 + rs))

# Placeholder sentiment features: all zero until a real sentiment source is wired in.
for placeholder_col in (
    "tweet_sentiment",
    "news_sentiment",
    "tweet_count",
    "news_count",
    "tweet_sent_roll3",
    "tweet_sent_roll7",
    "news_sent_roll3",
    "news_sent_roll7",
):
    df[placeholder_col] = 0

# -------------------------------
# 4. Prepare feature matrix & predict
# -------------------------------
# Column order matters: sentiment placeholders first, then the price-derived
# indicators, exactly as passed to the model.
sentiment_cols = [
    "tweet_sentiment",
    "news_sentiment",
    "tweet_count",
    "news_count",
    "tweet_sent_roll3",
    "tweet_sent_roll7",
    "news_sent_roll3",
    "news_sent_roll7",
]
technical_cols = ["sma_5", "sma_10", "rsi_14", "volatility_3d"]
feature_cols = sentiment_cols + technical_cols

# Rows inside the rolling warm-up window contain NaNs in the indicators; drop them.
df_features = df.dropna(subset=feature_cols).copy()
feature_matrix = df_features[feature_cols]
preds = model.predict(feature_matrix)

# -------------------------------
# 5. Add signals & save
# -------------------------------
def _label_prediction(p):
    """Translate one model output into "buy", "sell" or "hold" using +/-0.001 cut-offs."""
    if p >= 0.001:
        return "buy"
    if p <= -0.001:
        return "sell"
    return "hold"


df_features["signal"] = list(map(_label_prediction, preds))

# Show final signals: keep only the columns of interest and print the latest rows.
result_cols = ["date", "close_price", "signal"]
df_result = df_features[result_cols]
print(df_result.tail())

# Optional: persist the signals locally (could be uploaded to Azure instead).
df_result.to_csv("btc_signals_last30days.csv", index=False)
