In [21]:
# ==========================================
# 1. SETUP & IMPORTS
# ==========================================
import sys
import os
import importlib
import pandas as pd

# Make the project's 'src' folder importable.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path (the original appended unconditionally on every run).
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import the project's predict module (expected to live in the 'src' folder
# added above).
import predict

# Force reload so that edits made to predict.py since the first import are
# picked up without restarting the kernel.
importlib.reload(predict)

# ==========================================
# 2. LOAD DATA
# ==========================================
# We load the full dataset, but pretend the last N_RECENT_ROWS rows are
# "New Incoming Data".
# NOTE(review): treating rows as "days" assumes one row per day in
# chronological order — confirm against the labeling step.
data_path = '../data/labeled/labeled_data.csv'
N_RECENT_ROWS = 20  # size of the simulated "recent market activity" window

# Fail fast when the input is missing. Merely printing a message would leave
# `df_recent` undefined (or, worse, stale from an earlier run in the same
# kernel), so later cells would crash confusingly or silently use old data.
if not os.path.exists(data_path):
    raise FileNotFoundError(
        f"Labeled data not found at '{data_path}'. Run previous steps first."
    )

df_full = pd.read_csv(data_path)
# .copy() keeps df_recent independent of df_full.
df_recent = df_full.tail(N_RECENT_ROWS).copy()
print(f"Loaded {len(df_recent)} rows of recent data.")

Loaded 20 rows of recent data.


In [22]:
def ask_model(message, frame, model_name=None):
    """Announce a model query and run it through the predict module.

    Args:
        message: Progress line printed before the prediction call.
        frame: DataFrame handed to ``predict.predict_from_dataframe``.
        model_name: Name of a specific saved model. When ``None`` the
            ``model_name`` argument is omitted entirely, so the predict
            module applies its own default.

    Returns:
        Whatever ``predict.predict_from_dataframe`` returns for ``frame``.
    """
    print(message)
    # Only forward model_name when one was requested, so the default call has
    # exactly the same shape as calling predict_from_dataframe(frame).
    extra = {} if model_name is None else {"model_name": model_name}
    return predict.predict_from_dataframe(frame, **extra)


# ==========================================
# SCENARIO A: USE THE "WINNER" (Default)
# ==========================================
# If we don't provide a name, it looks for 'best_crypto_model.pkl'
results_best = ask_model("asking the WINNER (Best Model)...", df_recent)

# ==========================================
# SCENARIO B: ASK SPECIFIC MODELS
# ==========================================
# 1. Ask XGBoost
results_xgb = ask_model("Asking XGBoost...", df_recent, "XGBoost")

# 2. Ask Random Forest
results_rf = ask_model("Asking Random Forest...", df_recent, "RandomForest")

# 3. Ask LogisticRegression
results_log = ask_model("Asking LogisticRegression...", df_recent, "LogisticRegression")

# 4. Ask LightGBM (the message previously read "Random LightGBM", a
#    copy-paste slip from the Random Forest step)
results_lgbm = ask_model("Asking LightGBM...", df_recent, "LightGBM")

# 5. Ask CatBoost
results_cat = ask_model("Asking CatBoost...", df_recent, "CatBoost")



asking the WINNER (Best Model)...
    Loading model: best_crypto_model...
Asking XGBoost...
    Loading model: XGBoost...
Asking Random Forest...
    Loading model: RandomForest...
Asking LogisticRegression...
    Loading model: LogisticRegression...
Asking Random LightGBM...
    Loading model: LightGBM...
Asking CatBoost...
    Loading model: CatBoost...


In [23]:
import pandas as pd

# Create a clean comparison table: one row per recent data row, one column
# per model holding that model's 'prediction_text'.
# Column label -> prediction result produced by the model-query cell.
model_results = {
    'WINNER': results_best,
    'XGB': results_xgb,
    'RF': results_rf,
    'LogReg': results_log,
    'LGBM': results_lgbm,
    'CatBoost': results_cat,
}

# `.values` drops each index so the columns line up purely by position;
# the DataFrame constructor raises if any result has a different length
# than df_recent.
comparison_df = pd.DataFrame({
    'Date': df_recent['open_time'].values,
    'Close_Price': df_recent['close'].values,
    **{
        label: result['prediction_text'].values
        for label, result in model_results.items()
    },
})

# Format Date as YYYY-MM-DD strings
comparison_df['Date'] = pd.to_datetime(comparison_df['Date']).dt.strftime('%Y-%m-%d')

# Display the last 10 rows
# (the emoji in this heading was mis-encoded as "ðŸ“Š"; restored to the intended character)
print("\n📊 MODEL CONSENSUS TABLE (Last 10 Days):")
display(comparison_df.tail(10))


📊 MODEL CONSENSUS TABLE (Last 10 Days):


Unnamed: 0,Date,Close_Price,WINNER,XGB,RF,LogReg,LGBM,CatBoost
10,2025-11-22,84739.74,BUY,BUY,BUY,BUY,BUY,BUY
11,2025-11-23,86830.0,BUY,HOLD,BUY,HOLD,BUY,BUY
12,2025-11-24,88300.01,BUY,SELL,BUY,BUY,BUY,BUY
13,2025-11-25,87369.96,BUY,HOLD,BUY,BUY,BUY,BUY
14,2025-11-26,90484.02,HOLD,HOLD,HOLD,BUY,BUY,HOLD
15,2025-11-27,91333.95,SELL,BUY,HOLD,BUY,BUY,SELL
16,2025-11-28,90890.7,SELL,BUY,HOLD,HOLD,BUY,SELL
17,2025-11-29,90802.44,SELL,BUY,HOLD,BUY,BUY,SELL
18,2025-11-30,90360.0,SELL,HOLD,HOLD,HOLD,BUY,SELL
19,2025-12-01,86286.01,HOLD,HOLD,HOLD,SELL,SELL,HOLD


In [24]:
# Vote counting: tally how many model columns say BUY and how many say SELL.
# NOTE(review): in the recorded table output the WINNER column matches the
# CatBoost column on every displayed row. If WINNER is simply the best of the
# other five models, its vote is effectively counted twice — confirm whether
# it belongs in this list.
models_cols = ['WINNER', 'XGB', 'RF', 'LogReg', 'LGBM', 'CatBoost']


def count_votes(row):
    """Return ``(buy_votes, sell_votes)`` for one comparison-table row.

    ``row`` is indexed by each name in ``models_cols``; values equal to
    'BUY' and 'SELL' are counted, anything else (e.g. 'HOLD') is ignored.
    """
    signals = [row[name] for name in models_cols]
    return signals.count('BUY'), signals.count('SELL')

# Report the consensus for the most recent row of the comparison table.
latest_row = comparison_df.iloc[-1]
buy_votes, sell_votes = count_votes(latest_row)
total_models = len(models_cols)

# Minimum number of agreeing votes for a signal to count as "strong".
STRONG_CONSENSUS_MIN = 5

# BUY is checked first, so it takes precedence over SELL.
verdict = " MIXED SIGNALS (No Consensus)"
if buy_votes >= STRONG_CONSENSUS_MIN:
    verdict = " STRONG BUY SIGNAL (High Consensus)"
elif sell_votes >= STRONG_CONSENSUS_MIN:
    verdict = " STRONG SELL SIGNAL (High Consensus)"

report_lines = [
    f"\n LATEST SIGNAL FOR {latest_row['Date']}:",
    f"   Price: ${latest_row['Close_Price']}",
    f"   Buy Votes:  {buy_votes}/{total_models}",
    f"   Sell Votes: {sell_votes}/{total_models}",
    verdict,
]
for line in report_lines:
    print(line)


 LATEST SIGNAL FOR 2025-12-01:
   Price: $86286.01
   Buy Votes:  0/6
   Sell Votes: 2/6
 MIXED SIGNALS (No Consensus)
