In [11]:
import numpy as np
import pandas as pd
from numba.core.cgutils import false_bit
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Sample dataset (replace with your own)
np.random.seed(42)
X = pd.DataFrame(np.random.rand(1000, 1), columns=[f'feat_{i}' for i in range(5)])
# y_base = np.random.choice([0, 1], size=1000)  # Primary label (e.g., buy/sell)
y_base = (abs(X.iloc[:,0])*100).astype(int) % 10
y_base = y_base == 3
confidence_threshold = 0.6  # Define a threshold for meta-labeling

# Split data
X_train, X_test, y_train_base, y_test_base = train_test_split(X, y_base, test_size=0.3)

# Step 1: Train the primary model
base_model = RandomForestClassifier(n_estimators=100, random_state=42)
base_model.fit(X_train, y_train_base)

# Get probabilities from base model
y_pred_proba = base_model.predict_proba(X_test)[:, 1]  # Probability of class 1
y_pred_base = (y_pred_proba > 0.5).astype(int)  # Convert to binary predictions

# Step 2: Create meta-labels
# Meta-label is 1 if the base model was confident & correct, else 0
y_meta = np.where((y_pred_proba > confidence_threshold) & (y_pred_base == y_test_base), 1, 0)

# Filter dataset for confident predictions
X_meta = X_test[y_pred_proba > confidence_threshold]
y_meta = y_meta[y_pred_proba > confidence_threshold]


ValueError: Shape of passed values is (1000, 1), indices imply (1000, 5)

In [3]:
# Train the meta-model (Random Forest)
meta_model = RandomForestClassifier(n_estimators=100, random_state=42)
meta_model.fit(X_meta, y_meta)

# Predict on new data
y_meta_pred = meta_model.predict(X_meta)

# Evaluate meta model
meta_accuracy = accuracy_score(y_meta, y_meta_pred)
print(f'Meta Model Accuracy: {meta_accuracy:.4f}')


Meta Model Accuracy: 1.0000
