In [None]:
import sys
from pathlib import Path

# Make the project root (the parent of the current working directory) importable.
project_root = Path.cwd().parent
# Guard the append so re-running this cell does not stack duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import evaluation modules
from config.evaluation_config import get_config
from features.evaluation import evaluate_feature_extraction
from models.evaluation import evaluate_predictions
from utils.mlflow_utils import get_tracker
from utils.report_utils import get_report_generator


In [None]:
# Load the evaluation configuration
config = get_config()

# Create the MLflow tracker for this demo's experiment
EXPERIMENT_NAME = "evaluation_demo"
tracker = get_tracker(experiment_name=EXPERIMENT_NAME)

# Build the report generator on top of that same tracker
report_gen = get_report_generator(mlflow_tracker=tracker)


In [None]:
# Read the sample extracted features from the project's processed-data CSV
import pandas as pd
features_csv = project_root / "data/processed/extracted_features.csv"
features_df = pd.read_csv(features_csv)

# Run the feature-extraction evaluation while the tracker context is active
with tracker:
    required_fields = config.feature_extraction.required_fields
    feature_eval_results = evaluate_feature_extraction(
        features_df, expected_schema=required_fields
    )

# Summarise the headline numbers: (label, result key, value format)
print("Feature Extraction Evaluation Results:")
for label, key, template in (
    ("Completeness", "completeness", "{:.2%}"),
    ("Accuracy", "accuracy", "{:.2%}"),
    ("Average Latency", "avg_latency_ms", "{:.2f}ms"),
):
    print(f"{label}: " + template.format(feature_eval_results[key]))


In [None]:
# Prepare test labels and predictions (simulated for this demo)
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Simulate binary labels and predictions (replace with actual model predictions).
# A seeded Generator makes the simulated data, and therefore every metric derived
# from it, identical on each Restart & Run All.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)
y_true = rng.integers(0, 2, size=N_SAMPLES)  # values drawn from {0, 1}
y_pred = rng.integers(0, 2, size=N_SAMPLES)

# Score the predictions against the labels while the tracker context is active
model_metadata = {"name": "virality_predictor_v1"}
with tracker:
    pred_eval_results = evaluate_predictions(
        y_true=y_true, y_pred=y_pred, model_metadata=model_metadata
    )

# Print each classification metric as a percentage: (label, result key)
print("\nPrediction Evaluation Results:")
for label, key in (
    ("Accuracy", "accuracy"),
    ("Precision", "precision"),
    ("Recall", "recall"),
    ("F1 Score", "f1"),
):
    print(f"{label}: {pred_eval_results[key]:.2%}")


In [None]:
# Combine results for report.
# Both result dicts are read with an 'accuracy' key above, and in a plain
# {**a, **b} merge the later (prediction) value silently replaces the
# feature-extraction one. The merged keys are kept exactly as before, and every
# key present in both dicts is additionally stored under a component-prefixed
# name so neither value is lost.
combined_metrics = {**feature_eval_results, **pred_eval_results}
for shared_key in [k for k in feature_eval_results if k in pred_eval_results]:
    combined_metrics[f"feature_extraction_{shared_key}"] = feature_eval_results[shared_key]
    combined_metrics[f"prediction_{shared_key}"] = pred_eval_results[shared_key]

report_path = report_gen.generate_report(
    evaluation_type="full_system",
    metrics=combined_metrics,
    component_name="virality_predictor",
    version="1.0.0",
    dataset_size=len(y_true),
    environment="development",
    data_quality={
        "missing_rate": feature_eval_results["missing_rate"],
        "invalid_rate": feature_eval_results["invalid_rate"]
    },
    performance_metrics={
        "feature_extraction_latency_ms": feature_eval_results["avg_latency_ms"],
        "prediction_latency_ms": pred_eval_results["avg_inference_time_ms"]
    },
    error_analysis={
        "feature_errors": feature_eval_results["error_details"],
        "prediction_errors": pred_eval_results["error_analysis"]
    },
    recommendations=[
        "Improve feature completeness for 'hashtags' field",
        "Optimize prediction latency",
        "Add more training data for rare categories"
    ]
)

print(f"\nEvaluation report generated at: {report_path}")
