# In [2]:
import numpy as np
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from metrics import Metrics

# Test binary classification
def test_binary_classification():
    """Exercise ``Metrics`` end to end on a synthetic two-class problem.

    A 1000-sample dataset is generated, 30% of it is held out, and a
    100-tree random forest is fitted on the rest.  The held-out labels,
    the predicted labels and the predicted class probabilities are handed
    to ``Metrics`` with ``task_type='binary_classification'``.  The report
    is printed, and the object returned by ``visualize(plot_type='all')``
    is saved as ``binary_classification_metrics.png``.

    Returns:
        The ``Metrics`` instance after ``calculate_all()`` has been run.
    """
    # Fixed random_state values keep data, split and model repeatable.
    features, labels = make_classification(n_samples=1000, n_classes=2, random_state=42)
    train_x, holdout_x, train_y, holdout_y = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(train_x, train_y)

    # Hard labels and per-class probabilities for the held-out rows.
    hard_labels = forest.predict(holdout_x)
    class_probs = forest.predict_proba(holdout_x)

    evaluator = Metrics()
    evaluator.set_data(
        y_true=holdout_y,
        y_pred=hard_labels,
        y_prob=class_probs,
        task_type='binary_classification',
    )
    evaluator.calculate_all()

    print("\n=== BINARY CLASSIFICATION RESULTS ===")
    print(evaluator.get_report())
    evaluator.visualize(plot_type='all').savefig('binary_classification_metrics.png')

    return evaluator

# Test multiclass classification
def test_multiclass_classification():
    """Exercise ``Metrics`` end to end on a synthetic four-class problem.

    A 1000-sample dataset is generated, 30% of it is held out, and a
    100-tree random forest is fitted on the rest.  The held-out labels,
    the predicted labels and the predicted class probabilities are handed
    to ``Metrics`` with ``task_type='multiclass_classification'``.  The
    report is printed, and the object returned by
    ``visualize(plot_type='confusion')`` is saved as
    ``multiclass_classification_metrics.png``.

    Returns:
        The ``Metrics`` instance after ``calculate_all()`` has been run.
    """
    # NOTE(review): n_informative=8 is presumably needed because the
    # make_classification default is too small for four classes -- confirm
    # against the scikit-learn documentation before changing it.
    features, labels = make_classification(
        n_samples=1000, n_classes=4, n_informative=8, random_state=42
    )
    train_x, holdout_x, train_y, holdout_y = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(train_x, train_y)

    # Hard labels and per-class probabilities for the held-out rows.
    hard_labels = forest.predict(holdout_x)
    class_probs = forest.predict_proba(holdout_x)

    evaluator = Metrics()
    evaluator.set_data(
        y_true=holdout_y,
        y_pred=hard_labels,
        y_prob=class_probs,
        task_type='multiclass_classification',
    )
    evaluator.calculate_all()

    print("\n=== MULTICLASS CLASSIFICATION RESULTS ===")
    print(evaluator.get_report())
    # Only the confusion plot is requested for this scenario.
    evaluator.visualize(plot_type='confusion').savefig('multiclass_classification_metrics.png')

    return evaluator

# Test regression
def test_regression():
    """Exercise ``Metrics`` end to end on a synthetic regression problem.

    A 1000-sample, 10-feature dataset with ``noise=0.5`` is generated, 30%
    of it is held out, and a 100-tree random forest regressor is fitted on
    the rest.  The held-out targets and the predictions are handed to
    ``Metrics`` with ``task_type='regression'`` (no probabilities are
    supplied).  The report is printed, and the object returned by
    ``visualize(plot_type='all')`` is saved as ``regression_metrics.png``.

    Returns:
        The ``Metrics`` instance after ``calculate_all()`` has been run.
    """
    # Fixed random_state values keep data, split and model repeatable.
    features, targets = make_regression(
        n_samples=1000, n_features=10, noise=0.5, random_state=42
    )
    train_x, holdout_x, train_y, holdout_y = train_test_split(
        features, targets, test_size=0.3, random_state=42
    )

    forest = RandomForestRegressor(n_estimators=100, random_state=42).fit(train_x, train_y)
    estimates = forest.predict(holdout_x)

    evaluator = Metrics()
    evaluator.set_data(y_true=holdout_y, y_pred=estimates, task_type='regression')
    evaluator.calculate_all()

    print("\n=== REGRESSION RESULTS ===")
    print(evaluator.get_report())
    evaluator.visualize(plot_type='all').savefig('regression_metrics.png')

    return evaluator

if __name__ == "__main__":
    # Run every scenario once; each call prints its own report and saves
    # its own figure before handing back the Metrics object.
    binary_metrics = test_binary_classification()
    multiclass_metrics = test_multiclass_classification()
    regression_metrics = test_regression()

    # Print each result again, this time requested with format='dataframe',
    # under its own heading and in the same order the scenarios ran.
    print("\n=== METRICS COMPARISON ===")
    for _heading, _result in (
        ("Binary Classification Metrics:", binary_metrics),
        ("\nMulticlass Classification Metrics:", multiclass_metrics),
        ("\nRegression Metrics:", regression_metrics),
    ):
        print(_heading)
        print(_result.get_report(format='dataframe'))

# Recorded cell output: ModuleNotFoundError: No module named 'numpy'