In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import confusion_matrix, f1_score, make_scorer
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split

In [10]:
# Synthetic binary-classification dataset; the fixed seed keeps it identical across runs.
x, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Hold out 20% of the samples as a test set (seeded for a stable split).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Seed the forest as well: without random_state every run bootstraps and picks
# split features differently, so cross-validation scores would not reproduce.
model = RandomForestClassifier(random_state=42)

**Creating Custom Metrics**

In [None]:
# Weighted-average F1 metric (usable for binary and multiclass targets)
def custom_f1_score(y_true, y_pred):
    """Compute the F1 score using the 'weighted' averaging mode.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the estimator.

    Returns:
        The value returned by ``f1_score`` with ``average='weighted'``.
    """
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    return weighted_f1

# Wrap the metric as a scorer; a higher F1 means a better model.
custom_scorer = make_scorer(score_func=custom_f1_score, greater_is_better=True)

# Evaluate the classifier on five cross-validation folds with that scorer.
scores = cross_val_score(
    estimator=model,
    X=x,
    y=y,
    scoring=custom_scorer,
    cv=5,
)

print(scores)

[0.92499772 0.88047819 0.9038656  0.86560824 0.88097104]


**Business-Specific Metrics**

In [11]:
# Cost-sensitive metric
def cost_sensitive_score(y_true, y_pred, labels=(0, 1), fp_cost=1, fn_cost=5):
    """Return the negated total misclassification cost of binary predictions.

    The result is negated so that a larger value (closer to zero) means a
    cheaper, i.e. better, set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the estimator.
        labels: ``(negative, positive)`` class labels, in that order.
        fp_cost: Cost charged for each false positive.
        fn_cost: Cost charged for each false negative.

    Returns:
        ``-(fp_cost * n_false_positives + fn_cost * n_false_negatives)``.
    """
    # Pin the labels so the matrix is always 2x2. Without them, a fold where
    # only one class appears produces a 1x1 matrix, which would broadcast
    # against the costs and silently yield a wrong total.
    cm = confusion_matrix(y_true, y_pred, labels=list(labels))

    # Rows are true classes, columns are predicted classes:
    # [[TN, FP],
    #  [FN, TP]]
    cost_matrix = np.array([[0, fp_cost],
                            [fn_cost, 0]])
    return -np.sum(cm * cost_matrix)

# The metric already returns a negated cost, so larger values are better.
cost_scorer = make_scorer(score_func=cost_sensitive_score, greater_is_better=True)

# Evaluate the classifier on five cross-validation folds with the cost scorer.
scores = cross_val_score(
    estimator=model,
    X=x,
    y=y,
    scoring=cost_scorer,
    cv=5,
)

print(scores)

[-43 -63 -85 -56 -85]


**Regression Custom Metrics**

In [None]:
def percentage_error(y_true, y_pred):
    """Mean absolute percentage error (MAPE), expressed in percent.

    Args:
        y_true: Array-like of ground-truth target values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` multiplied by 100.
        Zero-valued targets use machine epsilon as the denominator, so the
        result stays finite (though very large when the prediction is off).
    """
    # Convert up front so plain Python lists work as well as arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Floor the denominator at machine epsilon: dividing by a zero target
    # would otherwise produce inf/nan and poison the averaged score.
    denominator = np.maximum(np.abs(y_true), np.finfo(float).eps)
    return np.mean(np.abs(y_true - y_pred) / denominator) * 100

# Percentage error is a loss, so greater_is_better=False makes the scorer
# negate it; grid search then maximises the negated error.
percentage_scorer = make_scorer(percentage_error, greater_is_better=False)

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, None]
}

# A percentage-error metric is only meaningful for continuous targets, so the
# search tunes a regressor rather than the classifier `model` defined earlier.
# The seed keeps the search results reproducible.
regressor = RandomForestRegressor(random_state=42)

grid_search = GridSearchCV(
    regressor, param_grid,
    cv=5,
    scoring=percentage_scorer
)