In [7]:
from sklearn.metrics import confusion_matrix
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import math

def dynamic_skewness_weight(mean_f1, imbalance_ratio, num_classes, error_rate):
    """Blend four summary statistics into a single scalar weight.

    Each input is first divided by a fixed reference scale (1, 20, 10 and 1
    respectively) and the scaled values are then combined linearly with the
    coefficients 0.4, 0.3, 0.2 and 0.1. No clamping is applied, so inputs
    larger than their reference scale contribute proportionally more.

    Args:
        mean_f1: Mean F1 score (scale 1, coefficient 0.4).
        imbalance_ratio: Class imbalance ratio (scale 20, coefficient 0.3).
        num_classes: Number of classes (scale 10, coefficient 0.2).
        error_rate: Classification error rate (scale 1, coefficient 0.1).

    Returns:
        The weighted sum of the four scaled inputs.
    """
    f1_term = 0.4 * (mean_f1 / 1)
    imbalance_term = 0.3 * (imbalance_ratio / 20)
    classes_term = 0.2 * (num_classes / 10)
    error_term = 0.1 * (error_rate / 1)
    # Summed left to right in the same order as the coefficients above.
    return f1_term + imbalance_term + classes_term + error_term

def calculate_advanced_metrics_with_skewness_weight(cm, dynamic_weight):
    """Return the mean per-class F1 of a confusion matrix, scaled by a weight.

    For every class ``i`` the true positives are ``cm[i, i]``, the false
    negatives are the rest of row ``i`` and the false positives are the rest
    of column ``i``. Precision, recall and F1 are derived from those counts,
    each F1 is multiplied by ``dynamic_weight``, and the products are
    averaged over the classes. Since the same weight is applied to every
    class, the result equals ``dynamic_weight`` times the unweighted mean F1.

    A precision, recall or F1 whose denominator is zero is defined as 0.

    Args:
        cm: Square confusion matrix; a NumPy array or anything
            ``np.asarray`` accepts (e.g. a list of lists).
        dynamic_weight: Scalar multiplier applied to every per-class F1.

    Returns:
        The weighted F1 sum divided by the number of classes.

    Raises:
        ZeroDivisionError: If ``cm`` contains no classes.
    """
    cm = np.asarray(cm)
    num_classes = len(cm)
    weighted_f1_sum = 0.0

    for i in range(num_classes):
        # Convert to plain floats and guard the denominators explicitly:
        # dividing NumPy scalars by zero yields nan/inf with a warning rather
        # than raising ZeroDivisionError, so a try/except would never fire.
        tp = float(cm[i, i])
        fn = float(cm[i].sum()) - tp
        fp = float(cm[:, i].sum()) - tp

        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0

        if precision + recall:
            f1_c = 2 * (precision * recall) / (precision + recall)
        else:
            f1_c = 0.0

        weighted_f1_sum += dynamic_weight * f1_c

    modified_penalized_f1 = weighted_f1_sum / num_classes
    return modified_penalized_f1


# Load your real-world dataset here
data = load_iris()
X = data['data']
y = data['target']

# For demonstration, make it imbalanced by merging class 2 into class 1.
# Note: this edits the loaded target array in place.
y[y == 2] = 1

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a classifier (Random Forest in this example)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Generate a confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Calculate mean F1 score from the confusion matrix.
# Per class, F1 = 2*tp / (2*tp + fp + fn), and that denominator is simply
# (row total + column total). NumPy division never raises ZeroDivisionError
# (it yields nan/inf with a warning), so classes with an empty denominator
# are masked out explicitly and keep an F1 of 0.
true_pos = np.diag(cm).astype(float)
f1_denominator = (cm.sum(axis=1) + cm.sum(axis=0)).astype(float)
f1_scores = np.divide(
    2 * true_pos,
    f1_denominator,
    out=np.zeros_like(true_pos),
    where=f1_denominator > 0,
).tolist()

mean_f1 = np.mean(f1_scores)

# Calculate the imbalance ratio from the dataset.
# Count only labels that actually occur in the training split, so a label
# value missing from y_train cannot contribute a zero count to the ratio.
_, counts = np.unique(y_train, return_counts=True)
imbalance_ratio = counts.max() / counts.min()

# Calculate the error rate from the confusion matrix
# (the trace is the number of correctly classified test samples).
error_rate = (len(y_test) - np.trace(cm)) / len(y_test)

# Number of unique classes
num_classes = len(np.unique(y))

# Calculate dynamic skewness weight
dynamic_weight = dynamic_skewness_weight(mean_f1, imbalance_ratio, num_classes, error_rate)
print("Dynamic Weight:", dynamic_weight)

# Calculate the Modified Penalized F1 score using the dynamic weight
modified_f1_score = calculate_advanced_metrics_with_skewness_weight(cm, dynamic_weight)

from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score
import numpy as np
import math

# Baseline scikit-learn metrics, computed on the same predictions for comparison.

# Reference metrics computed by scikit-learn on the same test predictions.
accuracy = accuracy_score(y_test, y_pred)
# 'weighted' averaging is used for the standard F1 (suited to multi-class problems).
standard_f1 = f1_score(y_test, y_pred, average='weighted')
balanced_acc = balanced_accuracy_score(y_test, y_pred)

# Print every metric as "label value", in a fixed order, for comparison,
# followed by the raw confusion matrix.
metric_report = (
    ("Modified Penalized F1 Score with Dynamic Weight:", modified_f1_score),
    ("Accuracy:", accuracy),
    ("Standard F1 Score:", standard_f1),
    ("Balanced Accuracy:", balanced_acc),
)
for metric_name, metric_value in metric_report:
    print(metric_name, metric_value)
print(cm)


Dynamic Weight: 0.4700000000000001
Modified Penalized F1 Score with Dynamic Weight: 0.4700000000000001
Accuracy: 1.0
Standard F1 Score: 1.0
Balanced Accuracy: 1.0
[[10  0]
 [ 0 20]]
