In [77]:
import functions_ml as fml
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import hamming_loss, f1_score, jaccard_score, accuracy_score, multilabel_confusion_matrix, classification_report

Depois posso separar uma análise para problemas que não haviam no teste (30 plantas, 5 produtos, etc) (o dataset mantém o índice no y_test, basta fazer merge das colunas para filtrar plantas, produtos e dps dar drop)

# Oracle

In [56]:
# Load model
with open('trained_models/oracle.pkl', 'rb') as f:
    model = pickle.load(f)

# Test set ("new instances")

In [57]:
TEST_TOLERANCE_LIMIT = 0.01 / 100
test_results = pd.read_csv('datasets/test_instances_results.csv')
test_features = pd.read_csv('datasets/test_instances_features.csv')
test_set = fml.create_dataset(test_features, test_results)
test_set = fml.create_multi_label_target(test_set, TEST_TOLERANCE_LIMIT)

In [58]:
target_cols = [t for t in test_set.columns if t.startswith('RF')]
for t in target_cols:
    print(t, ':', test_set[t].sum())

RF_1_0 : 0
RF_2_0 : 0
RF_2_1 : 4
RF_3_0 : 0
RF_3_1 : 8
RF_3_2 : 0
RF_4_0 : 11
RF_4_1 : 0
RF_4_2 : 0
RF_4_3 : 0
RF_6_0 : 0
RF_6_1 : 0
RF_6_2 : 0
RF_6_3 : 0
RF_6_4 : 0
RF_6_5 : 0
RF_T_0 : 8


In [None]:
# Preprocessing (deve ser igual ao aplicado no desenvolvimento)
X_test = test_set.drop(columns=target_cols + ['instance'])
X_test = fml.binary_feature_selection(X_test)
y_test = test_set[target_cols]

## Predict

In [60]:
y_pred = pd.DataFrame(model.predict(X_test), columns=y_test.columns, index=y_test.index)
y_pred_proba = pd.DataFrame(model.predict_proba(X_test), columns=y_test.columns, index=y_test.index)

In [78]:
hamming = hamming_loss(y_test, y_pred)
print("Hamming Loss:", hamming)

f1 = f1_score(y_test, y_pred, average='micro')
print("Micro-Averaged F1 Score:", f1)

jaccard = jaccard_score(y_test, y_pred, average='samples')
print("Jaccard Similarity Score:", jaccard)

subset_accuracy = accuracy_score(y_test, y_pred)
print("Subset Accuracy:", subset_accuracy)

Hamming Loss: 0.39215686274509803
Micro-Averaged F1 Score: 0.3333333333333333
Jaccard Similarity Score: 0.17823565323565324
Subset Accuracy: 0.0


In [86]:
print(classification_report(y_test, y_pred, target_names=y_test.columns, zero_division="warn"))

              precision    recall  f1-score   support

      RF_1_0       0.00      0.00      0.00         0
      RF_2_0       0.00      0.00      0.00         0
      RF_2_1       0.75      0.75      0.75         4
      RF_3_0       0.00      0.00      0.00         0
      RF_3_1       1.00      0.50      0.67         8
      RF_3_2       0.00      0.00      0.00         0
      RF_4_0       1.00      0.55      0.71        11
      RF_4_1       0.00      0.00      0.00         0
      RF_4_2       0.00      0.00      0.00         0
      RF_4_3       0.00      0.00      0.00         0
      RF_6_0       0.00      0.00      0.00         0
      RF_6_1       0.00      0.00      0.00         0
      RF_6_2       0.00      0.00      0.00         0
      RF_6_3       0.00      0.00      0.00         0
      RF_6_4       0.00      0.00      0.00         0
      RF_6_5       0.00      0.00      0.00         0
      RF_T_0       1.00      0.88      0.93         8

   micro avg       0.22   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [70]:
method_evaluation_results = []
for method in y_test.columns:
    # Calculate true positives (correct assignments)
    true_positives = ((y_pred == 1) & (y_test == 1))[method].sum()

    # Calculate false positives (predicted as 1 but actual is 0)
    false_positives = ((y_pred == 1) & (y_test == 0))[method].sum()

    # Calculate false negatives (predicted as 0 but actual is 1)
    false_negatives = ((y_pred == 0) & (y_test == 1))[method].sum()

    # Calculate Precision
    if (true_positives + false_positives) > 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0

    # Calculate Recall
    if (true_positives + false_negatives) > 0:
        recall = true_positives / (true_positives + false_negatives)
    else:
        recall = 0 

    # Calculate F1-Score
    if (precision + recall) > 0:
        f1_score = (2 * precision * recall) / (precision + recall)
    else:
        f1_score = 0

    # Append results to the list
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': f'{precision * 100 :.2f}',
        'Recall (%)': f'{recall * 100 :.2f}',
        'F1-Score': f'{f1_score * 100 :.2f}'
    })
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)

In [71]:
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%),F1-Score
0,RF_1_0,0,0,0,0.0,0.0,0.0
1,RF_2_0,0,2,0,0.0,0.0,0.0
2,RF_2_1,3,1,1,75.0,75.0,75.0
3,RF_3_0,0,6,0,0.0,0.0,0.0
4,RF_3_1,4,0,4,100.0,50.0,66.67
5,RF_3_2,0,5,0,0.0,0.0,0.0
6,RF_4_0,6,0,5,100.0,54.55,70.59
7,RF_4_1,0,7,0,0.0,0.0,0.0
8,RF_4_2,0,5,0,0.0,0.0,0.0
9,RF_4_3,0,5,0,0.0,0.0,0.0


## Model probabilities

In [38]:
y_pred_proba

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
0,0.0,0.27,0.91,0.79,0.92,0.97,0.55,0.95,0.95,0.99,0.97,0.94,0.96,0.98,0.95,0.82,0.93
1,0.01,0.79,0.96,0.98,0.99,0.99,0.98,0.94,0.9,0.82,0.97,0.86,0.94,0.92,0.64,0.49,0.67
2,0.03,0.16,0.01,0.17,0.03,0.01,0.18,0.17,0.07,0.03,0.63,0.14,0.18,0.36,0.06,0.02,0.47
3,0.06,0.16,0.03,0.14,0.0,0.0,0.09,0.03,0.04,0.02,0.49,0.17,0.17,0.39,0.04,0.0,0.33
4,0.01,0.04,0.11,0.24,0.33,0.65,0.48,0.7,0.75,0.98,0.83,0.81,0.96,0.99,0.94,0.9,0.93
5,0.02,0.37,0.15,0.53,0.5,0.08,0.55,0.39,0.32,0.14,0.34,0.18,0.3,0.37,0.28,0.18,0.49
6,0.05,0.18,0.11,0.26,0.14,0.06,0.47,0.51,0.16,0.16,0.86,0.68,0.77,0.53,0.33,0.47,0.56
7,0.0,0.05,0.08,0.32,0.28,0.69,0.48,0.8,0.97,0.99,0.91,0.95,0.99,0.99,0.96,0.98,0.97
8,0.01,0.41,0.89,0.74,0.92,0.95,0.82,0.95,0.93,0.98,0.96,0.84,0.94,0.92,0.98,0.91,0.96
9,0.03,0.33,0.1,0.63,0.42,0.08,0.56,0.39,0.31,0.13,0.31,0.18,0.27,0.28,0.2,0.13,0.5


### Select top K

In [39]:
ranked_methods = np.argsort(-y_pred_proba, axis=1)
# Top-K Selection: Select top-K methods based on ranking
TOP_K = 3
top_k_methods = np.zeros_like(y_pred_proba, dtype=int)

# Set top-K methods as selected (binary)
for i, row in enumerate(ranked_methods):
    top_indices = row[:TOP_K]  # Get top-K indices for this instance
    top_k_methods[i, top_indices] = 1

# Convert to DataFrame for comparison
top_k_methods_df = pd.DataFrame(top_k_methods, columns=y_test.columns)

# Display the top-K selected methods
top_k_methods_df.head()

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0
1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1
3,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1
4,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0


In [40]:
len(test_set)

12

In [41]:
# Element-wise multiplication to find correct predictions
correct_predictions = top_k_methods_df * test_set

# Sum the total number of correct predictions
total_correct = correct_predictions.sum().sum()  # Sum across all rows and columns
print(f"Total Correct Predictions: {total_correct}")

# Optionally, calculate the number of correct predictions per instance
correct_per_instance = correct_predictions.sum(axis=1)
print("\nCorrect Predictions Per Instance:")
print(correct_per_instance)

# Optionally, calculate the accuracy as a percentage
total_possible = TOP_K * len(top_k_methods_df)  # Total number of actual positive labels
accuracy = (total_correct / total_possible) * 100
print(f"\nAccuracy: {accuracy:.2f}%")

Total Correct Predictions: 8.0

Correct Predictions Per Instance:
0     0.0
1     1.0
2     1.0
3     1.0
4     0.0
5     2.0
6     0.0
7     0.0
8     0.0
9     2.0
10    0.0
11    1.0
dtype: float64

Accuracy: 22.22%


In [42]:
# Step 1: Calculate the number of correct predictions per row (instance)
correct_per_instance = correct_predictions.sum(axis=1)

# Step 2: Identify rows with zero correct predictions
missed_all = (correct_per_instance == 0).sum()  # Count rows with no correct predictions
total_instances = len(correct_predictions)  # Total number of rows

# Step 3: Calculate the accuracy for missed rows
missed_accuracy = (missed_all / total_instances) * 100

# Output the results
print(f"Number of Rows with All Incorrect Predictions: {missed_all}")
print(f"Percentage of Rows with All Incorrect Predictions: {missed_accuracy:.2f}%")

Number of Rows with All Incorrect Predictions: 6
Percentage of Rows with All Incorrect Predictions: 50.00%


### Performance what if only 1 method

In [43]:
# Step 1: Initialize a DataFrame to store results
method_evaluation_results = []

# Step 2: Loop through each method
for method in y_test.columns:
    # Create a binary matrix where the current method is always 1 and others are 0
    single_method_matrix = np.zeros_like(y_test, dtype=int)
    single_method_matrix[:, y_test.columns.get_loc(method)] = 1  # Set current method column to 1
    
    # Convert to DataFrame
    single_method_df = pd.DataFrame(single_method_matrix, columns=y_test.columns, index=y_test.index)

    # Calculate true positives (correct assignments)
    true_positives = ((single_method_df == 1) & (y_test == 1))[method].sum()

    # Calculate false positives (predicted as 1 but actual is 0)
    false_positives = ((single_method_df == 1) & (y_test == 0))[method].sum()

    # Calculate false negatives (predicted as 0 but actual is 1)
    false_negatives = ((single_method_df == 0) & (y_test == 1))[method].sum()

    # Calculate Precision
    if (true_positives + false_positives) > 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0.0  # Handle division by zero

    # Calculate Recall
    if (true_positives + false_negatives) > 0:
        recall = true_positives / (true_positives + false_negatives)
    else:
        recall = 0.0  # Handle division by zero

    # Append results to the list
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': precision * 100,
        'Recall (%)': recall * 100
    })

In [44]:
# Step 3: Convert results to a DataFrame for easier visualization
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)

# Display the evaluation results
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%)
0,RF_1_0,0,12,0,0.0,0.0
1,RF_2_0,0,12,0,0.0,0.0
2,RF_2_1,4,8,0,33.333333,100.0
3,RF_3_0,0,12,0,0.0,0.0
4,RF_3_1,8,4,0,66.666667,100.0
5,RF_3_2,0,12,0,0.0,0.0
6,RF_4_0,11,1,0,91.666667,100.0
7,RF_4_1,0,12,0,0.0,0.0
8,RF_4_2,0,12,0,0.0,0.0
9,RF_4_3,0,12,0,0.0,0.0
