In [1]:
import functions_ml as fml
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import hamming_loss, f1_score, jaccard_score, accuracy_score, multilabel_confusion_matrix, classification_report

Depois posso separar uma análise para problemas que não haviam no teste (30 plantas, 5 produtos, etc) (o dataset mantém o índice no y_test, basta fazer merge das colunas para filtrar plantas, produtos e dps dar drop)

# Oracle

In [2]:
# Load model
with open('trained_models/oracle.pkl', 'rb') as f:
    model = pickle.load(f)

# Test set ("new instances")

In [3]:
TEST_TOLERANCE_LIMIT = 0.01 / 100
test_results = pd.read_csv('datasets/test_instances_results.csv')
test_features = pd.read_csv('datasets/test_instances_features.csv')
test_set = fml.create_dataset(test_features, test_results)
test_set = fml.create_multi_label_target(test_set, TEST_TOLERANCE_LIMIT)

In [4]:
target_cols = [t for t in test_set.columns if t.startswith('RF')]
for t in target_cols:
    print(t, ':', test_set[t].sum())

RF_1_0 : 4
RF_2_0 : 8
RF_2_1 : 6
RF_3_0 : 8
RF_3_1 : 8
RF_3_2 : 11
RF_4_0 : 11
RF_4_1 : 10
RF_4_2 : 10
RF_4_3 : 8
RF_6_0 : 12
RF_6_1 : 10
RF_6_2 : 12
RF_6_3 : 10
RF_6_4 : 11
RF_6_5 : 6
RF_T_0 : 8


In [5]:
# Preprocessing (deve ser igual ao aplicado no desenvolvimento)
X_test = test_set.drop(columns=target_cols + ['instance'])
X_test = fml.binary_feature_selection(X_test)
y_test = test_set[target_cols]

## Predict

In [6]:
y_pred = pd.DataFrame(model.predict(X_test), columns=y_test.columns, index=y_test.index)
y_pred_proba = pd.DataFrame(model.predict_proba(X_test), columns=y_test.columns, index=y_test.index)

In [7]:
hamming = hamming_loss(y_test, y_pred)
print("Hamming Loss:", hamming)

f1 = f1_score(y_test, y_pred, average='micro')
print("Micro-Averaged F1 Score:", f1)

jaccard = jaccard_score(y_test, y_pred, average='samples')
print("Jaccard Similarity Score:", jaccard)

subset_accuracy = accuracy_score(y_test, y_pred)
print("Subset Accuracy:", subset_accuracy)

Hamming Loss: 0.18382352941176472
Micro-Averaged F1 Score: 0.6938775510204082
Jaccard Similarity Score: 0.302789744977245
Subset Accuracy: 0.08333333333333333


In [8]:
print(classification_report(y_test, y_pred, target_names=y_test.columns, zero_division="warn"))

              precision    recall  f1-score   support

      RF_1_0       0.33      0.25      0.29         4
      RF_2_0       0.50      0.12      0.20         8
      RF_2_1       0.75      0.50      0.60         6
      RF_3_0       0.83      0.62      0.71         8
      RF_3_1       1.00      0.50      0.67         8
      RF_3_2       1.00      0.45      0.62        11
      RF_4_0       1.00      0.55      0.71        11
      RF_4_1       0.86      0.60      0.71        10
      RF_4_2       1.00      0.50      0.67        10
      RF_4_3       1.00      0.62      0.77         8
      RF_6_0       1.00      0.67      0.80        12
      RF_6_1       1.00      0.70      0.82        10
      RF_6_2       1.00      0.58      0.74        12
      RF_6_3       1.00      0.70      0.82        10
      RF_6_4       1.00      0.45      0.62        11
      RF_6_5       0.75      0.50      0.60         6
      RF_T_0       1.00      0.88      0.93         8

   micro avg       0.92   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
method_evaluation_results = []
for method in y_test.columns:
    # Calculate true positives (correct assignments)
    true_positives = ((y_pred == 1) & (y_test == 1))[method].sum()

    # Calculate false positives (predicted as 1 but actual is 0)
    false_positives = ((y_pred == 1) & (y_test == 0))[method].sum()

    # Calculate false negatives (predicted as 0 but actual is 1)
    false_negatives = ((y_pred == 0) & (y_test == 1))[method].sum()

    # Calculate Precision
    if (true_positives + false_positives) > 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0

    # Calculate Recall
    if (true_positives + false_negatives) > 0:
        recall = true_positives / (true_positives + false_negatives)
    else:
        recall = 0 

    # Calculate F1-Score
    if (precision + recall) > 0:
        f1_score = (2 * precision * recall) / (precision + recall)
    else:
        f1_score = 0

    # Append results to the list
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': f'{precision * 100 :.2f}',
        'Recall (%)': f'{recall * 100 :.2f}',
        'F1-Score': f'{f1_score * 100 :.2f}'
    })
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)

In [10]:
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%),F1-Score
0,RF_1_0,1,2,3,33.33,25.0,28.57
1,RF_2_0,1,1,7,50.0,12.5,20.0
2,RF_2_1,3,1,3,75.0,50.0,60.0
3,RF_3_0,5,1,3,83.33,62.5,71.43
4,RF_3_1,4,0,4,100.0,50.0,66.67
5,RF_3_2,5,0,6,100.0,45.45,62.5
6,RF_4_0,6,0,5,100.0,54.55,70.59
7,RF_4_1,6,1,4,85.71,60.0,70.59
8,RF_4_2,5,0,5,100.0,50.0,66.67
9,RF_4_3,5,0,3,100.0,62.5,76.92


## Model probabilities

In [18]:
y_test

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
2,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,0
0,0,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0
4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1
23,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1
21,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
7,0,0,0,0,0,1,0,0,0,1,1,1,1,1,1,1,1
16,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0
18,0,1,0,0,0,0,1,1,0,0,1,0,1,1,0,0,0
17,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1


In [30]:
y_pred

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
2,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
23,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
16,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0


### Select top K

In [26]:
ranked_methods = np.argsort(-y_pred_proba, axis=1)
# Top-K Selection: Select top-K methods based on ranking
TOP_K = 3
top_k_methods = np.zeros_like(y_pred_proba, dtype=int)

# Set top-K methods as selected (binary)
for i, row in enumerate(ranked_methods):
    top_indices = row[:TOP_K]  # Get top-K indices for this instance
    top_k_methods[i, top_indices] = 1

# Convert to DataFrame for comparison
top_k_methods_df = pd.DataFrame(top_k_methods, columns=y_test.columns, index=y_test.index)

# Display the top-K selected methods
top_k_methods_df.head()

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1
4,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
13,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0
23,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0


In [27]:
len(test_set)

24

In [28]:
# Element-wise multiplication to find correct predictions
correct_predictions = top_k_methods_df * test_set

# Sum the total number of correct predictions
total_correct = correct_predictions.sum().sum()  # Sum across all rows and columns
print(f"Total Correct Predictions: {total_correct}")

# Optionally, calculate the number of correct predictions per instance
correct_per_instance = correct_predictions.sum(axis=1)
print("\nCorrect Predictions Per Instance:")
print(correct_per_instance)

# Optionally, calculate the accuracy as a percentage
total_possible = TOP_K * len(top_k_methods_df)  # Total number of actual positive labels
accuracy = (total_correct / total_possible) * 100
print(f"\nAccuracy: {accuracy:.2f}%")

Total Correct Predictions: 40.0

Correct Predictions Per Instance:
2     2.0
0     2.0
4     1.0
13    3.0
23    3.0
21    0.0
7     3.0
16    3.0
18    2.0
17    3.0
14    0.0
12    2.0
10    3.0
8     0.0
3     1.0
15    1.0
20    0.0
19    1.0
22    0.0
5     1.0
1     2.0
6     3.0
11    1.0
9     3.0
dtype: float64

Accuracy: 55.56%


In [29]:
# Step 1: Calculate the number of correct predictions per row (instance)
correct_per_instance = correct_predictions.sum(axis=1)

# Step 2: Identify rows with zero correct predictions
missed_all = (correct_per_instance == 0).sum()  # Count rows with no correct predictions
total_instances = len(correct_predictions)  # Total number of rows

# Step 3: Calculate the accuracy for missed rows
missed_accuracy = (missed_all / total_instances) * 100

# Output the results
print(f"Number of Rows with All Incorrect Predictions: {missed_all}")
print(f"Percentage of Rows with All Incorrect Predictions: {missed_accuracy:.2f}%")

Number of Rows with All Incorrect Predictions: 5
Percentage of Rows with All Incorrect Predictions: 20.83%


### Performance what if only 1 method

In [16]:
# Step 1: Initialize a DataFrame to store results
method_evaluation_results = []

# Step 2: Loop through each method
for method in y_test.columns:
    # Create a binary matrix where the current method is always 1 and others are 0
    single_method_matrix = np.zeros_like(y_test, dtype=int)
    single_method_matrix[:, y_test.columns.get_loc(method)] = 1  # Set current method column to 1
    
    # Convert to DataFrame
    single_method_df = pd.DataFrame(single_method_matrix, columns=y_test.columns, index=y_test.index)

    # Calculate true positives (correct assignments)
    true_positives = ((single_method_df == 1) & (y_test == 1))[method].sum()

    # Calculate false positives (predicted as 1 but actual is 0)
    false_positives = ((single_method_df == 1) & (y_test == 0))[method].sum()

    # Calculate false negatives (predicted as 0 but actual is 1)
    false_negatives = ((single_method_df == 0) & (y_test == 1))[method].sum()

    # Calculate Precision
    if (true_positives + false_positives) > 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0.0  # Handle division by zero

    # Calculate Recall
    if (true_positives + false_negatives) > 0:
        recall = true_positives / (true_positives + false_negatives)
    else:
        recall = 0.0  # Handle division by zero

    # Append results to the list
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': precision * 100,
        'Recall (%)': recall * 100
    })

In [17]:
# Step 3: Convert results to a DataFrame for easier visualization
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)

# Display the evaluation results
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%)
0,RF_1_0,4,20,0,16.666667,100.0
1,RF_2_0,8,16,0,33.333333,100.0
2,RF_2_1,6,18,0,25.0,100.0
3,RF_3_0,8,16,0,33.333333,100.0
4,RF_3_1,8,16,0,33.333333,100.0
5,RF_3_2,11,13,0,45.833333,100.0
6,RF_4_0,11,13,0,45.833333,100.0
7,RF_4_1,10,14,0,41.666667,100.0
8,RF_4_2,10,14,0,41.666667,100.0
9,RF_4_3,8,16,0,33.333333,100.0
