In [2411]:
import functions_ml as fml
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import hamming_loss, f1_score, jaccard_score, accuracy_score, multilabel_confusion_matrix, classification_report

Depois posso separar uma análise para os problemas que não havia no teste (30 plantas, 5 produtos, etc.): o dataset mantém o índice no y_test, basta fazer merge das colunas para filtrar plantas e produtos e depois dar drop.

# Oracle

In [2412]:
# Load model
# TOLERANCE is embedded in the pickle file name ("..._{TOLERANCE}percent.pkl") and is
# divided by 100 before being handed to fml.create_multi_label_target in later cells,
# so it is presumably expressed in percent.
TOLERANCE = 0.01
# NOTE(review): pickle.load can execute arbitrary code stored in the file; only load
# model files that come from a trusted training run.
with open(f'trained_models/oracle_{TOLERANCE}percent.pkl', 'rb') as f:
    model = pickle.load(f)

# Get train set features

In [2413]:
# Rebuild the training feature matrix only to recover the column set that the
# test features must be restricted to.
results = pd.read_csv('datasets/instances_results.csv')
features = pd.read_csv('datasets/multi_plant_instance_features.csv')
dataset = fml.create_multi_label_target(
    fml.create_dataset(features, results),
    TOLERANCE / 100,
)
# Columns whose name starts with 'RF' are treated as targets and removed from the
# features, together with the 'instance' identifier column.
target_col = [name for name in dataset.columns if name.startswith('RF')]
X_train = fml.binary_feature_selection(
    dataset.drop(columns=target_col + ['instance'])
)
# Alternative selection left disabled by the author: fml.multi_class_feature_selection(X_train)
# Drop the intermediates; only X_train is kept from this cell.
del results, features, dataset, target_col

# Test set ("new instances")

In [2414]:
# Same tolerance as the training targets, converted from percent to a fraction.
TEST_TOLERANCE_LIMIT = TOLERANCE / 100
test_results = pd.read_csv('datasets/test_instances_results.csv')
test_features = pd.read_csv('datasets/test_instances_features.csv')
# Join features with results, then derive the multi-label target columns.
test_set = fml.create_multi_label_target(
    fml.create_dataset(test_features, test_results),
    TEST_TOLERANCE_LIMIT,
)

In [2415]:
# Target columns are the ones whose name starts with 'RF'.
target_cols = list(filter(lambda col: col.startswith('RF'), test_set.columns))
# Print the column total of every target (number of positives if the labels are 0/1).
for t in target_cols:
    print(f'{t} : {test_set[t].sum()}')

RF_1_0 : 17
RF_2_0 : 21
RF_2_1 : 19
RF_3_0 : 24
RF_3_1 : 23
RF_3_2 : 34
RF_4_0 : 32
RF_4_1 : 33
RF_4_2 : 39
RF_4_3 : 39
RF_6_0 : 49
RF_6_1 : 39
RF_6_2 : 46
RF_6_3 : 51
RF_6_4 : 49
RF_6_5 : 44
RF_T_0 : 66


# Conjuntos de instâncias
Separar em tamanhos vistos e não vistos, pois estes podem ter comportamentos diferentes dos planos do modelo.

In [2416]:
# Instance sizes hard-coded here as the "seen" plant and product counts.
seen_nplants = [2, 4, 6, 15, 20]
seen_nproducts = [10, 60, 120]

# "Unseen" means strictly outside the seen range (extrapolation). Sizes that fall
# between two seen values (e.g. 30 products, 10 plants) are NOT listed here and
# therefore stay in the test set.
# The previous form `p not in seen and p > max(seen) or p < min(seen)` evaluates to
# exactly this predicate: `and` binds tighter than `or`, and `p > max(seen)` already
# implies `p not in seen`.
unseen_nproducts = [
    p for p in test_set['num_products'].unique()
    if p > max(seen_nproducts) or p < min(seen_nproducts)
]
# For plants only the upper bound is checked, as in the original cell.
unseen_nplants = [
    p for p in test_set['num_plants'].unique()
    if p > max(seen_nplants)
]

In [2417]:
# Keep only the instances whose product count and plant count are both outside the
# "unseen" lists (i.e. drop a row if either size is flagged as unseen).
test_set = test_set.loc[
    ~(test_set['num_products'].isin(unseen_nproducts)
      | test_set['num_plants'].isin(unseen_nplants))
]

# Preprocessing (deve ser igual ao aplicado no desenvolvimento)

In [2418]:
# Restrict the test features to exactly the columns (and column order) of X_train.
X_test = test_set.loc[:, X_train.columns]
# True labels: one column per 'RF...' target.
y_test = test_set.loc[:, target_cols]

## Predict

In [2419]:
# Wrap the model outputs in DataFrames that reuse y_test's index and column labels,
# so predictions can be compared with the true labels by label.
# NOTE(review): this assumes model.predict / model.predict_proba return one column per
# target, in the same order as y_test.columns — confirm against the trained model.
y_pred = pd.DataFrame(model.predict(X_test), columns=y_test.columns, index=y_test.index)
probabilities = pd.DataFrame(model.predict_proba(X_test), columns=y_test.columns, index=y_test.index)

In [2420]:
# Multi-label summary metrics of the predictions against the true labels.
hamming = hamming_loss(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='micro')
jaccard = jaccard_score(y_test, y_pred, average='samples')
# accuracy_score on multi-label input counts a row as correct only if every label matches.
subset_accuracy = accuracy_score(y_test, y_pred)

# Report in a fixed order, one metric per line.
for metric_label, metric_value in (
    ("Hamming Loss:", hamming),
    ("Micro-Averaged F1 Score:", f1),
    ("Jaccard Similarity Score:", jaccard),
    ("Subset Accuracy:", subset_accuracy),
):
    print(metric_label, metric_value)

Hamming Loss: 0.13878676470588236
Micro-Averaged F1 Score: 0.7584
Jaccard Similarity Score: 0.4262782139735265
Subset Accuracy: 0.15625


In [2421]:
# Per-label precision / recall / F1 / support table. zero_division="warn" is passed
# explicitly, so scikit-learn emits a warning when a metric is undefined
# (visible in the recorded output below).
print(classification_report(y_test, y_pred, target_names=y_test.columns, zero_division="warn"))

              precision    recall  f1-score   support

      RF_1_0       0.40      0.25      0.31         8
      RF_2_0       0.50      0.09      0.15        11
      RF_2_1       0.67      0.40      0.50        10
      RF_3_0       0.60      0.55      0.57        11
      RF_3_1       1.00      0.50      0.67        12
      RF_3_2       0.91      0.50      0.65        20
      RF_4_0       1.00      0.60      0.75        20
      RF_4_1       0.76      0.68      0.72        19
      RF_4_2       0.94      0.60      0.73        25
      RF_4_3       0.94      0.60      0.73        25
      RF_6_0       1.00      0.77      0.87        26
      RF_6_1       0.83      0.65      0.73        23
      RF_6_2       0.95      0.69      0.80        29
      RF_6_3       0.96      0.83      0.89        30
      RF_6_4       0.96      0.74      0.84        31
      RF_6_5       0.91      0.74      0.82        27
      RF_T_0       0.97      0.86      0.91        35

   micro avg       0.90   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [2422]:
# Per-method confusion counts and precision / recall / F1 of the model predictions.
method_evaluation_results = []
for method in y_test.columns:
    # Compare only this method's column instead of rebuilding full-frame masks.
    predicted = y_pred[method]
    actual = y_test[method]

    # Correct assignments (predicted 1, actual 1)
    true_positives = ((predicted == 1) & (actual == 1)).sum()
    # Predicted as 1 but actual is 0
    false_positives = ((predicted == 1) & (actual == 0)).sum()
    # Predicted as 0 but actual is 1
    false_negatives = ((predicted == 0) & (actual == 1)).sum()

    # Precision; reported as 0 when the method was never predicted
    predicted_positives = true_positives + false_positives
    precision = true_positives / predicted_positives if predicted_positives > 0 else 0

    # Recall; reported as 0 when the method is never a true label
    actual_positives = true_positives + false_negatives
    recall = true_positives / actual_positives if actual_positives > 0 else 0

    # F1; stored in `method_f1` (not `f1_score`) so the scikit-learn `f1_score`
    # function imported at the top of the notebook is not overwritten, which would
    # make the global-metrics cell fail when re-run.
    if (precision + recall) > 0:
        method_f1 = (2 * precision * recall) / (precision + recall)
    else:
        method_f1 = 0

    # One record per method; percentages are stored as 2-decimal strings
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': f'{precision * 100 :.2f}',
        'Recall (%)': f'{recall * 100 :.2f}',
        'F1-Score': f'{method_f1 * 100 :.2f}'
    })
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)

In [2423]:
# Display the per-method metrics table built in the previous cell.
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%),F1-Score
0,RF_1_0,2,3,6,40.0,25.0,30.77
1,RF_2_0,1,1,10,50.0,9.09,15.38
2,RF_2_1,4,2,6,66.67,40.0,50.0
3,RF_3_0,6,4,5,60.0,54.55,57.14
4,RF_3_1,6,0,6,100.0,50.0,66.67
5,RF_3_2,10,1,10,90.91,50.0,64.52
6,RF_4_0,12,0,8,100.0,60.0,75.0
7,RF_4_1,13,4,6,76.47,68.42,72.22
8,RF_4_2,15,1,10,93.75,60.0,73.17
9,RF_4_3,15,1,10,93.75,60.0,73.17


## Model methods choice

### Select top K

In [2424]:
def select_top_k_methods(probabilities_df, y_test, top_k, keep_probabilities=False):
    """
    Keep, for every instance, only the top_k methods with the highest probability.

    Parameters:
        probabilities_df (DataFrame): One row per instance and one column per method,
            holding the probability assigned to each method.
        y_test (DataFrame): Accepted for call compatibility; it is not read by this function.
        top_k (int): Number of columns kept per row.
        keep_probabilities (bool): If True, kept cells carry their original probability;
            if False, kept cells are set to 1.

    Returns:
        DataFrame: Same index and columns as probabilities_df; every cell outside a
        row's top_k selection is 0.
    """
    scores = probabilities_df.values

    # Column positions ordered by decreasing score, truncated to the first top_k per row
    best_columns = np.argsort(-scores, axis=1)[:, :top_k]
    # Column vector of row positions so it broadcasts against best_columns
    row_positions = np.arange(scores.shape[0])[:, np.newaxis]

    # Start from all zeros (same shape and dtype as the scores) and fill the winners
    selection = np.zeros_like(scores)
    selection[row_positions, best_columns] = (
        scores[row_positions, best_columns] if keep_probabilities else 1
    )

    return pd.DataFrame(selection, columns=probabilities_df.columns, index=probabilities_df.index)

In [2425]:
def evaluate_top_k_predictions(top_k_methods_df, y_test, top_k):
    """Percentage of instances for which at least one selected method is a true label.

    Parameters:
        top_k_methods_df (DataFrame): Selection matrix, one row per instance and one
            column per method; a non-zero cell marks a selected method.
        y_test (DataFrame): True labels with the same index and columns.
        top_k (int): Kept for call compatibility. The returned metric does not depend
            on it; the intermediate values that used it were never returned and have
            been removed (they also divided by zero when top_k was 0).

    Returns:
        dict: {'General Accuracy (%)': value}, where value is the share of rows with at
        least one correct selection, in percent. NaN when there are no rows.
    """
    # A cell of the product is non-zero only where a selected method is also a true label
    correct_per_instance = (top_k_methods_df * y_test).sum(axis=1)

    total_instances = len(correct_per_instance)
    if total_instances == 0:
        # Nothing to evaluate; avoid dividing by zero
        return {'General Accuracy (%)': float('nan')}

    # Rows where none of the selected methods was correct
    missed_all = (correct_per_instance == 0).sum()
    general_accuracy = (1 - (missed_all / total_instances)) * 100

    return {'General Accuracy (%)': general_accuracy}

In [2426]:
# Number of methods selected per instance.
TOP_K = 1
# Same top-K selection twice: once keeping the winning probabilities, once as a 0/1 mask.
top_k_methods_df_prob = select_top_k_methods(probabilities, y_test, top_k=TOP_K, keep_probabilities=True)
top_k_methods_df_bin = select_top_k_methods(probabilities, y_test, top_k=TOP_K, keep_probabilities=False)
# Displayed result: share of instances whose selection contains at least one true label.
evaluate_top_k_predictions(top_k_methods_df_bin, y_test, TOP_K)

{'General Accuracy (%)': np.float64(71.875)}

In [2427]:
# Step 1: Element-wise product; a cell is non-zero only where a selected method is
# also a true label for that instance.
correct_predictions = top_k_methods_df_bin * y_test

# Step 2: Number of correct selections per row (instance)
correct_per_row = correct_predictions.sum(axis=1)

# Step 3: Keep the rows whose number of correct selections is exactly N.
# NOTE(review): despite its name, `rows_with_at_least_N_correct` is filtered with
# `== N`, not `>= N`; with N = 0 it holds the instances where every selected method
# was wrong.
N = 0 # Number of correct selections a row must have to be kept
rows_with_at_least_N_correct = correct_per_row[correct_per_row == N]

# Step 4: Percentage of rows meeting the criterion
total_rows = len(correct_per_row)
percentage = (len(rows_with_at_least_N_correct) / total_rows) * 100

# Step 5: Display the result
print(f"Percentage of rows with {N} correct predictions: {percentage:.2f}%")

# Index labels of the matching rows; later cells use them to inspect probabilities,
# true labels and the selected method for these instances.
rows_with_N_correct_indices = rows_with_at_least_N_correct.index
matching_rows = top_k_methods_df_bin.loc[rows_with_N_correct_indices]
print("\nMatching Rows:")
# The frame itself is not displayed here (display left disabled): matching_rows

Percentage of rows with 0 correct predictions: 28.12%

Matching Rows:


In [2428]:
# Predicted probabilities of the selected rows, side by side with their instance name.
pd.concat([probabilities.loc[rows_with_N_correct_indices], test_set[['instance']].loc[rows_with_N_correct_indices]], axis=1)

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0,instance
87,0.027553,0.251735,0.047718,0.637484,0.28109,0.041166,0.559093,0.231432,0.054359,0.045834,0.43033,0.147037,0.201286,0.224419,0.073785,0.046207,0.214062,VAL01_12_20_60
110,0.03165,0.287897,0.166457,0.041562,0.157234,0.039836,0.051626,0.063133,0.052113,0.044919,0.049434,0.046457,0.032029,0.035737,0.059523,0.042065,0.052328,VAL11_12_10_30
84,0.594437,0.138878,0.101302,0.049191,0.060738,0.038923,0.05209,0.072566,0.04674,0.050321,0.048602,0.049577,0.03058,0.057365,0.062332,0.050632,0.058017,VAL10_12_6_120
70,0.02673,0.219804,0.051175,0.52717,0.370751,0.050564,0.523067,0.242577,0.063603,0.0452,0.403194,0.133865,0.208411,0.174052,0.100542,0.065407,0.218599,VAL00_12_20_60
36,0.023763,0.111859,0.09275,0.054651,0.145374,0.067391,0.052331,0.114409,0.162218,0.071789,0.051368,0.064314,0.049584,0.043073,0.103941,0.068054,0.064997,VAL11_12_10_10
85,0.026414,0.140578,0.108855,0.0468,0.08875,0.049249,0.051764,0.118592,0.062221,0.053414,0.050464,0.053345,0.065197,0.037801,0.092914,0.074668,0.067841,VAL10_12_10_10
75,0.028851,0.128356,0.077831,0.056138,0.127166,0.048039,0.060643,0.103895,0.054526,0.054898,0.073901,0.050446,0.042205,0.066789,0.120545,0.079018,0.07054,VAL10_12_6_30
63,0.047013,0.303,0.192543,0.052199,0.100803,0.037611,0.049114,0.078008,0.066412,0.045029,0.051659,0.048645,0.030494,0.037835,0.05356,0.043492,0.052299,VAL10_12_10_60
21,0.022005,0.081993,0.047804,0.041875,0.090538,0.038119,0.04769,0.064245,0.041486,0.045392,0.046085,0.045903,0.03393,0.043715,0.310265,0.153301,0.248221,VAL10_12_2_60
104,0.896782,0.151392,0.053952,0.077265,0.05741,0.036085,0.048318,0.066662,0.042397,0.048464,0.052523,0.050926,0.029877,0.041247,0.052788,0.042596,0.075879,VAL10_12_20_120


In [2429]:
# True labels of the same rows, side by side with their instance name.
pd.concat([y_test.loc[rows_with_N_correct_indices], test_set[['instance']].loc[rows_with_N_correct_indices]], axis=1)

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0,instance
87,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,0,1,VAL01_12_20_60
110,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,VAL11_12_10_30
84,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,VAL10_12_6_120
70,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1,0,1,VAL00_12_20_60
36,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,VAL11_12_10_10
85,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,VAL10_12_10_10
75,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,VAL10_12_6_30
63,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,VAL10_12_10_60
21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,VAL10_12_2_60
104,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,VAL10_12_20_120


In [2430]:
# Top-K selection (with the winning probability kept) for the same rows.
top_k_methods_df_prob.loc[rows_with_N_correct_indices]

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
87,0.0,0.0,0.0,0.637484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110,0.0,0.287897,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
84,0.594437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
70,0.0,0.0,0.0,0.52717,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.162218,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
85,0.0,0.140578,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75,0.0,0.128356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63,0.0,0.303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.310265,0.0,0.0
104,0.896782,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Performance if only one method were always selected

In [2431]:
# Step 1: Collect one record per method
method_evaluation_results = []

# Step 2: Evaluate the constant strategy "always select this method" for every method
for method in y_test.columns:
    # Prediction matrix with the current method's column set to 1 and all others 0
    single_method_matrix = np.zeros_like(y_test, dtype=int)
    single_method_matrix[:, y_test.columns.get_loc(method)] = 1
    single_method_df = pd.DataFrame(single_method_matrix, columns=y_test.columns, index=y_test.index)

    # Only the current method's column is needed for the counts below
    chosen = single_method_df[method]
    truth = y_test[method]

    # Selected and actually a true label
    true_positives = ((chosen == 1) & (truth == 1)).sum()
    # Selected but not a true label
    false_positives = ((chosen == 1) & (truth == 0)).sum()
    # Not selected although it is a true label
    false_negatives = ((chosen == 0) & (truth == 1)).sum()

    # Precision and recall, falling back to 0.0 when the denominator is zero
    n_selected = true_positives + false_positives
    precision = true_positives / n_selected if n_selected > 0 else 0.0
    n_actual = true_positives + false_negatives
    recall = true_positives / n_actual if n_actual > 0 else 0.0

    record = {
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': precision * 100,
        'Recall (%)': recall * 100
    }
    method_evaluation_results.append(record)

In [2432]:
# Step 3: Turn the list of per-method records into a DataFrame.
# Note: this rebinds `method_evaluation_results_df`, which an earlier cell also
# assigns (the model's per-method metrics).
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)

# Display the evaluation results
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%)
0,RF_1_0,8,56,0,12.5,100.0
1,RF_2_0,11,53,0,17.1875,100.0
2,RF_2_1,10,54,0,15.625,100.0
3,RF_3_0,11,53,0,17.1875,100.0
4,RF_3_1,12,52,0,18.75,100.0
5,RF_3_2,20,44,0,31.25,100.0
6,RF_4_0,20,44,0,31.25,100.0
7,RF_4_1,19,45,0,29.6875,100.0
8,RF_4_2,25,39,0,39.0625,100.0
9,RF_4_3,25,39,0,39.0625,100.0
