In [67]:
import functions_ml as fml
import pandas as pd
import numpy as np
import pickle
from sklearn.multioutput import MultiOutputClassifier, ClassifierChain
from sklearn.ensemble import RandomForestClassifier
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
from sklearn.metrics import hamming_loss, f1_score, jaccard_score, accuracy_score, multilabel_confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.calibration import CalibratedClassifierCV

In [68]:
# PARAMETERS
# Tolerance handed to fml.create_multi_label_target when the target columns
# are built: 0.1 % written as a fraction.
# NOTE(review): how the tolerance is applied is defined inside functions_ml
# and is not visible in this notebook — confirm there.
TRAIN_TOLERANCE_LIMIT = 0.1 / 100

In [69]:
# 1. Get dataset
# Read the results table and the instance-feature table from CSV (paths are
# relative to the working directory) and let fml.create_dataset combine them.
# NOTE(review): the join key and any row filtering live in functions_ml and
# are not visible here.
results = pd.read_csv('datasets/instances_results.csv')
features = pd.read_csv('datasets/multi_plant_instance_features.csv')
dataset = fml.create_dataset(features, results)

In [70]:
# 2. Create target columns for multi label classification
# NOTE: this rebinds `dataset` to the helper's return value, so re-running this
# cell feeds the already-processed frame back into the helper. Re-run the
# dataset-loading cell first to start from a clean frame.
dataset = fml.create_multi_label_target(dataset, TRAIN_TOLERANCE_LIMIT)

In [71]:
# Target columns are recognised by their 'RF' name prefix.
target_col = [column for column in dataset.columns if column.startswith('RF')]

# Print each target column's sum (the number of positives if labels are 0/1).
for label in target_col:
    label_total = dataset[label].sum()
    print(f"{label} : {label_total}")

RF_1_0 : 232
RF_2_0 : 295
RF_2_1 : 275
RF_3_0 : 284
RF_3_1 : 298
RF_3_2 : 265
RF_4_0 : 280
RF_4_1 : 312
RF_4_2 : 291
RF_4_3 : 255
RF_6_0 : 304
RF_6_1 : 308
RF_6_2 : 328
RF_6_3 : 346
RF_6_4 : 324
RF_6_5 : 277
RF_T_0 : 333


In [72]:
# Extract features (X) and labels (y)
# Labels: the target columns collected in target_col.
y = dataset[target_col]

# Features: every column that is neither a target nor 'instance', passed
# through fml.binary_feature_selection.
non_feature_columns = [*target_col, 'instance']
X = fml.binary_feature_selection(dataset.drop(columns=non_feature_columns))

In [73]:
# Fixed hold-out set: index labels of the rows reserved for validation.
validation_index = [460, 199, 349, 213, 347, 303, 515, 404, 309, 344,  79,  62,
       228, 415, 259, 554, 416, 290,  94, 507, 338,  47, 306, 129, 417, 324,
        86, 131, 263,   0, 429, 445, 299,  24, 253, 137, 448, 320, 335, 234,
       219, 381, 301, 479, 414, 238,  92, 329, 368, 501,  65, 237, 332, 134,
       291, 582, 257, 590, 503, 278, 345, 403,  16,  20, 376, 116, 523]
# Training labels = every index label of X that is not reserved for validation.
# The candidates come from X.index rather than np.arange(len(X)) because the
# frames are sliced with label-based .loc below; positions and labels only
# coincide when the index is the default 0..n-1 range.
all_indices = X.index.to_numpy()
train_index = np.setdiff1d(all_indices, validation_index)
# Use these labels to create training and testing datasets
X_train, X_test = X.loc[train_index], X.loc[validation_index]
y_train, y_test = y.loc[train_index], y.loc[validation_index]

In [74]:
# # Initialize MultilabelStratifiedShuffleSplit
# msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)

# # Get train-validation split indices
# for train_index, validation_index in msss.split(X, y):
#     X_train, X_test = X.iloc[train_index], X.iloc[validation_index]
#     y_train, y_test = y.iloc[train_index], y.iloc[validation_index]

In [75]:
# Base learner: random forest with balanced class weights and a fixed seed.
base_rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=20,
    max_features='log2',
    min_samples_split=2,
    min_samples_leaf=1,
    class_weight='balanced',
    random_state=2112,
)

# Calibration wrapper around the forest: sigmoid method, 5-fold CV.
calibrated_rf_model = CalibratedClassifierCV(
    estimator=base_rf_model,
    method='sigmoid',
    cv=5,
)

# Chain order: column positions of y from the last one down to 0.
reversed_label_order = list(range(len(y.columns) - 1, -1, -1))
model = ClassifierChain(
    base_estimator=calibrated_rf_model,
    order=reversed_label_order,
)

# Fit the chain on the training split.
model.fit(X_train, y_train)

In [76]:
# Train classifier on full data and save
# oracle = ClassifierChain(RandomForestClassifier(random_state=2112, n_estimators=100, max_depth=20, max_features='log2', min_samples_leaf=1, min_samples_split=2), order=[i for i in range(len(y.columns))][::-1])
# oracle.fit(X, y)
# with open('trained_models/oracle.pkl','wb') as f:
#     pickle.dump(oracle,f)

# Predictions on validation set

In [77]:
# Predict on the validation set
# Both outputs are wrapped in frames that reuse y_test's column labels and
# row index so they can be compared with y_test directly.
_frame_axes = dict(columns=y_test.columns, index=y_test.index)
y_pred = pd.DataFrame(model.predict(X_test), **_frame_axes)
y_pred_proba = pd.DataFrame(model.predict_proba(X_test), **_frame_axes)

In [78]:
# Aggregate multi-label scores of the hard predictions on the validation set.
hamming = hamming_loss(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='micro')
jaccard = jaccard_score(y_test, y_pred, average='samples')
subset_accuracy = accuracy_score(y_test, y_pred)

# Report them in a fixed order, one per line.
for heading, score in (
    ("Hamming Loss:", hamming),
    ("Micro-Averaged F1 Score:", f1),
    ("Jaccard Similarity Score:", jaccard),
    ("Subset Accuracy:", subset_accuracy),
):
    print(heading, score)

Hamming Loss: 0.09306409130816505
Micro-Averaged F1 Score: 0.8781609195402299
Jaccard Similarity Score: 0.5753546619218262
Subset Accuracy: 0.3283582089552239


In [79]:
# Per-method confusion counts and precision / recall / F1 (in percent) of the
# hard predictions in y_pred against y_test, one record per label column.
method_evaluation_results = []
for method in y_test.columns:
    # Compare only the current column instead of the whole frame each pass.
    predicted_col = y_pred[method]
    actual_col = y_test[method]

    # True positives: predicted 1 and actually 1
    true_positives = ((predicted_col == 1) & (actual_col == 1)).sum()

    # False positives: predicted 1 but actually 0
    false_positives = ((predicted_col == 1) & (actual_col == 0)).sum()

    # False negatives: predicted 0 but actually 1
    false_negatives = ((predicted_col == 0) & (actual_col == 1)).sum()

    # Precision (0.0 when the method was never predicted)
    if (true_positives + false_positives) > 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0.0  # Handle division by zero

    # Recall (0.0 when the method never occurs in y_test)
    if (true_positives + false_negatives) > 0:
        recall = true_positives / (true_positives + false_negatives)
    else:
        recall = 0.0  # Handle division by zero

    # F1 gets the same zero-division treatment as precision and recall;
    # without the guard a method with no true positives yields NaN or raises.
    if (precision + recall) > 0:
        f1_method = (2 * precision * recall) / (precision + recall)
    else:
        f1_method = 0.0

    # Append results to the list
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': precision * 100,
        'Recall (%)': recall * 100,
        'F1-Score (%)': f1_method * 100
    })

In [80]:
# Text report of per-label precision / recall / F1 / support on the validation
# set, with label names taken from y_test's columns. zero_division="warn" is
# passed explicitly; the recorded run below shows a warning being emitted.
print(classification_report(y_test, y_pred, target_names=y_test.columns, zero_division="warn"))

              precision    recall  f1-score   support

      RF_1_0       0.83      0.88      0.86        17
      RF_2_0       0.95      0.78      0.86        27
      RF_2_1       0.91      0.80      0.85        25
      RF_3_0       0.83      0.83      0.83        24
      RF_3_1       0.91      0.71      0.80        28
      RF_3_2       0.95      0.86      0.90        22
      RF_4_0       0.95      0.73      0.83        26
      RF_4_1       0.88      0.77      0.82        30
      RF_4_2       1.00      0.74      0.85        27
      RF_4_3       0.95      0.88      0.91        24
      RF_6_0       1.00      0.93      0.96        29
      RF_6_1       0.96      0.89      0.93        28
      RF_6_2       0.89      0.81      0.85        31
      RF_6_3       0.82      0.88      0.85        32
      RF_6_4       0.93      0.87      0.90        31
      RF_6_5       0.95      0.88      0.91        24
      RF_T_0       1.00      0.97      0.98        32

   micro avg       0.92   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [81]:
# Turn the per-method records into a table and print it as LaTeX source.
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)
print(method_evaluation_results_df.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
 & Method & True Positives & False Positives & False Negatives & Precision (%) & Recall (%) & F1-Score (%) \\
\midrule
0 & RF_1_0 & 15 & 3 & 2 & 83.333333 & 88.235294 & 85.714286 \\
1 & RF_2_0 & 21 & 1 & 6 & 95.454545 & 77.777778 & 85.714286 \\
2 & RF_2_1 & 20 & 2 & 5 & 90.909091 & 80.000000 & 85.106383 \\
3 & RF_3_0 & 20 & 4 & 4 & 83.333333 & 83.333333 & 83.333333 \\
4 & RF_3_1 & 20 & 2 & 8 & 90.909091 & 71.428571 & 80.000000 \\
5 & RF_3_2 & 19 & 1 & 3 & 95.000000 & 86.363636 & 90.476190 \\
6 & RF_4_0 & 19 & 1 & 7 & 95.000000 & 73.076923 & 82.608696 \\
7 & RF_4_1 & 23 & 3 & 7 & 88.461538 & 76.666667 & 82.142857 \\
8 & RF_4_2 & 20 & 0 & 7 & 100.000000 & 74.074074 & 85.106383 \\
9 & RF_4_3 & 21 & 1 & 3 & 95.454545 & 87.500000 & 91.304348 \\
10 & RF_6_0 & 27 & 0 & 2 & 100.000000 & 93.103448 & 96.428571 \\
11 & RF_6_1 & 25 & 1 & 3 & 96.153846 & 89.285714 & 92.592593 \\
12 & RF_6_2 & 25 & 3 & 6 & 89.285714 & 80.645161 & 84.745763 \\
13 & RF_6_3 & 28 & 6 &

In [82]:
# Show the per-method evaluation table through the notebook's rich display.
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%),Recall (%),F1-Score (%)
0,RF_1_0,15,3,2,83.333333,88.235294,85.714286
1,RF_2_0,21,1,6,95.454545,77.777778,85.714286
2,RF_2_1,20,2,5,90.909091,80.0,85.106383
3,RF_3_0,20,4,4,83.333333,83.333333,83.333333
4,RF_3_1,20,2,8,90.909091,71.428571,80.0
5,RF_3_2,19,1,3,95.0,86.363636,90.47619
6,RF_4_0,19,1,7,95.0,73.076923,82.608696
7,RF_4_1,23,3,7,88.461538,76.666667,82.142857
8,RF_4_2,20,0,7,100.0,74.074074,85.106383
9,RF_4_3,21,1,3,95.454545,87.5,91.304348


## Model probabilities

### Select top K

In [83]:
# Top-K Selection: Select top-K methods based on ranking
TOP_K = 1

# Rank on a plain ndarray. Calling np.argsort on the DataFrame itself may,
# depending on the numpy/pandas versions, return a DataFrame, and iterating a
# DataFrame yields column labels rather than rows.
proba_values = y_pred_proba.to_numpy()
# Negate so the highest probability comes first; the stable sort makes ties
# resolve deterministically in favour of the earlier column.
ranked_methods = np.argsort(-proba_values, axis=1, kind="stable")

# Set top-K methods as selected (binary): write a 1 at the first TOP_K ranked
# column positions of every row.
top_k_methods = np.zeros(proba_values.shape, dtype=int)
np.put_along_axis(top_k_methods, ranked_methods[:, :TOP_K], 1, axis=1)

# Convert to DataFrame for comparison
top_k_methods_df = pd.DataFrame(top_k_methods, columns=y_test.columns, index=y_test.index)

# Display the top-K selected methods
top_k_methods_df.head()

Unnamed: 0,RF_1_0,RF_2_0,RF_2_1,RF_3_0,RF_3_1,RF_3_2,RF_4_0,RF_4_1,RF_4_2,RF_4_3,RF_6_0,RF_6_1,RF_6_2,RF_6_3,RF_6_4,RF_6_5,RF_T_0
460,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
199,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
349,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
213,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
347,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [84]:
# Element-wise multiplication to find correct predictions: a cell is 1 only
# where a method was selected AND is a true label in y_test.
correct_predictions = top_k_methods_df * y_test

# Sum the total number of correct predictions
total_correct = correct_predictions.sum().sum()  # Sum across all rows and columns
print(f"Total Correct Predictions: {total_correct}")

# Number of correct predictions per instance
correct_per_instance = correct_predictions.sum(axis=1)
print("\nCorrect Predictions Per Instance:")
print(correct_per_instance)

# "Accuracy" as printed here is correct selections divided by ALL positive
# labels in y_test, i.e. a recall-like figure. With TOP_K selections per
# instance it is capped well below 100 % whenever instances have more than
# TOP_K true labels.
total_possible = y_test.sum().sum()  # Total number of actual positive labels
accuracy = (total_correct / total_possible) * 100 if total_possible else 0.0
print(f"\nAccuracy: {accuracy:.2f}%")

# Precision: correct selections divided by the number of selections made.
# For TOP_K == 1 the denominator equals len(y_test), so the value matches the
# former TOP_K == 1 special case; other values of TOP_K are now covered too.
total_selected = top_k_methods_df.sum().sum()
precision = (total_correct / total_selected) * 100 if total_selected else 0.0
print(f"Precision: {precision:.2f}%")

Total Correct Predictions: 54

Correct Predictions Per Instance:
460    1
199    1
349    1
213    1
347    0
      ..
16     1
20     1
376    1
116    1
523    1
Length: 67, dtype: int64

Accuracy: 11.82%
Precision: 80.60%


In [85]:
# Hits per instance: how many selected methods are true labels in each row.
correct_per_instance = correct_predictions.sum(axis=1)

# Count the instances for which none of the selected methods was correct.
total_instances = len(correct_predictions)
missed_all = correct_per_instance.eq(0).sum()

# Share of instances with at least one correct selection, in percent.
accuracy = (1 - (missed_all / total_instances)) * 100

# Output the results
print(f"Number of Rows with All Incorrect Predictions: {missed_all}")
print(f"Percentage of Rows with at least one correct Prediction: {accuracy:.2f}%")

Number of Rows with All Incorrect Predictions: 13
Percentage of Rows with at least one correct Prediction: 80.60%


### Baseline: performance if a single method is always selected

In [86]:
# Baseline: for each method, score the policy "always select this method for
# every validation instance" against y_test.
# NOTE: this rebinds method_evaluation_results, replacing the per-method
# records of the chain model collected in an earlier cell.
method_evaluation_results = []

for method in y_test.columns:
    # The baseline prediction for this column is all ones, so the confusion
    # counts follow directly from the true labels; no n x m prediction matrix
    # needs to be materialised per method.
    actual_col = y_test[method]

    # Selected everywhere: every true 1 is a hit, every true 0 a false alarm.
    true_positives = (actual_col == 1).sum()
    false_positives = (actual_col == 0).sum()

    # The method is never left unselected, so nothing can be missed.
    false_negatives = 0

    # Precision (0.0 when the column has neither 0s nor 1s, e.g. no rows)
    if (true_positives + false_positives) > 0:
        precision = true_positives / (true_positives + false_positives)
    else:
        precision = 0.0  # Handle division by zero

    # Recall is not reported for this baseline (it is 100 % by construction
    # whenever the method occurs at all), so it is not computed.
    method_evaluation_results.append({
        'Method': method,
        'True Positives': true_positives,
        'False Positives': false_positives,
        'False Negatives': false_negatives,
        'Precision (%)': precision * 100,
    })

In [87]:
# Tabulate the single-method baseline records and display them.
# NOTE: this rebinds method_evaluation_results_df, which an earlier cell used
# for the chain model's per-method table; that table is no longer reachable
# under this name after this cell runs.
method_evaluation_results_df = pd.DataFrame(method_evaluation_results)
method_evaluation_results_df

Unnamed: 0,Method,True Positives,False Positives,False Negatives,Precision (%)
0,RF_1_0,17,50,0,25.373134
1,RF_2_0,27,40,0,40.298507
2,RF_2_1,25,42,0,37.313433
3,RF_3_0,24,43,0,35.820896
4,RF_3_1,28,39,0,41.791045
5,RF_3_2,22,45,0,32.835821
6,RF_4_0,26,41,0,38.80597
7,RF_4_1,30,37,0,44.776119
8,RF_4_2,27,40,0,40.298507
9,RF_4_3,24,43,0,35.820896


In [88]:
# Print the current method_evaluation_results_df as LaTeX source.
print(method_evaluation_results_df.to_latex())

\begin{tabular}{llrrrr}
\toprule
 & Method & True Positives & False Positives & False Negatives & Precision (%) \\
\midrule
0 & RF_1_0 & 17 & 50 & 0 & 25.373134 \\
1 & RF_2_0 & 27 & 40 & 0 & 40.298507 \\
2 & RF_2_1 & 25 & 42 & 0 & 37.313433 \\
3 & RF_3_0 & 24 & 43 & 0 & 35.820896 \\
4 & RF_3_1 & 28 & 39 & 0 & 41.791045 \\
5 & RF_3_2 & 22 & 45 & 0 & 32.835821 \\
6 & RF_4_0 & 26 & 41 & 0 & 38.805970 \\
7 & RF_4_1 & 30 & 37 & 0 & 44.776119 \\
8 & RF_4_2 & 27 & 40 & 0 & 40.298507 \\
9 & RF_4_3 & 24 & 43 & 0 & 35.820896 \\
10 & RF_6_0 & 29 & 38 & 0 & 43.283582 \\
11 & RF_6_1 & 28 & 39 & 0 & 41.791045 \\
12 & RF_6_2 & 31 & 36 & 0 & 46.268657 \\
13 & RF_6_3 & 32 & 35 & 0 & 47.761194 \\
14 & RF_6_4 & 31 & 36 & 0 & 46.268657 \\
15 & RF_6_5 & 24 & 43 & 0 & 35.820896 \\
16 & RF_T_0 & 32 & 35 & 0 & 47.761194 \\
\bottomrule
\end{tabular}

