In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:


def random_forest_pipeline_for_selected_features(datasets, target_column="Target", n_estimators=100, test_size=0.3, random_state=42, k_values=None, methods=None, output_dir="preprocessed/selected_features"):
    """
    Train and evaluate a Random Forest classifier on every selected-feature file.

    For each (dataset, method, k) combination the file
    "{output_dir}/{dataset_name}_{method}_k{k}_selected.csv" is loaded, split
    into stratified train/test sets, used to fit a RandomForestClassifier, and
    scored on the held-out part. Metrics are printed and also collected.
    Combinations whose file does not exist are reported and skipped.

    Parameters:
    - datasets: List of dataset file paths (base names without method suffix).
      Only the last path component is used; the "filtered_preprocessed_"
      prefix and a ".csv" suffix are stripped to obtain the dataset name.
    - target_column: Name of the target column.
    - n_estimators: Number of trees in the forest.
    - test_size: Proportion of data for testing.
    - random_state: Random state for reproducibility (used for both the split
      and the forest).
    - k_values: List of feature counts (k) used in feature selection.
      Defaults to [100, 200, 300, 400, 500].
    - methods: List of feature selection methods.
      Defaults to ["std_mean", "anova", "chi2"].
    - output_dir: Directory containing the selected feature files.

    Returns:
    - List of dicts, one per file that was evaluated, with the keys
      "Dataset", "Feature Method", "Feature Count (k)", "Accuracy",
      "Classification Report" and "Confusion Matrix".

    Raises:
    - KeyError: if a loaded file does not contain `target_column`.
    """
    if k_values is None:
        k_values = [100, 200, 300, 400, 500]  # Default k values
    if methods is None:
        methods = ["std_mean", "anova", "chi2"]  # Default feature selection methods

    results = []  # Store results for comparison

    for dataset in datasets:
        # Normalise "\" to "/" so Windows-style paths yield the same base name
        # as POSIX-style ones instead of silently matching no file at all.
        base_name = dataset.replace("\\", "/").split("/")[-1]
        dataset_name = base_name.replace("filtered_preprocessed_", "").replace(".csv", "")
        print(f"\nProcessing dataset: {dataset_name}")

        for method in methods:
            for k in k_values:
                print(f"\n  Feature selection method: {method.upper()}, k={k}")

                # Construct file path for the selected feature file
                selected_features_file = f"{output_dir}/{dataset_name}_{method}_k{k}_selected.csv"
                try:
                    # Load the dataset
                    data = pd.read_csv(selected_features_file)
                except FileNotFoundError:
                    # Best effort: not every (method, k) combination has to exist.
                    print(f"File not found: {selected_features_file}. Skipping...")
                    continue

                # Fail with a message that names the offending file; pandas'
                # own KeyError ("not found in axis") does not say which of the
                # many input files is malformed.
                if target_column not in data.columns:
                    raise KeyError(
                        f"Target column '{target_column}' not found in {selected_features_file}"
                    )

                # Separate features (X) and target (y)
                X = data.drop(columns=[target_column])
                y = data[target_column]

                # Stratify so each class keeps its proportion in both splits
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=test_size, stratify=y, random_state=random_state
                )

                # Initialize and train the Random Forest classifier
                rf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
                rf.fit(X_train, y_train)

                # Make predictions
                y_pred = rf.predict(X_test)

                # Evaluate the model
                accuracy = accuracy_score(y_test, y_pred)
                # zero_division=0 reports 0.00 for classes that were never
                # predicted (same numbers as the default) without emitting an
                # UndefinedMetricWarning for every such class and average.
                report = classification_report(y_test, y_pred, zero_division=0)
                confusion = confusion_matrix(y_test, y_pred)

                print("\n    Random Forest Classifier Performance:")
                print(f"    Accuracy: {accuracy}")
                print(f"\n    Classification Report:\n{report}")
                print(f"\n    Confusion Matrix:\n{confusion}")

                # Store results
                results.append({
                    "Dataset": dataset_name,
                    "Feature Method": method.upper(),
                    "Feature Count (k)": k,
                    "Accuracy": accuracy,
                    "Classification Report": report,
                    "Confusion Matrix": confusion
                })

    return results

In [4]:
# Example usage: evaluate every dataset / feature-selection method / k combination
if __name__ == "__main__":
    # Base file paths (without method suffix), built from the dataset identifiers
    datasets = [
        f"preprocessed/filtered_preprocessed_{dataset_id}"
        for dataset_id in ("GSE4290", "GSE19804", "GSE27562", "GSE33315", "GSE59856")
    ]

    # Replace with the actual target column name
    target_column = "Target"

    print("Running Random Forest classification on multiple datasets with different feature selection methods and k values...")
    results = random_forest_pipeline_for_selected_features(
        datasets=datasets,
        target_column=target_column,
        n_estimators=100,
    )

    # Compact recap: one short block per evaluated (dataset, method, k) run
    for result in results:
        print("\nSummary for Dataset:", result["Dataset"])
        for field in ("Feature Method", "Feature Count (k)", "Accuracy"):
            print(f"{field}:", result[field])


Running Random Forest classification on multiple datasets with different feature selection methods and k values...

Processing dataset: GSE4290

  Feature selection method: STD_MEAN, k=100


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.660377358490566

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.67      0.96      0.79        23
        non-tumor       1.00      0.29      0.44         7
oligodendroglioma       0.61      0.73      0.67        15

         accuracy                           0.66        53
        macro avg       0.57      0.49      0.47        53
     weighted avg       0.59      0.66      0.59        53


    Confusion Matrix:
[[ 0  4  0  4]
 [ 0 22  0  1]
 [ 0  3  2  2]
 [ 0  4  0 11]]

  Feature selection method: STD_MEAN, k=200

    Random Forest Classifier Performance:
    Accuracy: 0.7547169811320755

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.74      0.87      0.80        23
        non

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7735849056603774

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.75      0.91      0.82        23
        non-tumor       0.83      0.71      0.77         7
oligodendroglioma       0.79      1.00      0.88        15

         accuracy                           0.77        53
        macro avg       0.59      0.66      0.62        53
     weighted avg       0.66      0.77      0.71        53


    Confusion Matrix:
[[ 0  5  0  3]
 [ 0 21  1  1]
 [ 0  2  5  0]
 [ 0  0  0 15]]

  Feature selection method: STD_MEAN, k=400


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7358490566037735

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.72      0.91      0.81        23
        non-tumor       0.86      0.86      0.86         7
oligodendroglioma       0.71      0.80      0.75        15

         accuracy                           0.74        53
        macro avg       0.57      0.64      0.60        53
     weighted avg       0.63      0.74      0.68        53


    Confusion Matrix:
[[ 0  5  0  3]
 [ 0 21  1  1]
 [ 0  0  6  1]
 [ 0  3  0 12]]

  Feature selection method: STD_MEAN, k=500


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7358490566037735

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.83      0.87      0.85        23
        non-tumor       0.64      1.00      0.78         7
oligodendroglioma       0.67      0.80      0.73        15

         accuracy                           0.74        53
        macro avg       0.53      0.67      0.59        53
     weighted avg       0.63      0.74      0.68        53


    Confusion Matrix:
[[ 0  4  0  4]
 [ 0 20  1  2]
 [ 0  0  7  0]
 [ 0  0  3 12]]

  Feature selection method: ANOVA, k=100

    Random Forest Classifier Performance:
    Accuracy: 0.6792452830188679

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.69      0.87      0.77        23
        non-t

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.6981132075471698

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.71      0.87      0.78        23
        non-tumor       0.70      1.00      0.82         7
oligodendroglioma       0.67      0.67      0.67        15

         accuracy                           0.70        53
        macro avg       0.52      0.63      0.57        53
     weighted avg       0.59      0.70      0.64        53


    Confusion Matrix:
[[ 0  4  0  4]
 [ 0 20  2  1]
 [ 0  0  7  0]
 [ 0  4  1 10]]

  Feature selection method: ANOVA, k=300

    Random Forest Classifier Performance:
    Accuracy: 0.660377358490566

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.67      0.87      0.75        23
        non-tu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.6792452830188679

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.67      0.96      0.79        23
        non-tumor       0.88      1.00      0.93         7
oligodendroglioma       0.58      0.47      0.52        15

         accuracy                           0.68        53
        macro avg       0.53      0.61      0.56        53
     weighted avg       0.57      0.68      0.61        53


    Confusion Matrix:
[[ 0  4  0  4]
 [ 0 22  0  1]
 [ 0  0  7  0]
 [ 0  7  1  7]]

  Feature selection method: CHI2, k=200

    Random Forest Classifier Performance:
    Accuracy: 0.6792452830188679

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.50      0.12      0.20         8
     glioblastoma       0.70      0.83      0.76        23
        non-tu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.6792452830188679

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.65      0.87      0.74        23
        non-tumor       0.78      1.00      0.88         7
oligodendroglioma       0.69      0.60      0.64        15

         accuracy                           0.68        53
        macro avg       0.53      0.62      0.56        53
     weighted avg       0.58      0.68      0.62        53


    Confusion Matrix:
[[ 0  6  0  2]
 [ 0 20  1  2]
 [ 0  0  7  0]
 [ 0  5  1  9]]

  Feature selection method: CHI2, k=500


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.660377358490566

    Classification Report:
                   precision    recall  f1-score   support

      astrocytoma       0.00      0.00      0.00         8
     glioblastoma       0.67      0.78      0.72        23
        non-tumor       0.70      1.00      0.82         7
oligodendroglioma       0.62      0.67      0.65        15

         accuracy                           0.66        53
        macro avg       0.50      0.61      0.55        53
     weighted avg       0.56      0.66      0.60        53


    Confusion Matrix:
[[ 0  5  0  3]
 [ 0 18  2  3]
 [ 0  0  7  0]
 [ 0  4  1 10]]

Processing dataset: GSE19804

  Feature selection method: STD_MEAN, k=100

    Random Forest Classifier Performance:
    Accuracy: 0.9444444444444444

    Classification Report:
                        precision    recall  f1-score   support

           lung cancer       1.00      0.89      0.94        18
paired normal adjacent       0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.6976744186046512

    Classification Report:
              precision    recall  f1-score   support

      Benign       0.00      0.00      0.00        11
     Ectopic       0.78      1.00      0.88         7
   Malignant       0.58      0.94      0.71        16
      Normal       1.00      0.89      0.94         9

    accuracy                           0.70        43
   macro avg       0.59      0.71      0.63        43
weighted avg       0.55      0.70      0.61        43


    Confusion Matrix:
[[ 0  1 10  0]
 [ 0  7  0  0]
 [ 0  1 15  0]
 [ 0  0  1  8]]

  Feature selection method: STD_MEAN, k=500

    Random Forest Classifier Performance:
    Accuracy: 0.6511627906976745

    Classification Report:
              precision    recall  f1-score   support

      Benign       0.00      0.00      0.00        11
     Ectopic       0.86      0.86      0.86         7
   Malignant       0.52      0.88      0.65        16
      Norma

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.5672514619883041

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.50      0.43      0.46        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.43      0.40      0.41        30
         4_MLL       1.00      0.67      0.80         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.43      0.65      0.52        46
       8_T-ALL       0.88      0.88      0.88        25

      accuracy                           0.57       171
     macro avg       0.52      0.50      0.50       171
  weighted avg       0.54      0.57      0.55       171


    Confusion Matrix:
[[15  0  7  0  0  0 13  0]
 [ 0 12  0  0  0  0  0  0]
 [ 4  0 12  0  0  0 14  0]
 [ 0  0  0  6  0  0  1  2]
 [ 2  0  0  0  0  0  5  0]
 [ 2  0  1  0  0  0  4  0]
 [ 6  1  8  0  0  0 30  1]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7368421052631579

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.66      0.66      0.66        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.79      0.90      0.84        30
         4_MLL       1.00      0.78      0.88         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.59      0.72      0.65        46
       8_T-ALL       0.92      0.96      0.94        25

      accuracy                           0.74       171
     macro avg       0.61      0.63      0.62       171
  weighted avg       0.68      0.74      0.71       171


    Confusion Matrix:
[[23  0  1  0  0  0 11  0]
 [ 0 12  0  0  0  0  0  0]
 [ 1  0 27  0  0  0  2  0]
 [ 0  0  0  7  0  0  2  0]
 [ 1  0  0  0  0  0  6  0]
 [ 4  0  1  0  0  0  2  0]
 [ 5  1  5  0  0  0 33  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7894736842105263

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.74      0.83      0.78        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.88      0.97      0.92        30
         4_MLL       1.00      0.67      0.80         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.65      0.76      0.70        46
       8_T-ALL       0.92      0.96      0.94        25

      accuracy                           0.79       171
     macro avg       0.64      0.65      0.64       171
  weighted avg       0.73      0.79      0.76       171


    Confusion Matrix:
[[29  0  0  0  0  0  6  0]
 [ 0 12  0  0  0  0  0  0]
 [ 1  0 29  0  0  0  0  0]
 [ 0  0  0  6  0  0  3  0]
 [ 1  0  0  0  0  0  6  0]
 [ 2  0  1  0  0  0  4  0]
 [ 5  1  3  0  0  0 35  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.783625730994152

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.79      0.89      0.84        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.83      0.97      0.89        30
         4_MLL       1.00      0.67      0.80         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.64      0.70      0.67        46
       8_T-ALL       0.86      0.96      0.91        25

      accuracy                           0.78       171
     macro avg       0.63      0.65      0.63       171
  weighted avg       0.72      0.78      0.75       171


    Confusion Matrix:
[[31  0  0  0  0  0  4  0]
 [ 0 12  0  0  0  0  0  0]
 [ 1  0 29  0  0  0  0  0]
 [ 0  0  0  6  0  0  3  0]
 [ 1  0  0  0  0  0  6  0]
 [ 1  0  1  0  0  0  5  0]
 [ 4  1  5  0  0  0 32  4]
 [ 1  0  0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.8070175438596491

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.80      0.94      0.87        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.85      0.97      0.91        30
         4_MLL       1.00      0.78      0.88         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.67      0.74      0.70        46
       8_T-ALL       0.92      0.92      0.92        25

      accuracy                           0.81       171
     macro avg       0.65      0.67      0.65       171
  weighted avg       0.75      0.81      0.77       171


    Confusion Matrix:
[[33  0  0  0  0  0  2  0]
 [ 0 12  0  0  0  0  0  0]
 [ 1  0 29  0  0  0  0  0]
 [ 0  0  0  7  0  0  2  0]
 [ 0  0  0  0  0  0  7  0]
 [ 1  0  1  0  0  0  5  0]
 [ 5  1  4  0  0  0 34  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7894736842105263

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.74      0.83      0.78        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.85      0.97      0.91        30
         4_MLL       1.00      0.89      0.94         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.66      0.72      0.69        46
       8_T-ALL       0.89      0.96      0.92        25

      accuracy                           0.79       171
     macro avg       0.63      0.67      0.65       171
  weighted avg       0.73      0.79      0.76       171


    Confusion Matrix:
[[29  0  0  0  0  0  6  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 29  0  0  0  0  1]
 [ 0  0  0  8  0  0  1  0]
 [ 2  0  0  0  0  0  5  0]
 [ 1  0  1  0  0  0  5  0]
 [ 6  1  4  0  0  0 33  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.8070175438596491

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.79      0.89      0.84        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.85      0.97      0.91        30
         4_MLL       1.00      0.89      0.94         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.68      0.74      0.71        46
       8_T-ALL       0.89      0.96      0.92        25

      accuracy                           0.81       171
     macro avg       0.64      0.68      0.66       171
  weighted avg       0.74      0.81      0.77       171


    Confusion Matrix:
[[31  0  0  0  0  0  4  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 29  0  0  0  0  1]
 [ 0  0  0  8  0  0  1  0]
 [ 1  0  0  0  0  0  6  0]
 [ 1  0  1  0  0  0  5  0]
 [ 5  1  4  0  0  0 34  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.8011695906432749

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.79      0.89      0.84        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.85      0.93      0.89        30
         4_MLL       1.00      0.89      0.94         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.67      0.74      0.70        46
       8_T-ALL       0.89      0.96      0.92        25

      accuracy                           0.80       171
     macro avg       0.64      0.68      0.66       171
  weighted avg       0.74      0.80      0.77       171


    Confusion Matrix:
[[31  0  0  0  0  0  4  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 28  0  0  0  1  1]
 [ 0  0  0  8  0  0  1  0]
 [ 1  0  0  0  0  0  6  0]
 [ 1  0  1  0  0  0  5  0]
 [ 5  1  4  0  0  0 34  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.8187134502923976

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.79      0.86      0.82        35
   2_TCF3-PBX1       1.00      1.00      1.00        12
  3_ETV6_RUNX1       0.88      1.00      0.94        30
         4_MLL       1.00      0.89      0.94         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.68      0.78      0.73        46
       8_T-ALL       0.92      0.96      0.94        25

      accuracy                           0.82       171
     macro avg       0.66      0.69      0.67       171
  weighted avg       0.76      0.82      0.79       171


    Confusion Matrix:
[[30  0  0  0  0  0  5  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 30  0  0  0  0  0]
 [ 0  0  0  8  0  0  1  0]
 [ 1  0  0  0  0  0  6  0]
 [ 1  0  1  0  0  0  5  0]
 [ 5  0  3  0  0  0 36  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.8128654970760234

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.79      0.89      0.84        35
   2_TCF3-PBX1       1.00      1.00      1.00        12
  3_ETV6_RUNX1       0.85      0.97      0.91        30
         4_MLL       1.00      0.89      0.94         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.69      0.76      0.72        46
       8_T-ALL       0.89      0.96      0.92        25

      accuracy                           0.81       171
     macro avg       0.65      0.68      0.67       171
  weighted avg       0.75      0.81      0.78       171


    Confusion Matrix:
[[31  0  0  0  0  0  4  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 29  0  0  0  0  1]
 [ 0  0  0  8  0  0  1  0]
 [ 1  0  0  0  0  0  6  0]
 [ 1  0  1  0  0  0  5  0]
 [ 5  0  4  0  0  0 35  2]
 [ 1  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.6549707602339181

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.62      0.60      0.61        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.82      0.60      0.69        30
         4_MLL       1.00      0.78      0.88         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.46      0.70      0.55        46
       8_T-ALL       0.88      0.88      0.88        25

      accuracy                           0.65       171
     macro avg       0.59      0.57      0.57       171
  weighted avg       0.64      0.65      0.64       171


    Confusion Matrix:
[[21  0  0  0  0  0 14  0]
 [ 0 12  0  0  0  0  0  0]
 [ 1  0 18  0  0  0 10  1]
 [ 0  0  0  7  0  0  2  0]
 [ 2  0  0  0  0  0  5  0]
 [ 2  0  1  0  0  0  4  0]
 [ 8  1  3  0  0  0 32  2]
 [ 0  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7134502923976608

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.62      0.71      0.67        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.84      0.87      0.85        30
         4_MLL       1.00      0.89      0.94         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.54      0.63      0.58        46
       8_T-ALL       0.88      0.88      0.88        25

      accuracy                           0.71       171
     macro avg       0.60      0.62      0.61       171
  weighted avg       0.67      0.71      0.69       171


    Confusion Matrix:
[[25  0  0  0  0  0 10  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 26  0  0  0  3  1]
 [ 1  0  0  8  0  0  0  0]
 [ 2  0  0  0  0  0  5  0]
 [ 2  0  1  0  0  0  4  0]
 [10  1  4  0  0  0 29  2]
 [ 0  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7485380116959064

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.69      0.77      0.73        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.84      0.90      0.87        30
         4_MLL       1.00      0.78      0.88         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.58      0.67      0.63        46
       8_T-ALL       0.89      0.96      0.92        25

      accuracy                           0.75       171
     macro avg       0.62      0.64      0.62       171
  weighted avg       0.69      0.75      0.72       171


    Confusion Matrix:
[[27  0  0  0  0  0  8  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 27  0  0  0  2  1]
 [ 1  0  0  7  0  0  1  0]
 [ 1  0  0  0  0  0  6  0]
 [ 2  0  1  0  0  0  4  0]
 [ 8  1  4  0  0  0 31  2]
 [ 0  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7426900584795322

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.72      0.74      0.73        35
   2_TCF3-PBX1       0.92      1.00      0.96        12
  3_ETV6_RUNX1       0.87      0.90      0.89        30
         4_MLL       1.00      0.78      0.88         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.57      0.74      0.64        46
       8_T-ALL       0.88      0.84      0.86        25

      accuracy                           0.74       171
     macro avg       0.62      0.62      0.62       171
  weighted avg       0.70      0.74      0.72       171


    Confusion Matrix:
[[26  0  0  0  0  0  9  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 27  0  0  0  2  1]
 [ 1  0  0  7  0  0  1  0]
 [ 1  0  0  0  0  0  6  0]
 [ 2  0  1  0  0  0  4  0]
 [ 6  1  3  0  0  0 34  2]
 [ 0  0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Random Forest Classifier Performance:
    Accuracy: 0.7602339181286549

    Classification Report:
                precision    recall  f1-score   support

1_Hyperdiploid       0.78      0.71      0.75        35
   2_TCF3-PBX1       1.00      1.00      1.00        12
  3_ETV6_RUNX1       0.88      0.93      0.90        30
         4_MLL       1.00      0.78      0.88         9
          5_Ph       0.00      0.00      0.00         7
        6_Hypo       0.00      0.00      0.00         7
       7_Other       0.57      0.78      0.66        46
       8_T-ALL       0.88      0.88      0.88        25

      accuracy                           0.76       171
     macro avg       0.64      0.64      0.63       171
  weighted avg       0.72      0.76      0.73       171


    Confusion Matrix:
[[25  0  0  0  0  0 10  0]
 [ 0 12  0  0  0  0  0  0]
 [ 0  0 28  0  0  0  1  1]
 [ 0  0  0  7  0  0  2  0]
 [ 0  0  0  0  0  0  7  0]
 [ 2  0  1  0  0  0  4  0]
 [ 5  0  3  0  0  0 36  2]
 [ 0  0  