In [None]:
# This is for task 2, using the report to predict the pathologies

In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, classification_report, average_precision_score
)
import matplotlib.pyplot as plt


In [2]:
# Location of the linked task-2 CSV. Replace this with the actual location of your copy.
LINKED_DATA_CSV = "/Users/cuprum/Desktop/SPH6004/assignment 2/data/mimic_task2_linked_data.csv"

# Read the CSV into `df_linked`; uncomment the preview below to inspect the first rows.
df_linked = pd.read_csv(LINKED_DATA_CSV)
# print(df_linked.head())

In [None]:
# Label cleaning, same convention the TA used for task 1:
#   NaN -> 0    (missing label becomes 0)
#   -1  -> NaN  (presumably -1 marks an uncertain label; confirm against the dataset docs)

# 'pathologies' is the list of label columns that need fixing.
pathologies = [
    "Enlarged Cardiomediastinum",
    "Cardiomegaly",
    "Lung Opacity",
    "Lung Lesion",
    "Edema",
    "Consolidation",
    "Pneumonia",
    "Atelectasis",
    "Pneumothorax",
    "Pleural Effusion",
    "Pleural Other",
    "Fracture",
    "Support Devices",
]

# The two conversions below must be applied exactly once per column: running
# fillna(0) a second time would turn the NaNs produced from -1 into 0.
# The columns already converted are recorded on the DataFrame itself (`attrs`),
# so re-running this cell is a no-op, while re-loading the CSV (which creates a
# new DataFrame with empty `attrs`) triggers the cleaning again.
_CLEANED_ATTR = "pathology_labels_cleaned"
_already_cleaned = set(df_linked.attrs.get(_CLEANED_ATTR, []))
_to_clean = [col for col in pathologies if col not in _already_cleaned]

if _to_clean:
    # Order matters: fill the original NaNs with 0 first, then map -1 to NaN.
    df_linked[_to_clean] = df_linked[_to_clean].fillna(0).replace(-1, np.nan)
    df_linked.attrs[_CLEANED_ATTR] = sorted(_already_cleaned.union(_to_clean))


TfidfVectorizer converts each radiology “report_text” to a sparse vector, reflecting how often words appear relative to how commonly they appear overall.

This simple model uses LogisticRegression as the base estimator, which is straightforward but often effective.

In [4]:
# Train one TF-IDF + logistic-regression classifier per pathology, tune it with
# 5-fold cross-validated grid search on the training split, and record metrics
# on the held-out test split.

RANDOM_STATE = 42  # single seed shared by both splits and the solver

# Dictionary to store each pathology's best model and a list to collect detailed test metrics
pathology_models = {}
detailed_test_metrics = []

# Define a parameter grid for tuning in the pipeline.
param_grid = {
    "tfidf__max_df": [0.8, 0.9],
    "tfidf__min_df": [5, 10],
    "logreg__C": [0.1, 1, 10],
}


def _test_metrics_row(label, y_true, y_pred, y_score):
    """Build one row of test metrics for a single pathology.

    Parameters
    ----------
    label : str
        Pathology name, stored under the "Label" key.
    y_true : array-like
        True binary labels of the test split. Must contain both classes,
        because the per-class arrays below are indexed at positions 0 and 1.
    y_pred : array-like
        Hard class predictions for the test split.
    y_score : array-like, 1-D
        Predicted probability of the positive class for each test sample.

    Returns
    -------
    dict
        Accuracy, average precision, and precision/recall/F1 for class 0 and class 1.
    """
    # average=None yields one value per label in sorted label order, so with
    # both classes present index 0 is class 0 and index 1 is class 1.
    precisions = precision_score(y_true, y_pred, average=None, zero_division=0)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    f1s = f1_score(y_true, y_pred, average=None, zero_division=0)

    return {
        "Label": label,
        "Accuracy": accuracy_score(y_true, y_pred),
        # Binary target: the 1-D positive-class scores are passed directly.
        "Average Precision": average_precision_score(y_true, y_score),
        "Precision (Class 0)": precisions[0],
        "Recall (Class 0)": recalls[0],
        "F1 (Class 0)": f1s[0],
        "Precision (Class 1)": precisions[1],
        "Recall (Class 1)": recalls[1],
        "F1 (Class 1)": f1s[1],
    }


for pathology in pathologies:
    print(f"\n----- Training model for: {pathology} -----")

    # Keep rows that have a label for this pathology and a report to vectorize
    # (a missing report cannot be tokenized by the vectorizer).
    usable = df_linked[pathology].notna() & df_linked["report_text"].notna()
    df_sub = df_linked[usable]

    # Display unique labels for debugging
    unique_labels = np.unique(df_sub[pathology])
    print(f"Unique labels for {pathology}: {unique_labels}")

    # Prepare features and labels
    X_sub = df_sub["report_text"]
    y_sub = df_sub[pathology]  # Expected to be 0 or 1

    # Nothing to learn (and nothing to split) when fewer than two classes remain.
    if unique_labels.size < 2:
        print(f"Skipping {pathology} due to insufficient class variety: {unique_labels}.")
        continue

    # Split data: 60% training, 20% validation, 20% test.
    # The validation part is held out but not used in this cell: hyperparameters
    # are tuned by cross-validation on the training split only.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X_sub, y_sub, test_size=0.4, random_state=RANDOM_STATE
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=RANDOM_STATE
    )

    # Every split needs both classes, otherwise the per-class metrics are undefined.
    if any(np.unique(part).size < 2 for part in (y_train, y_val, y_test)):
        print(f"Skipping {pathology} due to insufficient class variety: {np.unique(y_sub)}.")
        continue

    # Build a pipeline with TfidfVectorizer and LogisticRegression
    pipeline = Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("logreg", LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)),
    ])

    # Use GridSearchCV (with 5-fold CV) to tune hyperparameters using PRAUC as the scoring metric.
    grid_search = GridSearchCV(
        pipeline, param_grid, scoring="average_precision", cv=5, n_jobs=-1
    )
    grid_search.fit(X_train, y_train)

    print("Best parameters:", grid_search.best_params_)
    best_pipeline = grid_search.best_estimator_

    # Predictions on the test set. The estimator's classes are sorted, so
    # column 1 of predict_proba is the probability of class 1.
    y_test_pred = best_pipeline.predict(X_test)
    y_test_pred_proba = best_pipeline.predict_proba(X_test)[:, 1]

    # Print the test classification report.
    print("Test classification report:")
    print(classification_report(y_test, y_test_pred, labels=np.unique(y_test)))

    # Save the best pipeline.
    pathology_models[pathology] = best_pipeline

    # Append the detailed test metrics for this pathology.
    detailed_test_metrics.append(
        _test_metrics_row(pathology, y_test, y_test_pred, y_test_pred_proba)
    )

# Convert detailed test metrics into a DataFrame, display it and save it.
if not detailed_test_metrics:
    print("No pathology had sufficient class variety for evaluation.")
else:
    df_metrics = pd.DataFrame(detailed_test_metrics)
    print("\nDetailed Test Metrics:")
    print(df_metrics)
    # Written only when metrics exist: `df_metrics` is not defined otherwise.
    df_metrics.to_csv("results_nlp.csv", index=False)


----- Training model for: Atelectasis -----
Unique labels for Atelectasis: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     16683
         1.0       0.92      0.89      0.90      2021

    accuracy                           0.98     18704
   macro avg       0.95      0.94      0.94     18704
weighted avg       0.98      0.98      0.98     18704


----- Training model for: Cardiomegaly -----
Unique labels for Cardiomegaly: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 10}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.98     17035
         1.0       0.87      0.80      0.83      1951

    accuracy                           0.97     18986
   macro avg       0.92      0.89      0.91     18986
weighted avg       0.97      0.97      0.97     18986


----- Training model for: Consolidation -----
Unique labels for Consolidation: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     18634
         1.0       0.76      0.44      0.56       369

    accuracy                           0.99     19003
   macro avg       0.88      0.72      0.77     19003
weighted avg       0.98      0.99      0.98     19003


----- Training model for: Edema -----
Unique labels for Edema: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 10}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     18085
         1.0       0.86      0.72      0.78       636

    accuracy                           0.99     18721
   macro avg       0.93      0.86      0.89     18721
weighted avg       0.99      0.99      0.99     18721


----- Training model for: Enlarged Cardiomediastinum -----
Unique labels for Enlarged Cardiomediastinum: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 10}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     18608
         1.0       0.74      0.40      0.52       298

    accuracy                           0.99     18906
   macro avg       0.86      0.70      0.75     18906
weighted avg       0.99      0.99      0.99     18906


----- Training model for: Fracture -----
Unique labels for Fracture: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     18775
         1.0       0.80      0.66      0.73       391

    accuracy                           0.99     19166
   macro avg       0.90      0.83      0.86     19166
weighted avg       0.99      0.99      0.99     19166


----- Training model for: Lung Lesion -----
Unique labels for Lung Lesion: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     18516
         1.0       0.80      0.63      0.71       593

    accuracy                           0.98     19109
   macro avg       0.89      0.81      0.85     19109
weighted avg       0.98      0.98      0.98     19109


----- Training model for: No Finding -----
Unique labels for No Finding: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 10}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.91      0.91      0.91      9235
         1.0       0.92      0.91      0.91      9996

    accuracy                           0.91     19231
   macro avg       0.91      0.91      0.91     19231
weighted avg       0.91      0.91      0.91     19231


----- Training model for: Pleural Effusion -----
Unique labels for Pleural Effusion: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.98     16499
         1.0       0.90      0.85      0.87      2453

    accuracy                           0.97     18952
   macro avg       0.94      0.92      0.93     18952
weighted avg       0.97      0.97      0.97     18952


----- Training model for: Pleural Other -----
Unique labels for Pleural Other: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.8, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     18936
         1.0       0.72      0.43      0.54       208

    accuracy                           0.99     19144
   macro avg       0.86      0.72      0.77     19144
weighted avg       0.99      0.99      0.99     19144


----- Training model for: Pneumonia -----
Unique labels for Pneumonia: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 10}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.98     16906
         1.0       0.79      0.68      0.73      1137

    accuracy                           0.97     18043
   macro avg       0.89      0.83      0.86     18043
weighted avg       0.97      0.97      0.97     18043


----- Training model for: Pneumothorax -----
Unique labels for Pneumothorax: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     18709
         1.0       0.88      0.70      0.78       473

    accuracy                           0.99     19182
   macro avg       0.94      0.85      0.89     19182
weighted avg       0.99      0.99      0.99     19182


----- Training model for: Support Devices -----
Unique labels for Support Devices: [0. 1.]


  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_

Best parameters: {'logreg__C': 10, 'tfidf__max_df': 0.9, 'tfidf__min_df': 5}
Test classification report:
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.98     17865
         1.0       0.84      0.70      0.76      1351

    accuracy                           0.97     19216
   macro avg       0.91      0.84      0.87     19216
weighted avg       0.97      0.97      0.97     19216


Detailed Test Metrics:
                         Label  Accuracy  Average Precision  \
0                  Atelectasis  0.978988           0.943214   
1                 Cardiomegaly  0.966554           0.891632   
2                Consolidation  0.986423           0.689671   
3                        Edema  0.986593           0.859389   
4   Enlarged Cardiomediastinum  0.988258           0.586991   
5                     Fracture  0.989721           0.813213   
6                  Lung Lesion  0.983673           0.793309   
7                   No Finding  0.911705