<a href="https://colab.research.google.com/github/subikkshas/PREPARE-ALL/blob/main/clinician_pred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import pandas as pd
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_auc_score

## Load Data from drive

In [2]:
def load_train_test_data(load_path='/content/drive/MyDrive/Final data/'):
    # Mount Google Drive (if not already mounted)
    try:
        drive.mount('/content/drive')
        print("Google Drive mounted successfully.")
    except:
        print("Google Drive already mounted or mounting failed.")

    # Load the data files
    try:
        loaded_X_train = pd.read_csv(os.path.join(load_path, 'X_train_encoded.csv'))
        loaded_y_train = pd.read_csv(os.path.join(load_path, 'y_train_encoded.csv'))
        loaded_X_test = pd.read_csv(os.path.join(load_path, 'X_test_encoded.csv'))
        loaded_y_test = pd.read_csv(os.path.join(load_path, 'loaded_y_test.csv'))

        print("Data loaded successfully!")
        print(f"X_train shape: {loaded_X_train.shape}")
        print(f"y_train shape: {loaded_y_train.shape}")
        print(f"X_test shape: {loaded_X_test.shape}")
        print(f"y_test shape: {loaded_y_test.shape}")

        return loaded_X_train, loaded_y_train, loaded_X_test, loaded_y_test

    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
        return None, None, None, None
    except Exception as e:
        print(f"Error loading data: {e}")
        return None, None, None, None

In [3]:
X_train, y_train, X_test, y_test = load_train_test_data()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully.
Data loaded successfully!
X_train shape: (1864, 26)
y_train shape: (1864, 1)
X_test shape: (467, 26)
y_test shape: (467, 1)


## Clean string

In [4]:
def clean_string_columns(df, columns_to_clean):
  existing_cols_to_clean = [col for col in columns_to_clean if col in df.columns]

  for col in existing_cols_to_clean:
      df[col] = df[col].astype("string").str.strip().str.lower()

  return df

## XGBOOST

In [5]:
import joblib
import os
from google.colab import drive

# Mount Google Drive (if not already mounted)
try:
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except:
    print("Google Drive already mounted or mounting failed.")

# Define the path to the saved model in your Drive
load_path = '/content/drive/MyDrive/saved_models/'
model_filename = 'best_xgboost_model.joblib'
model_path = os.path.join(load_path, model_filename)

# Load the model
try:
    loaded_model = joblib.load(model_path)
    print(f"Model loaded successfully from: {model_path}")
except FileNotFoundError:
    print(f"Error: Model file not found at {model_path}")
except Exception as e:
    print(f"Error loading model: {e}")

# You can now use 'loaded_model' for predictions or further analysis
# For example: loaded_model.predict(X_test_encoded)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully.
Model loaded successfully from: /content/drive/MyDrive/saved_models/best_xgboost_model.joblib


In [6]:
def evaluate_model(model, X_test, y_test, threshold=0.5):
    # Probabilities
    y_probs = model.predict_proba(X_test)[:, 1]

    # Apply threshold
    y_pred = (y_probs >= threshold).astype(int)

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, y_probs)
    cm = confusion_matrix(y_test, y_pred)

    # Print neatly
    print("=== Evaluation Metrics (Summary) ===")
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1-score : {f1:.4f}")
    print(f"ROC AUC  : {auc:.4f}")
    print("\nConfusion Matrix:")
    print(cm)
    return

In [7]:
y_pred = loaded_model.predict(X_test)

In [8]:
evaluate_model(loaded_model, X_test, y_test, 0.4904101490974426)

=== Evaluation Metrics (Summary) ===
Accuracy : 0.5803
Precision: 0.3172
Recall   : 0.6372
F1-score : 0.4235
ROC AUC  : 0.6557

Confusion Matrix:
[[199 155]
 [ 41  72]]


## Clinician GARGI

In [9]:
clinician_gargi_df = pd.read_excel('/content/X_test for clinician Gargi.xlsx')
display(clinician_gargi_df.head())

Unnamed: 0,Previous Treatment,NCI Risk,Sex,Age,Lineage,Bulky Disease,Highest presenting WBC,Prednisolone Response,CNS Disease,Cytogenetic groups,Detail cytogenetics,Provisional risk,MRD Status_EOI,Final Risk,Doctor Predictions
0,no,high,female,4.0,b,no,135000.0,good,no,non-high risk,b-other,intermediate,,intermediate,No
1,no,standard,male,2.0,b,no,20140.0,good,no,non-high risk,high hyperdiploidy,standard,negative,standard,No
2,yes,standard,male,7.0,b,yes,3830.0,good,no,,,high,negative,high,No
3,no,standard,male,4.0,b,yes,6840.0,good,no,non-high risk,b-other,intermediate,negative,intermediate,No
4,no,high,male,8.999316,b,no,70700.0,good,yes,non-high risk,b-other,high,negative,high,No


In [10]:
clinician_gargi_df = clean_string_columns(clinician_gargi_df, ['Doctor Predictions'])
clinician_gargi_target = clinician_gargi_df['Doctor Predictions']
clinician_gargi_features = clinician_gargi_df.drop('Doctor Predictions', axis=1)

print("Clinician Gargi Target:")
display(clinician_gargi_target.head())

print("\nClinician Gargi Features:")
display(clinician_gargi_features.head())

Clinician Gargi Target:


Unnamed: 0,Doctor Predictions
0,no
1,no
2,no
3,no
4,no



Clinician Gargi Features:


Unnamed: 0,Previous Treatment,NCI Risk,Sex,Age,Lineage,Bulky Disease,Highest presenting WBC,Prednisolone Response,CNS Disease,Cytogenetic groups,Detail cytogenetics,Provisional risk,MRD Status_EOI,Final Risk
0,no,high,female,4.0,b,no,135000.0,good,no,non-high risk,b-other,intermediate,,intermediate
1,no,standard,male,2.0,b,no,20140.0,good,no,non-high risk,high hyperdiploidy,standard,negative,standard
2,yes,standard,male,7.0,b,yes,3830.0,good,no,,,high,negative,high
3,no,standard,male,4.0,b,yes,6840.0,good,no,non-high risk,b-other,intermediate,negative,intermediate
4,no,high,male,8.999316,b,no,70700.0,good,yes,non-high risk,b-other,high,negative,high


In [11]:
target_mapper = {'no': 0, 'yes': 1}
clinician_gargi_target_encoded = clinician_gargi_target.map(target_mapper)

print("Clinician Gargi Target Encoded:")
display(clinician_gargi_target_encoded.head())

Clinician Gargi Target Encoded:


Unnamed: 0,Doctor Predictions
0,0
1,0
2,0
3,0
4,0


In [12]:
# Calculate accuracy
accuracy = accuracy_score(y_test, clinician_gargi_target_encoded)
precision = precision_score(y_test, clinician_gargi_target_encoded)
recall = recall_score(y_test, clinician_gargi_target_encoded)
f1 = f1_score(y_test, clinician_gargi_target_encoded)
roc_auc = roc_auc_score(y_test, clinician_gargi_target_encoded)
cm = confusion_matrix(y_test, clinician_gargi_target_encoded)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")
print("Confusion Matrix:")
print(cm)

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, clinician_gargi_target_encoded))

Accuracy: 0.5953
Precision: 0.2397
Recall: 0.3097
F1 Score: 0.2703
ROC AUC Score: 0.4981
Confusion Matrix:
[[243 111]
 [ 78  35]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.69      0.72       354
           1       0.24      0.31      0.27       113

    accuracy                           0.60       467
   macro avg       0.50      0.50      0.50       467
weighted avg       0.63      0.60      0.61       467



In [13]:
# Calculate concordance and discordance
concordant_gargi = (clinician_gargi_target_encoded == y_pred).sum()
discordant_gargi = (clinician_gargi_target_encoded != y_pred).sum()
total_predictions = len(y_pred)

print(f"Concordance: {concordant_gargi} out of {total_predictions} ({concordant_gargi/total_predictions:.6f})")
print(f"Discordance: {discordant_gargi} out of {total_predictions} ({discordant_gargi/total_predictions:.6f})")

Concordance: 255 out of 467 (0.546039)
Discordance: 212 out of 467 (0.453961)


## Clinician PRASANTH

In [14]:
clinician_prasanth_df = pd.read_excel('/content/X_test for clinician Gargi.Prasanth.xlsx')
display(clinician_prasanth_df.head())

Unnamed: 0,Previous Treatment,NCI Risk,Sex,Age,Lineage,Bulky Disease,Highest presenting WBC,Prednisolone Response,CNS Disease,Cytogenetic groups,Detail cytogenetics,Provisional risk,MRD Status_EOI,Final Risk,Doctor Predictions - Gargi,Doctor's Preference - Prasanth
0,no,high,female,7.0,b,no,58000.0,poor,,non-high risk,b-other,high,.,.,Yes,Yes
1,no,standard,female,2.0,b,yes,23000.0,good,no,non-high risk,tcf3-pbx1,intermediate,.,.,Yes,Yes
2,no,high,male,18.0,t,yes,336700.0,,yes,not required,not required,t,.,.,Yes,Yes
3,no,high,male,6.0,b,no,71780.0,good,no,non-high risk,tcf3-pbx1,intermediate,.,.,Yes,Yes
4,no,standard,male,3.0,b,no,1600.0,good,no,non-high risk,etv6-runx1,standard,.,.,No,No


In [25]:
clinician_prasanth_df = clean_string_columns(clinician_prasanth_df, ["Doctor's Preference - Prasanth"])
clinician_prasanth_target = clinician_prasanth_df["Doctor's Preference - Prasanth"]
clinician_prasanth_features = clinician_prasanth_df.drop("Doctor's Preference - Prasanth", axis=1)

print("Clinician Prasanth Target:")
display(clinician_prasanth_target.head())

print("\nClinician Prasanth Features:")
display(clinician_prasanth_features.head())

Clinician Prasanth Target:


Unnamed: 0,Doctor's Preference - Prasanth
0,yes
1,yes
2,yes
3,yes
4,no



Clinician Prasanth Features:


Unnamed: 0,Previous Treatment,NCI Risk,Sex,Age,Lineage,Bulky Disease,Highest presenting WBC,Prednisolone Response,CNS Disease,Cytogenetic groups,Detail cytogenetics,Provisional risk,MRD Status_EOI,Final Risk,Doctor Predictions - Gargi
0,no,high,female,7.0,b,no,58000.0,poor,,non-high risk,b-other,high,.,.,Yes
1,no,standard,female,2.0,b,yes,23000.0,good,no,non-high risk,tcf3-pbx1,intermediate,.,.,Yes
2,no,high,male,18.0,t,yes,336700.0,,yes,not required,not required,t,.,.,Yes
3,no,high,male,6.0,b,no,71780.0,good,no,non-high risk,tcf3-pbx1,intermediate,.,.,Yes
4,no,standard,male,3.0,b,no,1600.0,good,no,non-high risk,etv6-runx1,standard,.,.,No


In [16]:
target_mapper = {'no': 0, 'yes': 1}
clinician_prasanth_target_encoded = clinician_prasanth_target.map(target_mapper)

print("Clinician Prasanth Target Encoded:")
display(clinician_prasanth_target_encoded.head())

Clinician Prasanth Target Encoded:


Unnamed: 0,Doctor's Preference - Prasanth
0,1
1,1
2,1
3,1
4,0


In [17]:
# Calculate accuracy
accuracy = accuracy_score(y_test, clinician_prasanth_target_encoded)
precision = precision_score(y_test, clinician_prasanth_target_encoded)
recall = recall_score(y_test, clinician_prasanth_target_encoded)
f1 = f1_score(y_test, clinician_prasanth_target_encoded)
cm = confusion_matrix(y_test, clinician_prasanth_target_encoded)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, clinician_prasanth_target_encoded):.4f}")
print("Confusion Matrix:")
print(cm)

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, clinician_prasanth_target_encoded))

Accuracy: 0.5824
Precision: 0.2500
Recall: 0.3628
F1 Score: 0.2960
ROC AUC Score: 0.5077
Confusion Matrix:
[[231 123]
 [ 72  41]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.65      0.70       354
           1       0.25      0.36      0.30       113

    accuracy                           0.58       467
   macro avg       0.51      0.51      0.50       467
weighted avg       0.64      0.58      0.60       467



In [18]:
# Calculate concordance and discordance
concordant_prasanth = (clinician_prasanth_target_encoded == y_pred).sum()
discordant_prasanth = (clinician_prasanth_target_encoded != y_pred).sum()
total_predictions = len(y_pred)

print(f"Concordance: {concordant_prasanth} out of {total_predictions} ({concordant_prasanth/total_predictions:.6f})")
print(f"Discordance: {discordant_prasanth} out of {total_predictions} ({discordant_prasanth/total_predictions:.6f})")

Concordance: 251 out of 467 (0.537473)
Discordance: 216 out of 467 (0.462527)


## Clinician BALAJI

In [19]:
clinician_balaji_df = pd.read_excel('/content/Copy of X_test for clinician.xlsx')
display(clinician_balaji_df.head())

Unnamed: 0,Previous Treatment,NCI Risk,Sex,Age,Lineage,Bulky Disease,Highest presenting WBC,Prednisolone Response,CNS Disease,Cytogenetic groups,Detail cytogenetics,Provisional risk,MRD Status_EOI,Final Risk,Doctor Predictions
0,no,high,female,4.0,b,no,135000.0,good,no,non-high risk,b-other,intermediate,,intermediate,No
1,no,standard,male,2.0,b,no,20140.0,good,no,non-high risk,high hyperdiploidy,standard,negative,standard,No
2,yes,standard,male,7.0,b,yes,3830.0,good,no,,,high,negative,high,No
3,no,standard,male,4.0,b,yes,6840.0,good,no,non-high risk,b-other,intermediate,negative,intermediate,No
4,no,high,male,8.999316,b,no,70700.0,good,yes,non-high risk,b-other,high,negative,high,Yes


In [26]:
clinician_balaji_df = clean_string_columns(clinician_balaji_df, ["Doctor Predictions"])
clinician_balaji_target = clinician_balaji_df["Doctor Predictions"]
clinician_balaji_features = clinician_balaji_df.drop("Doctor Predictions", axis=1)

print("Clinician Balaji Target:")
display(clinician_balaji_target.head())

print("\nClinician Balaji Features:")
display(clinician_balaji_features.head())

Clinician Balaji Target:


Unnamed: 0,Doctor Predictions
0,no
1,no
2,no
3,no
4,yes



Clinician Balaji Features:


Unnamed: 0,Previous Treatment,NCI Risk,Sex,Age,Lineage,Bulky Disease,Highest presenting WBC,Prednisolone Response,CNS Disease,Cytogenetic groups,Detail cytogenetics,Provisional risk,MRD Status_EOI,Final Risk
0,no,high,female,4.0,b,no,135000.0,good,no,non-high risk,b-other,intermediate,,intermediate
1,no,standard,male,2.0,b,no,20140.0,good,no,non-high risk,high hyperdiploidy,standard,negative,standard
2,yes,standard,male,7.0,b,yes,3830.0,good,no,,,high,negative,high
3,no,standard,male,4.0,b,yes,6840.0,good,no,non-high risk,b-other,intermediate,negative,intermediate
4,no,high,male,8.999316,b,no,70700.0,good,yes,non-high risk,b-other,high,negative,high


In [21]:
target_mapper = {'no': 0, 'yes': 1}
clinician_balaji_target_encoded = clinician_balaji_target.map(target_mapper)

print("Clinician Balaji Target Encoded:")
display(clinician_balaji_target_encoded.head())

Clinician Balaji Target Encoded:


Unnamed: 0,Doctor Predictions
0,0
1,0
2,0
3,0
4,1


In [22]:
# Calculate accuracy
accuracy = accuracy_score(y_test, clinician_balaji_target_encoded)
precision = precision_score(y_test, clinician_balaji_target_encoded)
recall = recall_score(y_test, clinician_balaji_target_encoded)
f1 = f1_score(y_test, clinician_balaji_target_encoded)
cm = confusion_matrix(y_test, clinician_balaji_target_encoded)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, clinician_balaji_target_encoded):.4f}")
print("Confusion Matrix:")
print(cm)

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, clinician_balaji_target_encoded))

Accuracy: 0.6146
Precision: 0.2721
Recall: 0.3540
F1 Score: 0.3077
ROC AUC Score: 0.5259
Confusion Matrix:
[[247 107]
 [ 73  40]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.70      0.73       354
           1       0.27      0.35      0.31       113

    accuracy                           0.61       467
   macro avg       0.52      0.53      0.52       467
weighted avg       0.65      0.61      0.63       467



In [23]:
# Calculate concordance and discordance
concordant_balaji = (clinician_balaji_target_encoded == y_pred).sum()
discordant_balaji = (clinician_balaji_target_encoded != y_pred).sum()
total_predictions = len(y_pred)

print(f"Concordance: {concordant_balaji} out of {total_predictions} ({concordant_balaji/total_predictions:.6f})")
print(f"Discordance: {discordant_balaji} out of {total_predictions} ({discordant_balaji/total_predictions:.6f})")

Concordance: 260 out of 467 (0.556745)
Discordance: 207 out of 467 (0.443255)


## Cohen's Kappa

In [24]:
from sklearn.metrics import cohen_kappa_score

# Cohen's Kappa for Model vs Clinicians
kappa_model_gargi = cohen_kappa_score(y_pred, clinician_gargi_target_encoded)
kappa_model_prasanth = cohen_kappa_score(y_pred, clinician_prasanth_target_encoded)
kappa_model_balaji = cohen_kappa_score(y_pred, clinician_balaji_target_encoded)

print("Cohen's Kappa (Model vs Clinicians):")
print(f"Model vs Gargi: {kappa_model_gargi:.4f}")
print(f"Model vs Prasanth: {kappa_model_prasanth:.4f}")
print(f"Model vs Balaji: {kappa_model_balaji:.4f}")

# Cohen's Kappa between Clinicians
kappa_gargi_prasanth = cohen_kappa_score(clinician_gargi_target_encoded, clinician_prasanth_target_encoded)
kappa_gargi_balaji = cohen_kappa_score(clinician_gargi_target_encoded, clinician_balaji_target_encoded)
kappa_prasanth_balaji = cohen_kappa_score(clinician_prasanth_target_encoded, clinician_balaji_target_encoded)

print("\nCohen's Kappa (Between Clinicians):")
print(f"Gargi vs Prasanth: {kappa_gargi_prasanth:.4f}")
print(f"Gargi vs Balaji: {kappa_gargi_balaji:.4f}")
print(f"Prasanth vs Balaji: {kappa_prasanth_balaji:.4f}")

Cohen's Kappa (Model vs Clinicians):
Model vs Gargi: 0.0627
Model vs Prasanth: 0.0514
Model vs Balaji: 0.0852

Cohen's Kappa (Between Clinicians):
Gargi vs Prasanth: -0.0508
Gargi vs Balaji: 0.5773
Prasanth vs Balaji: -0.0156
