In [None]:
!pip install kagglehub scikit-learn --quiet

import kagglehub
import pandas as pd
import os

# Download the Kaggle dataset; `path` is the local directory that
# kagglehub reports for the downloaded files.
path = kagglehub.dataset_download(
    "s3programmerlead/personalized-anesthesia-management-dataset"
)
print(f"Dataset downloaded to: {path}")

# Read the dataset's CSV file from the download directory.
csv_file_path = os.path.join(path, "Anesthesia_Dataset.csv")
df = pd.read_csv(csv_file_path)

# Quick inspection: the first rows, followed by the column summary
# that DataFrame.info() writes to stdout.
print("\nFirst 5 rows of the dataset:", df.head(), sep="\n")
print("\nDataset Information:")
df.info()

Downloading from https://www.kaggle.com/api/v1/datasets/download/s3programmerlead/personalized-anesthesia-management-dataset?dataset_version_number=1...


100%|██████████| 3.92k/3.92k [00:00<00:00, 5.73MB/s]

Extracting files...
Dataset downloaded to: /root/.cache/kagglehub/datasets/s3programmerlead/personalized-anesthesia-management-dataset/versions/1

First 5 rows of the dataset:
   PatientID  Age Gender  BMI     SurgeryType SurgeryDuration AnesthesiaType  \
0          1   33      M   32    Neurological         217 min          Local   
1          2   33      M   23  Cardiovascular         181 min          Local   
2          3   58      F   24      Orthopedic          79 min        General   
3          4   65      F   26      Orthopedic         210 min          Local   
4          5   65      M   28    Neurological         221 min        General   

        PreoperativeNotes              PostoperativeNotes  PainLevel  \
0  Hypertension, diabetes  Minimal pain, no complications          7   
1    Stable, no allergies  Minimal pain, no complications          7   
2    Stable, no allergies  Minimal pain, no complications          3   
3    Stable, no allergies             Pain, slow recove




In [None]:
from sklearn.model_selection import train_test_split

# Convert "217 min"-style strings to integer minutes.
# The astype(str) step makes this cell safe to re-run: once the column has
# been overwritten with ints, calling `.str` on it directly would raise
# AttributeError. regex=False treats ' min' as a literal on every pandas version.
df['SurgeryDuration'] = (
    df['SurgeryDuration']
    .astype(str)
    .str.replace(' min', '', regex=False)
    .astype(int)
)

# Drop the row identifier and the two columns excluded from the features.
# NOTE(review): PostoperativeNotes/Complications are presumably removed because
# they are recorded after surgery; PainLevel may be as well -- confirm it is
# legitimately available at prediction time.
df_cleaned = df.drop(columns=['PatientID', 'PostoperativeNotes', 'Complications'])

# One-hot encode the remaining categorical columns; drop_first=True drops one
# level per category to avoid redundant indicator columns.
df_processed = pd.get_dummies(df_cleaned, drop_first=True)

# Separate the features from the 'Outcome' target.
X = df_processed.drop('Outcome', axis=1)
y = df_processed['Outcome']

# 80/20 split, stratified on the target so both sets keep the class balance;
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Shape of training features:", X_train.shape)
print("Shape of testing features:", X_test.shape)
print("\nProcessed feature columns:")
print(X_train.columns)
print("\nFirst 5 rows of the processed training data:")
print(X_train.head())

Shape of training features: (240, 10)
Shape of testing features: (60, 10)

Processed feature columns:
Index(['Age', 'BMI', 'SurgeryDuration', 'PainLevel', 'Gender_M',
       'SurgeryType_Cosmetic', 'SurgeryType_Neurological',
       'SurgeryType_Orthopedic', 'AnesthesiaType_Local',
       'PreoperativeNotes_Stable, no allergies'],
      dtype='object')

First 5 rows of the processed training data:
     Age  BMI  SurgeryDuration  PainLevel  Gender_M  SurgeryType_Cosmetic  \
60    33   24              210          2      True                  True   
15    72   23              188          2     False                 False   
29    33   24              188          3     False                 False   
135   72   26              115          5     False                 False   
83    72   23              139          3     False                 False   

     SurgeryType_Neurological  SurgeryType_Orthopedic  AnesthesiaType_Local  \
60                      False                   False    

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train a 100-tree random forest on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out test split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Pair each feature name with its importance from the fitted forest and
# rank them from most to least important.
feature_names = X_train.columns
importances = model.feature_importances_
feature_importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

print("\nFeature Importances (Most important first):", feature_importance_df, sep="\n")

Model Accuracy: 0.53

Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.57      0.55        30
           1       0.54      0.50      0.52        30

    accuracy                           0.53        60
   macro avg       0.53      0.53      0.53        60
weighted avg       0.53      0.53      0.53        60


Feature Importances (Most important first):
                                  Feature  Importance
2                         SurgeryDuration    0.282456
0                                     Age    0.145882
3                               PainLevel    0.145830
1                                     BMI    0.139813
4                                Gender_M    0.061064
9  PreoperativeNotes_Stable, no allergies    0.058198
8                    AnesthesiaType_Local    0.055836
5                    SurgeryType_Cosmetic    0.038181
7                  SurgeryType_Orthopedic    0.036807
6                SurgeryType_Neurological  