In [15]:
# ================================
# Step 1: Import Libraries
# ================================
import zipfile
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [10]:
# ================================
# Step 2: Mount Google Drive
# ================================
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# ================================
# Step 3: Extract Dataset
# ================================
zip_path = "/content/drive/MyDrive/kindey_stone_dataset.zip"
extract_path = "/content/dataset"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

train_dir = "/content/dataset/kindey_stone_dataset/train"
val_dir   = "/content/dataset/kindey_stone_dataset/val"

In [12]:
# ================================
# Step 4: Load Images & Flatten
# ================================
def load_images(folder):
    X, y = [], []
    classes = os.listdir(folder)
    class_map = {cls:i for i, cls in enumerate(classes)}
    for cls in classes:
        cls_folder = os.path.join(folder, cls)
        for file in os.listdir(cls_folder):
            img_path = os.path.join(cls_folder, file)
            img = load_img(img_path, target_size=(64, 64))
            img_array = img_to_array(img) / 255.0
            X.append(img_array.flatten())
            y.append(class_map[cls])
    return np.array(X), np.array(y), class_map

X_train, y_train, class_map = load_images(train_dir)
X_val, y_val, _ = load_images(val_dir)

In [16]:
# ================================
# Step 5: Train Decision Tree in Batches (Verbose Simulation)
# ================================
batch_size = 1000
dt = DecisionTreeClassifier(random_state=42)

# Split training data into batches
for i in range(0, len(X_train), batch_size):
    X_batch = X_train[i:i+batch_size]
    y_batch = y_train[i:i+batch_size]

    # Decision Tree doesn't support partial fit, so we re-fit each batch
    dt.fit(X_batch, y_batch)

    # Evaluate on validation set
    y_val_pred = dt.predict(X_val)
    acc = accuracy_score(y_val, y_val_pred)
    print(f"Processed {i+len(X_batch)}/{len(X_train)} samples - Validation Accuracy: {acc:.4f}")

Processed 1000/35457 samples - Validation Accuracy: 0.4688
Processed 2000/35457 samples - Validation Accuracy: 0.4688
Processed 3000/35457 samples - Validation Accuracy: 0.4688
Processed 4000/35457 samples - Validation Accuracy: 0.4688
Processed 5000/35457 samples - Validation Accuracy: 0.4688
Processed 6000/35457 samples - Validation Accuracy: 0.4688
Processed 7000/35457 samples - Validation Accuracy: 0.4688
Processed 8000/35457 samples - Validation Accuracy: 0.4688
Processed 9000/35457 samples - Validation Accuracy: 0.4688
Processed 10000/35457 samples - Validation Accuracy: 0.4688
Processed 11000/35457 samples - Validation Accuracy: 0.4688
Processed 12000/35457 samples - Validation Accuracy: 0.4688
Processed 13000/35457 samples - Validation Accuracy: 0.4688
Processed 14000/35457 samples - Validation Accuracy: 0.4688
Processed 15000/35457 samples - Validation Accuracy: 0.4688
Processed 16000/35457 samples - Validation Accuracy: 0.4688
Processed 17000/35457 samples - Validation Accura

In [17]:
# ================================
# Step 6: Final Evaluation
# ================================
y_pred = dt.predict(X_val)
print("\n✅ Final Validation Accuracy:", accuracy_score(y_val, y_pred))
print("\nClassification Report:\n", classification_report(y_val, y_pred, target_names=list(class_map.keys())))



✅ Final Validation Accuracy: 0.5312128418549346

Classification Report:
               precision    recall  f1-score   support

       Stone       0.00      0.00      0.00      1577
   Non-Stone       0.53      1.00      0.69      1787

    accuracy                           0.53      3364
   macro avg       0.27      0.50      0.35      3364
weighted avg       0.28      0.53      0.37      3364



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
# ================================
# Step 7: Save Decision Tree Model
# ================================
joblib.dump(dt, "/content/dt_kidney_stone_verbose.pkl")
print("✅ Decision Tree model saved at /content/dt_kidney_stone_verbose.pkl")

✅ Decision Tree model saved at /content/dt_kidney_stone_verbose.pkl
