# Kidney Disease Classification — Colab Training (4-Class)

This notebook runs the project pipeline on **Google Colab GPU** for the 4-class dataset (Cyst, Normal, Stone, Tumor).

In [None]:
import os, sys
import tensorflow as tf
# Report the interpreter version, the TensorFlow version and the GPUs that
# TensorFlow lists, one labelled line each.
runtime_report = (
    ('Python:', sys.version),
    ('TensorFlow:', tf.__version__),
    ('GPU devices:', tf.config.list_physical_devices('GPU')),
)
for label, value in runtime_report:
    print(label, value)

## 1) Clone repository

In [None]:
# Work from /content so the checkout always lands in the same place.
%cd /content
# Remove any checkout left over from a previous run so the clone starts clean.
!rm -rf Kidney-Disease-Classification-System
!git clone https://github.com/saksham-1304/Kidney-Disease-Classification-System.git
# Later cells use repo-relative paths (requirements.txt, artifacts/...), so
# switch into the checkout.
%cd /content/Kidney-Disease-Classification-System
# NOTE(review): a pull right after a fresh clone looks redundant — confirm
# before removing.
!git pull

## 2) Install dependencies

In [None]:
!pip install -q --upgrade pip

# Colab Python version is newer; old pins may be unavailable/incompatible.
!python -m pip install -q "tensorflow>=2.16,<2.22"
!python -m pip install -q "pyarrow>=15" "mlflow>=2.16,<3.0"

# Install remaining deps from requirements.txt except conflicting pins
import re
with open('requirements.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
skip = re.compile(r'^\s*(tensorflow|mlflow|pyarrow)\b', re.IGNORECASE)
filtered = [ln for ln in lines if not skip.match(ln)]
with open('requirements_colab.txt', 'w', encoding='utf-8') as f:
    f.writelines(filtered)

!pip install -q -r requirements_colab.txt
!python -m pip install -q ensure==1.0.2
!python -m pip install -q kaggle

## 3) Configure Kaggle API
Upload the `kaggle.json` file that Kaggle generates when you choose Account → Create New API Token.

In [None]:
from google.colab import files
uploaded = files.upload()  # choose kaggle.json

!mkdir -p /root/.kaggle
!cp kaggle.json /root/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets list -s kidney | head -n 5

## 4) Run pipeline stages
Set `START_STAGE` based on where you want to begin:
- `1`: run full pipeline (data ingestion + base model + training + evaluation)
- `3`: run only training + evaluation (use this only if the stage 1 and stage 2 artifacts already exist in the current Colab session)

In [None]:
import os
import subprocess
import sys
from collections import deque

# Ensure local package is importable in Colab
!python -m pip install -q -e .

START_STAGE = 1  # change to 3 if you already have stage 1/2 artifacts

stages = [
    (1, 'cnnClassifier.pipeline.stage_01_data_ingestion'),
    (2, 'cnnClassifier.pipeline.stage_02_prepare_base_model'),
    (3, 'cnnClassifier.pipeline.stage_03_model_training'),
    (4, 'cnnClassifier.pipeline.stage_04_model_evaluation'),
]

def run_stage(stage_num, module_name):
    """Run one pipeline stage as a child Python process, echoing its output.

    The stage is started as ``<this interpreter> -u -m <module_name>`` with
    stderr merged into stdout. Every output line is printed as it arrives and
    the most recent 200 lines are kept so they can be repeated on failure.

    Args:
        stage_num: Stage number; used only in the banner and error message.
        module_name: Dotted module path to execute with ``-m``.

    Raises:
        RuntimeError: If the child exits with a non-zero return code. The
            retained tail of the log is printed before raising.
    """
    print(f'\n===== Running stage {stage_num}: {module_name} =====')
    # -u disables the child's stdout buffering; with a pipe attached Python
    # would otherwise block-buffer and progress would show up in bursts.
    cmd = [sys.executable, '-u', '-m', module_name]
    tail = deque(maxlen=200)

    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave errors with regular output
        text=True,
        # A stray undecodable byte in the child's output must not abort the
        # streaming loop with UnicodeDecodeError.
        errors='replace',
        bufsize=1,  # line-buffered reads on our side of the pipe
    ) as proc:
        for line in proc.stdout:
            print(line, end='')
            tail.append(line.rstrip('\n'))

        return_code = proc.wait()

    if return_code != 0:
        # Repeat the end of the log next to the exception so the failure is
        # visible without scrolling back through the whole stage output.
        print('\n----- LAST 200 LOG LINES -----')
        print('\n'.join(tail))
        raise RuntimeError(f'Stage {stage_num} failed with exit code {return_code}')

import shutil

# Run every stage numbered START_STAGE or later, in listed order. run_stage
# raises on a non-zero exit, which stops the remaining stages.
for stage_num, module_name in stages:
    if START_STAGE <= stage_num:
        run_stage(stage_num, module_name)

# copy final model for Flask serving path only if training produced it
os.makedirs('model', exist_ok=True)
if os.path.exists('artifacts/training/model.h5'):
    # shutil.copy raises if the copy fails, so the success message below is
    # only printed when model/model.h5 was actually written (a failing `!cp`
    # would not stop the cell).
    shutil.copy('artifacts/training/model.h5', 'model/model.h5')
    print('Pipeline complete. Final model at model/model.h5')
else:
    print('Pipeline did not produce artifacts/training/model.h5. Check stage logs above.')

## 5) Download trained outputs

In [None]:
# Bundle the training artifacts, the exported model directory and scores.json
# into a single archive.
# NOTE(review): scores.json is presumably written by the evaluation stage —
# confirm it exists before relying on it being in the archive.
!zip -r trained_outputs.zip artifacts/training model scores.json
from google.colab import files
# Hand the archive to the browser as a download.
files.download('trained_outputs.zip')