In [1]:
# --- 1. Installation and Imports ---
# NOTE(review): `-U` with no version pins lets every fresh runtime pull different
# releases; the training log saved in this notebook was produced with Ultralytics
# 8.3.228. Consider pinning versions (and using `%pip`) so reruns are reproducible.
!pip install -U ultralytics optuna psutil scikit-learn kaggle

import torch
import os
import shutil
import random
import yaml
import time
import numpy as np
import psutil
from pathlib import Path
from datetime import datetime
from ultralytics import YOLO
import optuna

# Import the Colab file downloader utility
from google.colab import files

print("Packages installed and libraries imported.")

# --- 2. Mount Google Drive (to read the dataset) ---
# The dataset paths defined in section 3 live under /content/drive/MyDrive, so the
# mount has to be in place before any later cell touches them.
from google.colab import drive
drive.mount('/content/drive')

# --- 3. Define Paths ---
# Base 2K sampled dataset on Google Drive (input; copied, never modified in place).
SOURCE_2K_DATASET_PATH = Path('/content/drive/MyDrive/Colab Notebooks/rdd_sampled_2k_test')
# Destination of the pothole-boosted copy. Kept as a Path so `/` joins and
# `.exists()` work wherever it is used.
BOOSTED_DATASET_PATH = Path('/content/drive/MyDrive/RDD_2K_Pothole_Boosted')
# Dataset config used for training. The boosted dataset is a copy of the base
# dataset and its config file is `dataset.yaml` (the dataset-building cell edits
# that file in place). Using the same name here keeps `yaml_file` valid even when
# that cell is skipped because the boosted dataset already exists on Drive.
yaml_file = str(BOOSTED_DATASET_PATH / 'dataset.yaml')

# Report only: a missing base dataset is printed, not raised.
if SOURCE_2K_DATASET_PATH.exists():
    print(f"\n‚úÖ Base 2K Dataset found: {SOURCE_2K_DATASET_PATH}")
else:
    print(f"\n‚ùå ERROR: Base 2K Dataset not found at {SOURCE_2K_DATASET_PATH}.")

# --- 4. GPU CHECK ---
# Report only: nothing is raised when CUDA is missing, so later cells still run
# (ColabResourceMonitor switches to its CPU settings in that case).
if not torch.cuda.is_available():
    print("\n\n" + "="*50)
    print("‚ùå WARNING: NO GPU DETECTED!")
    print("Please go to `Runtime` -> `Change runtime type` and select `T4 GPU`.")
    print("="*50 + "\n\n")
else:
    print("\n\n" + "="*50)
    print("‚úÖ SUCCESS: GPU is active!")
    # Shell escape: dump the driver/GPU table into the cell output.
    !nvidia-smi
    print("="*50 + "\n\n")

Packages installed and libraries imported.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

‚úÖ Base 2K Dataset found: /content/drive/MyDrive/Colab Notebooks/rdd_sampled_2k_test


‚úÖ SUCCESS: GPU is active!
Mon Nov 17 14:53:53 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8              9W /

In [4]:
# --- Cell 2: Create Pothole-Boosted Dataset ---

print("Starting to create 'Pothole-Boosted' dataset...")

# --- 1. Define Pothole Dataset and Config ---
POTHOLE_KAGGLE_SLUG = 'ryukijanoramunae/pothole-dataset'
POTHOLE_DOWNLOAD_PATH = Path('/content/pothole_data_raw')
POTHOLE_CLASS_ID_ORIGINAL = 0 # In the Kaggle set, potholes are class 0
POTHOLE_CLASS_ID_TARGET = 4 # In your 5-class model, potholes are class 4

# --- 2. Download Pothole Data from Kaggle ---
# A `!command` shell escape does not raise a Python exception when the command
# fails, so a try/except around it can never stop the cell. Running the CLI through
# subprocess and checking the return code turns a failed download into a real
# exception instead of letting the cell continue with no data.
import subprocess

print("Downloading Kaggle dataset...")
try:
    download = subprocess.run(
        [
            'kaggle', 'datasets', 'download',
            '-d', POTHOLE_KAGGLE_SLUG,
            '-p', str(POTHOLE_DOWNLOAD_PATH),
            '--unzip',
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr in so CLI error text is shown too
        text=True,
    )
    # Echo whatever the CLI printed, whether it succeeded or not.
    print(download.stdout, end='')
    download.check_returncode()
except (OSError, subprocess.CalledProcessError):
    # OSError: the `kaggle` executable could not be started.
    # CalledProcessError: the CLI ran but exited with a non-zero status.
    print(f"‚ùå FATAL ERROR: Kaggle download failed.")
    print("Please ensure your 'kaggle.json' is uploaded and you ran the 'mv' command cell.")
    # Stop execution if download fails
    raise
print("Kaggle dataset downloaded successfully.")

# --- 3. Re-create Target Directory ---
# Check the source BEFORE deleting anything: copytree would fail on a missing
# source anyway, but only after the previous boosted dataset had been removed.
if not SOURCE_2K_DATASET_PATH.is_dir():
    raise FileNotFoundError(
        f"Base 2K dataset not found at {SOURCE_2K_DATASET_PATH}; "
        "the existing boosted dataset was left untouched."
    )

if BOOSTED_DATASET_PATH.exists():
    print(f"Removing old boosted dataset...")
    shutil.rmtree(BOOSTED_DATASET_PATH)

print(f"Creating new dataset at {BOOSTED_DATASET_PATH}...")
# Copy the entire 2K dataset as our base
shutil.copytree(SOURCE_2K_DATASET_PATH, BOOSTED_DATASET_PATH)
print("Copied 2K base dataset.")

# --- 4. Find and Merge New Pothole Data ---
# rglob order depends on the filesystem and an unseeded shuffle picks a different
# subset and train/val split on every run. Sorting first and shuffling with a
# dedicated seeded generator makes the selection repeatable without touching the
# global `random` state.
BOOST_SHUFFLE_SEED = 42
pothole_images = sorted(POTHOLE_DOWNLOAD_PATH.rglob('*.jpg'))
random.Random(BOOST_SHUFFLE_SEED).shuffle(pothole_images)

# Upper bound on how many extra pothole images are merged into the base dataset.
MAX_BOOST_IMAGES = 2000
pothole_images = pothole_images[:MAX_BOOST_IMAGES]
print(f"Found {len(pothole_images)} new pothole images to merge.")

# Split them 80/20 for train/val
train_split_index = int(len(pothole_images) * 0.8)
new_train_images = pothole_images[:train_split_index]
new_val_images = pothole_images[train_split_index:]

# --- 5. Processing Function to Merge and Re-map ---
def merge_and_remap(image_list, split):
    """Copy pothole images into the boosted dataset and re-map their labels.

    For each image the matching YOLO label file is looked up, first in a sibling
    ``labels`` directory (``<parent>/../labels/<stem>.txt``) and then next to the
    image. Label lines whose class id equals ``POTHOLE_CLASS_ID_ORIGINAL`` are
    rewritten with ``POTHOLE_CLASS_ID_TARGET``; every other line is dropped.
    Images with no label file, or with no pothole line, are skipped.

    Args:
        image_list: Paths of the source images to merge.
        split: Name of the destination split sub-directory (e.g. 'train', 'val').

    Returns:
        ``(images_merged, annos_merged)``: the number of images written and the
        number of annotations written for those images.
    """
    target_img_dir = BOOSTED_DATASET_PATH / 'images' / split
    target_label_dir = BOOSTED_DATASET_PATH / 'labels' / split
    # The copied base dataset is not guaranteed to contain this split; without the
    # directories every write below fails and every image is silently skipped.
    target_img_dir.mkdir(parents=True, exist_ok=True)
    target_label_dir.mkdir(parents=True, exist_ok=True)
    images_merged = 0
    annos_merged = 0

    for i, img_path in enumerate(image_list):
        try:
            # Find the original label file
            label_path = img_path.parent.parent / 'labels' / (img_path.stem + '.txt')
            if not label_path.exists():
                label_path = img_path.parent / (img_path.stem + '.txt') # Check alternative path
                if not label_path.exists():
                    continue # Skip if no label

            with open(label_path, 'r') as f:
                lines = f.readlines()

            new_label_content = []
            for line in lines:
                parts = line.split()
                if not parts:
                    continue

                # RE-MAP: Change class 0 to class 4
                if int(parts[0]) == POTHOLE_CLASS_ID_ORIGINAL:
                    new_label_content.append(f"{POTHOLE_CLASS_ID_TARGET} {' '.join(parts[1:])}")

            # If we found potholes, save the new label and copy the image
            if new_label_content:
                # The index prefix keeps names unique when two source folders
                # contain files with the same name.
                new_img_name = f"pothole_boost_{i}_{img_path.name}"

                # Write new label file
                with open(target_label_dir / (Path(new_img_name).stem + '.txt'), 'w') as f:
                    f.write('\n'.join(new_label_content))

                # Copy image
                shutil.copy(img_path, target_img_dir / new_img_name)

                # Count only after both writes succeeded so the totals describe
                # what is actually in the dataset.
                images_merged += 1
                annos_merged += len(new_label_content)
        except Exception as e:
            # Best effort: one bad image or label must not abort the whole merge.
            print(f"Skipping {img_path.name}: {e}")

    return images_merged, annos_merged

# Run the merge for each split; the per-split totals stay available as globals.
print("Merging 'train' split...")
train_imgs, train_annos = merge_and_remap(new_train_images, 'train')
print(f"Merged {train_imgs} new train images with {train_annos} pothole annotations.")

print("Merging 'val' split...")
val_imgs, val_annos = merge_and_remap(new_val_images, 'val')
print(f"Merged {val_imgs} new val images with {val_annos} pothole annotations.")


# --- 6. Update the dataset.yaml file ---
# The YAML came along with the copied base dataset; only its 'path' entry has to
# be redirected to the boosted copy.
boosted_yaml_path = BOOSTED_DATASET_PATH / 'dataset.yaml'
if not boosted_yaml_path.exists():
    print(f"‚ùå FAILED to find copied dataset.yaml at {boosted_yaml_path}")
else:
    with open(boosted_yaml_path, 'r') as yaml_in:
        config = yaml.safe_load(yaml_in)

    # Absolute path of the new dataset root
    config['path'] = str(BOOSTED_DATASET_PATH.absolute())

    with open(boosted_yaml_path, 'w') as yaml_out:
        yaml.dump(config, yaml_out, sort_keys=False)

    # From here on the rest of the notebook trains on the boosted dataset.
    yaml_file = str(boosted_yaml_path)

    print(f"\n‚úÖ 'Pothole-Boosted' 5-Class dataset created.")
    print(f"The pipeline will now use this new dataset: {yaml_file}")

Starting to create 'Pothole-Boosted' dataset...
Downloading Kaggle dataset...
Dataset URL: https://www.kaggle.com/datasets/ryukijanoramunae/pothole-dataset
License(s): DbCL-1.0
Downloading pothole-dataset.zip to /content/pothole_data_raw
 96% 747M/776M [00:04<00:00, 49.6MB/s]
100% 776M/776M [00:04<00:00, 198MB/s] 
Kaggle dataset downloaded successfully.
Removing old boosted dataset...
Creating new dataset at /content/drive/MyDrive/RDD_2K_Pothole_Boosted...
Copied 2K base dataset.
Found 2000 new pothole images to merge.
Merging 'train' split...
Merged 1600 new train images with 3915 pothole annotations.
Merging 'val' split...
Merged 400 new val images with 955 pothole annotations.

‚úÖ 'Pothole-Boosted' 5-Class dataset created.
The pipeline will now use this new dataset: /content/drive/MyDrive/RDD_2K_Pothole_Boosted/dataset.yaml


In [5]:
# --- CLASS NAMES (Must match your YAML) ---
# Class id -> display name. Built from an ordered list, so ids run 0..4 in the
# order written here.
CLASS_NAMES = dict(enumerate([
    'longitudinal crack',
    'transverse crack',
    'alligator crack',
    'other corruption',
    'Pothole',
]))

# --- A. GPU RESOURCE MONITOR ---
class ColabResourceMonitor:
    """Pick training defaults for the current runtime (GPU vs. CPU).

    The settings are chosen once, at construction time, from
    ``torch.cuda.is_available()`` and exposed through ``get_optimized_config()``.
    """

    def __init__(self, max_training_hours=2):
        """
        Args:
            max_training_hours: Stored on the instance; nothing in this class
                reads it.
        """
        self.max_training_hours = max_training_hours
        self.start_time = time.time()
        self.gpu_available = torch.cuda.is_available()
        self.initial_setup()

    def initial_setup(self):
        """Set batch size, image size, workers, device and AMP for this runtime.

        Reads ``self.gpu_available`` and prints the chosen settings.
        """
        print("\nAnalyzing Colab Runtime (Optimized for GPU Performance)...")
        # The settings depend only on GPU availability; the physical-core count
        # that used to be computed here was never used and has been removed.

        if self.gpu_available:
            self.batch_size = 16
            self.image_size = 640
            self.workers = 8
            self.device = None  # passed through to training as-is
            self.amp = True
        else:
            print("WARNING: GPU not found, falling back to slow CPU settings.")
            self.batch_size = 2
            self.image_size = 416
            self.workers = 0
            self.device = 'cpu'
            self.amp = False

        print(f"Optimized settings: Batch Size={self.batch_size}, Image Size={self.image_size}, Workers={self.workers}")

    def get_optimized_config(self):
        """Return the training settings as a new dict on every call.

        Returns:
            Dict with keys ``batch_size``, ``image_size``, ``workers``,
            ``epochs``, ``patience``, ``amp``, ``cache``, ``save_period`` and
            ``device``.
        """
        return {
            'batch_size': self.batch_size,
            'image_size': self.image_size,
            'workers': self.workers,
            'epochs': 30, # 30 epochs on a ~4K dataset is a good, fast run
            'patience': 10,
            'amp': self.amp,
            'cache': True, # Cache the dataset in RAM for max speed
            'save_period': -1,
            'device': self.device
        }

# --- B. CLASS DISTRIBUTION ANALYZER ---
class ClassDistributionAnalyzer:
    """Count and print per-class annotation totals for a dataset's train split.

    After ``run_analysis()`` the totals are available in ``self.class_counts``
    (class id -> number of annotations), restricted to ids in ``CLASS_NAMES``.
    """
    def __init__(self, yaml_path):
        """
        Args:
            yaml_path: Path (str or Path) of the dataset YAML to analyze.
        """
        self.yaml_path = Path(yaml_path)
        self.class_counts = {i: 0 for i in CLASS_NAMES.keys()}

    def run_analysis(self):
        """Scan the train label files and print the class distribution.

        Returns early, after printing a message, when the YAML or the train
        label directory is missing. Counts are reset on every call, so running
        the analysis twice does not double them.
        """
        # Start from zero so a repeated call reports the dataset, not a running sum.
        self.class_counts = {i: 0 for i in CLASS_NAMES.keys()}

        if not self.yaml_path.exists():
            print("YAML file not found.")
            return

        with open(self.yaml_path, 'r') as f:
            # An empty YAML file loads as None; treat it as an empty config.
            config = yaml.safe_load(f) or {}

        base_path = Path(config.get('path', self.yaml_path.parent))
        # Label directory is derived from the image directory by name substitution.
        train_labels_dir = base_path / config.get('train', 'images/train').replace('images', 'labels')

        if not train_labels_dir.is_dir():
            print(f"Training labels directory not found: {train_labels_dir}")
            return

        train_label_files = list(train_labels_dir.glob('*.txt'))
        total_annotations = 0
        skipped_files = 0

        for label_file in train_label_files:
            try:
                with open(label_file, 'r') as f:
                    content = f.read().strip()
                    if content:
                        for line in content.split('\n'):
                            if line.strip():
                                class_id = int(line.split()[0])
                                # Ids outside CLASS_NAMES are ignored, not counted.
                                if class_id in self.class_counts:
                                    self.class_counts[class_id] += 1
                                    total_annotations += 1
            except (OSError, ValueError):
                # Unreadable file or non-integer class id: skip the rest of this
                # file but keep going. (A bare `except:` here would also swallow
                # KeyboardInterrupt and real programming errors.)
                skipped_files += 1
                continue

        print(f"\nDataset Analysis ({len(train_label_files)} train labels):")
        print("------------------------------------------")
        print(f"Total Annotations Found: {total_annotations}")
        if skipped_files:
            print(f"  Skipped {skipped_files} unreadable or malformed label file(s).")
        if total_annotations > 0:
            for class_id, count in self.class_counts.items():
                percentage = (count / total_annotations) * 100
                print(f"  {CLASS_NAMES[class_id]:<20}: {count:>5} ({percentage:.1f}%) ")
        else:
            print("  WARNING: Could not read any annotations. Training may fail.")
        print("------------------------------------------")

# --- C. BAYESIAN OPTIMIZER (Definition only) ---
class BayesianOptimizer:
    """Placeholder for the hyper-parameter search; it performs no optimization."""

    def __init__(self, dataset_yaml, resource_monitor):
        # Optimization is skipped, so neither argument is stored.
        return None

    def optimize(self, n_trials=10, timeout_minutes=15):
        """No-op: runs no trials and returns None."""
        return None

# --- D. OPTIMIZED TRAINER (GPU, Drive Storage, yolov8s) ---
class OptimizedTrainer:
    """Train, evaluate and download the pothole-boosted yolov8s model.

    Every run is written to ``PROJECT_PATH / run_name``. The run name is fixed and
    training is started with ``exist_ok=True``, so repeated runs share one folder;
    ``train_model(resume=True)`` loads ``weights/last.pt`` from that folder.
    """

    def __init__(self, dataset_yaml, resource_monitor, best_params):
        """
        Args:
            dataset_yaml: Dataset YAML path passed to training and validation.
            resource_monitor: Object whose ``get_optimized_config()`` returns a
                dict with the keys read in ``__init__`` and ``train_model``.
            best_params: Dict of hyper-parameter overrides; keys that are missing
                fall back to the defaults written in ``train_model``.
        """
        self.dataset_yaml = dataset_yaml
        self.monitor = resource_monitor
        self.best_params = best_params
        self.model = None
        self.results = None

        # Save to a NEW folder on your Drive
        self.PROJECT_PATH = '/content/drive/MyDrive/RDD_GPU_Training_Runs'
        self.run_name = 'rdd_2k_boosted_yolov8s_run' # New fixed name

        run_config = self.monitor.get_optimized_config()
        self.MAX_EPOCHS = run_config['epochs']
        self.PATIENCE = run_config['patience']
        # Reference mAP@0.5 that evaluate_model reports the improvement against.
        self.BASE_LINE_MAP = 0.417

        self.final_model_path = Path(self.PROJECT_PATH) / self.run_name / 'weights' / 'best.pt'

    def train_model(self, resume=False):
        """Train yolov8s on ``self.dataset_yaml``.

        Args:
            resume: When True, load ``weights/last.pt`` from the run folder and
                pass ``resume=True`` to training; otherwise start from
                ``yolov8s.pt``.

        Returns:
            True when training finished without raising, False otherwise (the
            exception message is printed, not re-raised).
        """
        print(f"\nStarting final training on yolov8s ({self.MAX_EPOCHS} epochs max)...")

        model_path = ''
        if resume:
            model_path = str(Path(self.PROJECT_PATH) / self.run_name / 'weights' / 'last.pt')
            print(f"Loading model from checkpoint: {model_path}")
        else:
            model_path = 'yolov8s.pt'
            print(f"Loading base model: {model_path}")

        self.model = YOLO(model_path)
        config = self.monitor.get_optimized_config()

        # NOTE(review): the 0.05 fallback for `box` looks like a YOLOv5-era loss
        # gain; the Ultralytics YOLOv8 default is believed to be 7.5. Confirm the
        # intended scale against the Ultralytics training settings.
        train_params = {
            'data': self.dataset_yaml,
            'epochs': self.MAX_EPOCHS,
            'batch': config['batch_size'],
            'imgsz': config['image_size'],
            'lr0': self.best_params.get('lr0', 0.01),
            'box': self.best_params.get('box', 0.05),
            'cls': self.best_params.get('cls', 0.5),
            'warmup_epochs': self.best_params.get('warmup_epochs', 5),
            'degrees': self.best_params.get('degrees', 10),
            'scale': self.best_params.get('scale', 0.3),
            'patience': self.PATIENCE,
            'workers': config['workers'],
            'amp': config['amp'],
            'cache': config['cache'],
            'save_period': config['save_period'],
            'cos_lr': True,
            'device': config['device'],
            'project': self.PROJECT_PATH,
            'name': self.run_name,
            'plots': True, 'verbose': True, 'exist_ok': True,
            'resume': resume
        }

        print(f"Final Config: Epochs={self.MAX_EPOCHS}, Batch={config['batch_size']}, Device={config['device']}")
        print(f"Checkpoints will be saved to: {self.PROJECT_PATH}/{self.run_name}")

        try:
            self.results = self.model.train(**train_params)
            print(f"\n‚úÖ Training complete. Best model saved to: {self.final_model_path}")
            return True
        except Exception as e:
            # Deliberately reported as a boolean so the calling cell can print its
            # own message instead of a traceback.
            print(f"‚ùå Training failed: {e}")
            return False

    def evaluate_model(self):
        """Validate ``best.pt`` on the dataset and print overall and per-class scores.

        Returns:
            Dict with keys ``mAP50``, ``mAP50_95``, ``precision`` and ``recall``,
            or None when ``best.pt`` does not exist.
        """
        print("\n==================================================")
        print("DETAILED BOOSTED MODEL EVALUATION")
        print("==================================================")

        if not self.final_model_path.exists():
            print(f"‚ùå Error: Model not found at {self.final_model_path}.")
            return None

        print(f"Loading best model from: {self.final_model_path}")
        model = YOLO(self.final_model_path)

        print("Running validation to generate metrics...")
        val_results = model.val(data=self.dataset_yaml, verbose=True)

        performance = {
            'mAP50': val_results.box.map50,
            'mAP50_95': val_results.box.map,
            'precision': val_results.box.mp,
            'recall': val_results.box.mr,
        }

        print(f"\nPerformance Metrics (Validation Set):")
        print(f"  mAP@0.5:    {performance['mAP50']:.4f} ({performance['mAP50']:.1%}) <--- Main Score")
        print(f"  mAP@0.5:0.95: {performance['mAP50_95']:.4f} ({performance['mAP50_95']:.1%})")
        print(f"  Precision:  {performance['precision']:.4f} ({performance['precision']:.1%})")
        print(f"  Recall:     {performance['recall']:.4f} ({performance['recall']:.1%})")

        print(
            "\nPer-Class mAP@0.5:\n" +
            "----------------------")
        class_results = val_results.box.ap_class_index
        # `box.ap` is the per-class AP averaged over IoU 0.5:0.95; the table is
        # labelled mAP@0.5, so it must read `box.ap50` (per-class AP at IoU 0.5).
        class_maps = val_results.box.ap50

        for i, class_index in enumerate(class_results):
            class_name = CLASS_NAMES.get(class_index, f'Class {class_index}')
            # Non-finite values are shown as 0.0.
            map_value = class_maps[i] if np.isfinite(class_maps[i]) else 0.0
            print(f"  - {class_name:<20}: {map_value:.4f}")

        improvement = performance['mAP50'] - self.BASE_LINE_MAP
        print(f"\nImprovement vs RDD Baseline ({self.BASE_LINE_MAP:.1%}): {improvement:+.1%}")

        return performance

    def download_best_model(self):
        """Trigger a browser download of ``best.pt`` via the Colab ``files`` helper.

        Prints an error and returns without downloading when the file is missing.
        """
        print("\n==================================================")
        print("DOWNLOAD MODEL")
        print("==================================================")
        if not self.final_model_path.exists():
            print(f"‚ùå Error: Model not found at {self.final_model_path}.")
            return
        print(f"Preparing to download: {self.final_model_path}")
        files.download(str(self.final_model_path))

print("All classes defined successfully.")

All classes defined successfully.


In [6]:
# --- Cell 4: Initialization and Recovery ---

# 1. Initialize resource monitor (sets GPU-safe parameters)
monitor = ColabResourceMonitor(max_training_hours=2)

# Check for `yaml_file` BEFORE it is used. With the check placed only at the end
# of the cell, a missing variable would raise NameError in step 2 and the friendly
# message in step 4 could never be printed.
yaml_file_defined = 'yaml_file' in globals()

# 2. Analyze and print the class distribution of the NEW BOOSTED dataset
# You should see a much higher number for 'Pothole' now
if yaml_file_defined:
    analyzer = ClassDistributionAnalyzer(yaml_file)
    analyzer.run_analysis()

# 3. Load Best Params (Recovery)
# We are skipping the 20-min optimization and re-using your previous results.
# NOTE(review): `box` ~0.05 looks like a YOLOv5-scale loss gain; the Ultralytics
# YOLOv8 default is believed to be 7.5. Confirm this value was tuned on the same
# scale the trainer expects.
best_params = {
    'lr0': 0.018415672546309683,
    'box': 0.05035344914171263,
    'cls': 0.3397850106139112,
    'warmup_epochs': 3,
    'degrees': 8.046959850463304,
    'scale': 0.26962935492158147
}
print("‚úÖ RECOVERY: Loaded best_params from previous run.")

# 4. Create the Trainer Object
if yaml_file_defined:
    trainer = OptimizedTrainer(yaml_file, monitor, best_params)
    print(f"Trainer created. Ready to train on: {yaml_file}")
else:
    print("‚ùå 'yaml_file' not defined. Please run Cell 2.")


Analyzing Colab Runtime (Optimized for GPU Performance)...
Optimized settings: Batch Size=16, Image Size=640, Workers=8

Dataset Analysis (3200 train labels):
------------------------------------------
Total Annotations Found: 8546
  longitudinal crack  :   910 (10.6%) 
  transverse crack    :   874 (10.2%) 
  alligator crack     :   849 (9.9%) 
  other corruption    :   842 (9.9%) 
  Pothole             :  5071 (59.3%) 
------------------------------------------
‚úÖ RECOVERY: Loaded best_params from previous run.
Trainer created. Ready to train on: /content/drive/MyDrive/RDD_2K_Pothole_Boosted/dataset.yaml


In [7]:
# --- Cell 5: Train Boosted Model ---

# Define the permanent paths on your Google Drive.
# These repeat OptimizedTrainer.PROJECT_PATH / run_name and must stay equal to
# them, otherwise the checkpoint check below looks in a different folder than the
# one the trainer resumes from.
PERSISTENT_PROJECT_PATH = '/content/drive/MyDrive/RDD_GPU_Training_Runs'
PERSISTENT_RUN_NAME = 'rdd_2k_boosted_yolov8s_run' # The new fixed name

# Check if a checkpoint ('last.pt') already exists ON GOOGLE DRIVE
# NOTE(review): `last.pt` also exists after a run that already finished; resuming
# such a run presumably fails inside Ultralytics and is then reported below as a
# failed training. Confirm, and delete the run folder to retrain from scratch.
resume_path = Path(PERSISTENT_PROJECT_PATH) / PERSISTENT_RUN_NAME / 'weights' / 'last.pt'
resume_training = resume_path.exists()

if resume_training:
    print(f"‚úÖ Checkpoint found at {resume_path}.")
    print("Training will resume from your previous progress.")
else:
    print(f"‚ÑπÔ∏è No checkpoint found. Starting a new training run.")
    print(f"(Progress will be saved to {resume_path.parent.parent})")


# Report the actual reason training cannot start: a missing trainer and a missing
# dataset YAML need different fixes.
if 'trainer' not in globals():
    print("Cannot start training: 'trainer' object not defined. Please run Cell 4.")
elif not Path(yaml_file).exists():
    print(f"Cannot start training: dataset YAML not found at {yaml_file}. Please run Cell 2.")
elif trainer.train_model(resume=resume_training):
    # The 'resume_training' flag was passed through to train_model above.
    print("\n‚úÖ Training is complete.")
    print(f"Your 'Pothole Boosted' model is saved to Google Drive at: {trainer.final_model_path}")
    print("You can now run Cell 6 to evaluate or Cell 7 to download.")
else:
    print("‚ùå Training process failed.")

‚ÑπÔ∏è No checkpoint found. Starting a new training run.
(Progress will be saved to /content/drive/MyDrive/RDD_GPU_Training_Runs/rdd_2k_boosted_yolov8s_run)

Starting final training on yolov8s (30 epochs max)...
Loading base model: yolov8s.pt
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 21.5MB 187.8MB/s 0.1s
Final Config: Epochs=30, Batch=16, Device=None
Checkpoints will be saved to: /content/drive/MyDrive/RDD_GPU_Training_Runs/rdd_2k_boosted_yolov8s_run
Ultralytics 8.3.228 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=0.05035344914171263, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.3397850106139112, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/content/drive/MyDrive/RDD_2K_Pothole

In [8]:
# --- Cell 6: Evaluate Boosted Model ---

# Evaluation needs both the trainer object and its saved best weights.
if 'trainer' not in locals() or not trainer.final_model_path.exists():
    print("‚ùå Model not found. Please run Cell 5 to train the model first.")
else:
    trainer.evaluate_model()


DETAILED BOOSTED MODEL EVALUATION
Loading best model from: /content/drive/MyDrive/RDD_GPU_Training_Runs/rdd_2k_boosted_yolov8s_run/weights/best.pt
Running validation to generate metrics...
Ultralytics 8.3.228 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,127,519 parameters, 0 gradients, 28.4 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.4¬±0.1 ms, read: 7.7¬±4.6 MB/s, size: 61.9 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/RDD_2K_Pothole_Boosted/labels/val.cache... 7733 images, 1840 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 7736/7736 9.8Mit/s 0.0s
[34m[1mval: [0m/content/drive/MyDrive/RDD_2K_Pothole_Boosted/images/val/Japan_006536.jpg: 1 duplicate labels removed
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 484/484 2.0it/s 4:01
                   all       7736      14971      0.534      0.449

In [9]:
# --- Cell 7: Download Boosted Model ---

# Downloading needs both the trainer object and its saved best weights.
if 'trainer' not in locals() or not trainer.final_model_path.exists():
    print("‚ùå Model not found. Please run Cell 5 to train the model first.")
else:
    trainer.download_best_model()


DOWNLOAD MODEL
Preparing to download: /content/drive/MyDrive/RDD_GPU_Training_Runs/rdd_2k_boosted_yolov8s_run/weights/best.pt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
# --- NEW CELL: Run this after uploading kaggle.json ---
# NOTE: this cell must run BEFORE the cell that downloads the Kaggle dataset, even
# though it appears after it in the notebook.

import os
import shutil
from pathlib import Path

kaggle_dir = Path.home() / '.kaggle'
uploaded_token = Path('kaggle.json')
installed_token = kaggle_dir / 'kaggle.json'

# Create the .kaggle directory
kaggle_dir.mkdir(parents=True, exist_ok=True)

# Move the uploaded kaggle.json file. On a re-run the upload is already gone and
# the installed copy is reused; if neither exists, stop instead of claiming success.
if uploaded_token.exists():
    shutil.move(str(uploaded_token), str(installed_token))
elif not installed_token.exists():
    raise FileNotFoundError(
        "kaggle.json not found: upload it to the current directory before running this cell."
    )

# Set the correct permissions (this is a required step): owner read/write only
os.chmod(installed_token, 0o600)

print("‚úÖ kaggle.json is installed and ready.")

‚úÖ kaggle.json is installed and ready.


In [None]:
import matplotlib.pyplot as plt

# Data from your Table 5.2 (Simulated Results)
# Adjust these values if your simulation produced different numbers.
# Full category list, kept for reference only: the chart below plots the funded
# categories, so 'Minor Repairs (Unfunded)' is not drawn.
labels = ['Severe Repairs', 'Moderate Repairs', 'Minor Repairs (Unfunded)']

# Worked example behind the percentages (raw funded amounts):
#   Severe:   150k + 120k       = 270,000
#   Moderate: 80k + 90k + 50k   = 220,000
#   Minor:    0 (unfunded)
# That is the funded distribution described in the text: "Severe repairs accounted
# for approximately 55%... Moderate utilized remaining 45%".
sizes = [55, 45]
labels_funded = ['Severe Repairs', 'Moderate Repairs']
colors = ['#ff9999', '#66b3ff'] # Red-ish for Severe, Blue-ish for Moderate

plt.figure(figsize=(8, 8))
plt.pie(sizes, labels=labels_funded, colors=colors, autopct='%1.1f%%', startangle=140, shadow=True)
# Title fixed: the word "Severity" was duplicated.
plt.title('Budget Allocation by Severity')
plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.

# Save the chart (before plt.show(), while the figure is still the current one)
plt.savefig('budget_allocation_pie_chart.png')
print("Pie chart saved as 'budget_allocation_pie_chart.png'")
plt.show()