In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.54-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.54-py3-none-any.whl (903 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m903.1/903.1 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.54 ultralytics-thop-2.0.13


In [2]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
import torch
import torchvision
import tensorflow as tf
from torchvision.models import vit_b_32, ViT_B_32_Weights
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from ultralytics import YOLO
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import cv2
import gc
import pickle
import yaml

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# Paths
TRAIN_PATH = '/kaggle/input/coco-2017-dataset/coco2017/train2017'
VAL_PATH = '/kaggle/input/coco-2017-dataset/coco2017/val2017'
ANNOTATIONS_PATH = '/kaggle/input/coco-2017-dataset/coco2017/annotations'
FILTERED_DATASET = '/kaggle/input/filtered-coco-dataset'
WORKING_DIR = '/kaggle/working'
FILTERED_CATEGORIES = ['person', 'cat', 'dog']

In [4]:
class DataProcessor:
    def __init__(self, train_csv_path, test_csv_path):
        self.train_data = pd.read_csv(train_csv_path)
        self.test_data = pd.read_csv(test_csv_path)
        self.label_encoder = LabelEncoder()
        
    def preprocess_image(self, image_path, target_size=(224, 224)):
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, target_size)
        image = image / 255.0
        return image

    def load_data(self, data, path_prefix):
        images = []
        labels = []
        for _, row in data.iterrows():
            image_path = os.path.join(path_prefix, row['image'])
            images.append(self.preprocess_image(image_path))
            labels.append(row['category_id'])
        
        images = np.array(images)
        labels = np.array(labels)
        
        # Encode labels
        labels_encoded = self.label_encoder.fit_transform(labels)
        labels_one_hot = to_categorical(labels_encoded)
        
        return images, labels_one_hot

In [5]:
class MetricsEvaluator:
    def __init__(self, working_dir, timestamp):
        self.working_dir = working_dir
        self.timestamp = timestamp
        
    def evaluate_all_models(self, results_paths):
        """Evaluate all models using saved results"""
        print("\nEvaluating model performance...")
        
        # Load test data
        X_test = np.load(os.path.join(self.working_dir, f'X_test_{self.timestamp}.npy'))
        y_test = np.load(os.path.join(self.working_dir, f'y_test_{self.timestamp}.npy'))
        
        histories = {}
        predictions = {}
        
        # Load results for each model
        for model_name, path in results_paths.items():
            model_dir = os.path.join(self.working_dir, path)
            
            # Load history
            with open(os.path.join(model_dir, 'history.pkl'), 'rb') as f:
                histories[model_name] = pickle.load(f)
            
            # Load predictions
            predictions[model_name] = np.load(os.path.join(model_dir, 'predictions.npy'))
        
        # Plot metrics
        self.plot_metrics(histories, predictions, y_test)

In [6]:
class ModelTrainer:
    def __init__(self, data_processor):
        self.data_processor = data_processor
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.working_dir = WORKING_DIR
        
    def train_mobilenet(self, X_train, y_train, X_test, y_test):
        """Train MobileNetV2 model"""
        print("Training MobileNetV2...")
        
        # Build the model
        base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
        
        # Unfreeze some layers for fine-tuning
        for layer in base_model.layers[-30:]:
            layer.trainable = True
            
        # Add classification head
        inputs = Input(shape=(224, 224, 3))
        x = base_model(inputs)
        x = GlobalAveragePooling2D()(x)
        x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.5)(x)
        x = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(x)
        x = Dropout(0.3)(x)
        outputs = Dense(len(self.data_processor.label_encoder.classes_), activation='softmax')(x)
        model = Model(inputs, outputs)
        
        # Compile model
        optimizer = Adam(learning_rate=0.0001)
        model.compile(optimizer=optimizer,
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
        
        # Train model with reduced batch size and memory optimization
        history = model.fit(
            X_train, y_train,
            validation_data=(X_test, y_test),
            epochs=30,
            batch_size=16,  # Reduced batch size to save memory
            verbose=1
        )
        
        return history.history, model
    
    def train_faster_rcnn(self, X_train, y_train, X_test, y_test):
        """Train Faster R-CNN model with memory optimization"""
        print("Training Faster R-CNN...")
        
        # Initialize Faster R-CNN with updated weights parameter
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
        )
        model.train()
        
        # Rest of the method remains the same
        num_classes = len(self.data_processor.label_encoder.classes_) + 1  # +1 for background
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
            in_features, num_classes)
        
        # Move model to GPU if available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        
        def prepare_detection_targets(images, labels, device):
            targets = []
            for img_idx in range(len(images)):
                label_idx = labels[img_idx].argmax()
                boxes = torch.FloatTensor([[100, 100, 124, 124]]).to(device)  # Example box
                
                target = {
                    'boxes': boxes,
                    'labels': torch.tensor([label_idx + 1], dtype=torch.int64).to(device),
                    'image_id': torch.tensor([img_idx]).to(device),
                    'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
                    'iscrowd': torch.zeros((1,), dtype=torch.int64).to(device)
                }
                targets.append(target)
            return targets
        
        # Training parameters
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
        num_epochs = 30
        batch_size = 8  # Reduced batch size
        
        history = {'train_loss': [], 'val_loss': []}
        
        for epoch in range(num_epochs):
            model.train()
            epoch_loss = 0
            num_batches = 0
            
            # Process in smaller batches
            for i in range(0, len(X_train), batch_size):
                batch_images = X_train[i:i+batch_size]
                batch_labels = y_train[i:i+batch_size]
                
                # Clear memory
                torch.cuda.empty_cache()
                
                # Prepare data
                images = [torch.FloatTensor(img).permute(2, 0, 1).to(device) for img in batch_images]
                targets = prepare_detection_targets(batch_images, batch_labels, device)
                
                # Forward pass
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                
                # Backward pass
                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                
                epoch_loss += losses.item()
                num_batches += 1
                
                # Free up memory
                del images, targets, loss_dict, losses
                torch.cuda.empty_cache()
            
            avg_loss = epoch_loss / num_batches
            history['train_loss'].append(avg_loss)
            print(f'Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f}')
        
        return history, model
    
    def train_yolo(self, X_train, y_train, X_test, y_test):
        """Train YOLOv8 model"""
        print("Training YOLOv8...")
        
        # Initialize YOLOv8 with smaller image size
        model = YOLO('yolov8n.pt')  # Using smallest YOLOv8 model
        
        # Prepare YOLO-specific dataset format
        dataset_path = os.path.join(self.working_dir, 'yolo_dataset')
        os.makedirs(dataset_path, exist_ok=True)
        
        # Save images and labels in YOLO format
        train_yaml = {
            'path': dataset_path,
            'train': 'images/train',
            'val': 'images/val',
            'nc': len(self.data_processor.label_encoder.classes_),
            'names': list(self.data_processor.label_encoder.classes_)
        }
        
        with open(os.path.join(dataset_path, 'dataset.yaml'), 'w') as f:
            yaml.dump(train_yaml, f)
        
        # Train with memory-optimized parameters
        history = model.train(
            data=os.path.join(dataset_path, 'dataset.yaml'),
            epochs=30,
            imgsz=224,
            batch=8,  # Reduced batch size
            cache=False  # Disable caching to save memory
        )
        
        return vars(history), model
    
    def train_vit(self, X_train, y_train, X_test, y_test):
        """Train Vision Transformer model"""
        print("Training ViT...")
        
        # Initialize ViT
        model = vit_b_32(weights=ViT_B_32_Weights.DEFAULT)
        
        # Modify for number of classes
        num_classes = len(self.data_processor.label_encoder.classes_)
        model.heads = torch.nn.Linear(model.heads.in_features, num_classes)
        
        # Move to GPU if available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        
        return self._train_torch_model(model, X_train, y_train, X_test, y_test, "vit")
    
    def _train_torch_model(self, model, X_train, y_train, X_test, y_test, model_name):
        """Generic PyTorch training loop with memory optimization"""
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Training parameters
        batch_size = 16  # Reduced batch size
        optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
        criterion = torch.nn.CrossEntropyLoss()
        num_epochs = 30
        
        history = {
            'train_loss': [],
            'val_loss': [],
            'train_acc': [],
            'val_acc': []
        }
        
        for epoch in range(num_epochs):
            model.train()
            train_loss = 0
            train_correct = 0
            num_batches = 0
            
            # Training loop
            for i in range(0, len(X_train), batch_size):
                # Clear cache periodically
                if i % (batch_size * 10) == 0:
                    torch.cuda.empty_cache()
                
                batch_x = torch.FloatTensor(X_train[i:i+batch_size]).to(device)
                batch_y = torch.FloatTensor(y_train[i:i+batch_size]).to(device)
                
                optimizer.zero_grad()
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
                
                train_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                _, true_classes = torch.max(batch_y, 1)
                train_correct += (predicted == true_classes).sum().item()
                num_batches += 1
                
                # Free memory
                del batch_x, batch_y, outputs, loss
            
            # Calculate metrics
            train_loss = train_loss / num_batches
            train_acc = train_correct / len(X_train)
            
            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            
            print(f'Epoch {epoch+1}/{num_epochs} - loss: {train_loss:.4f} - acc: {train_acc:.4f}')
            
            # Clear memory after each epoch
            torch.cuda.empty_cache()
            gc.collect()
        
        return history, model
    
    def _save_model_results(self, model_name, history, predictions=None):
        """Save training history and predictions"""
        results_dir = os.path.join(self.working_dir, f'{model_name}_{self.timestamp}')
        os.makedirs(results_dir, exist_ok=True)
        
        # Save history
        with open(os.path.join(results_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
            
        # Save predictions if available
        if predictions is not None:
            np.save(os.path.join(results_dir, 'predictions.npy'), predictions)
            
    def _get_predictions(self, model, X_test):
        """Get predictions for evaluation"""
        if isinstance(model, tf.keras.Model):
            return model.predict(X_test)
        else:
            model.eval()
            with torch.no_grad():
                return model(torch.FloatTensor(X_test)).numpy()
                
    def train_models(self):
        """Train all models sequentially with memory management"""
        print("Starting model training sequence...")
        
        # Dictionary to store paths to saved results
        results_paths = {}
        
        # Load and process data once
        print("Loading and processing data...")
        X_train, y_train = self.data_processor.load_data(
            self.data_processor.train_data, TRAIN_PATH)
        X_test, y_test = self.data_processor.load_data(
            self.data_processor.test_data, TRAIN_PATH)
        
        # Save test data for later evaluation
        np.save(os.path.join(self.working_dir, f'X_test_{self.timestamp}.npy'), X_test)
        np.save(os.path.join(self.working_dir, f'y_test_{self.timestamp}.npy'), y_test)
        
        try:
            # Train MobileNetV2
            #print("\nTraining MobileNetV2...")
            #history, model = self.train_mobilenet(X_train, y_train, X_test, y_test)
            #predictions = self._get_predictions(model, X_test)
            #self._save_model_results('mobilenet', history, predictions)
            #results_paths['mobilenet'] = f'mobilenet_{self.timestamp}'
            
            # Clear memory
            #del model, history, predictions
            #tf.keras.backend.clear_session()
            #gc.collect()
            
            # Train Faster R-CNN
            print("\nTraining Faster R-CNN...")
            history, model = self.train_faster_rcnn(X_train, y_train, X_test, y_test)
            predictions = self._get_predictions(model, X_test)
            self._save_model_results('faster_rcnn', history, predictions)
            results_paths['faster_rcnn'] = f'faster_rcnn_{self.timestamp}'
            
            # Clear memory
            del model, history, predictions
            torch.cuda.empty_cache()
            gc.collect()
            
            # Train YOLOv8
            print("\nTraining YOLOv8...")
            history, model = self.train_yolo(X_train, y_train, X_test, y_test)
            predictions = self._get_predictions(model, X_test)
            self._save_model_results('yolo', history, predictions)
            results_paths['yolo'] = f'yolo_{self.timestamp}'
            
            # Clear memory
            del model, history, predictions
            gc.collect()
            
            # Train ViT
            print("\nTraining ViT...")
            history, model = self.train_vit(X_train, y_train, X_test, y_test)
            predictions = self._get_predictions(model, X_test)
            self._save_model_results('vit', history, predictions)
            results_paths['vit'] = f'vit_{self.timestamp}'
            
            # Clear memory
            del model, history, predictions
            torch.cuda.empty_cache()
            gc.collect()
            
        except Exception as e:
            print(f"Error during training: {str(e)}")
            raise e
        
        finally:
            # Clean up training data
            del X_train, y_train
            gc.collect()
            
        return results_paths

In [None]:
def main():
    # Initialize data processor
    data_processor = DataProcessor(
        os.path.join(FILTERED_DATASET, 'train_data.csv'),
        os.path.join(FILTERED_DATASET, 'test_data.csv')
    )
    
    # Initialize trainer
    trainer = ModelTrainer(data_processor)
    
    # Train all models and get paths to results
    results_paths = trainer.train_models()
    
    # Evaluate results
    evaluator = MetricsEvaluator(WORKING_DIR, trainer.timestamp)
    evaluator.evaluate_all_models(results_paths)
    
    print("Training and evaluation complete. Results saved in working directory.")

if __name__ == "__main__":
    main()

Starting model training sequence...
Loading and processing data...

Training Faster R-CNN...
Training Faster R-CNN...


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 202MB/s] 


Epoch 1/30 - Loss: 0.1574
Epoch 2/30 - Loss: 0.1426
Epoch 3/30 - Loss: 0.1412
Epoch 4/30 - Loss: 0.1362
Epoch 5/30 - Loss: 0.1320
Epoch 6/30 - Loss: 0.1272
Epoch 7/30 - Loss: 0.1190
Epoch 8/30 - Loss: 0.1199
Epoch 9/30 - Loss: 0.1089
Epoch 10/30 - Loss: 0.0862
Epoch 11/30 - Loss: 0.0689
Epoch 12/30 - Loss: 0.0551
Epoch 13/30 - Loss: 0.0444
Epoch 14/30 - Loss: 0.0366
Epoch 15/30 - Loss: 0.0300
Epoch 16/30 - Loss: 0.0252
Epoch 17/30 - Loss: 0.0304
Epoch 18/30 - Loss: 0.0240
Epoch 19/30 - Loss: 0.0209
Epoch 20/30 - Loss: 0.0219
Epoch 21/30 - Loss: 0.0240
Epoch 22/30 - Loss: 0.0206
Epoch 23/30 - Loss: 0.0200
