In [1]:
import os

In [2]:
# Step up one directory so that relative paths used later (e.g. config\config.yaml)
# are resolved from the parent folder.
# NOTE: not idempotent - re-running this cell moves up one more level each time.
os.chdir('..')

In [3]:
%pwd

'c:\\Users\\Kushal\\Documents\\GitHub\\kidney-disease-classification'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings bundle consumed by the model-training stage.

    Instances are frozen: assigning to a field after construction raises.
    Field order is part of the positional constructor and must not change.
    """

    # --- filesystem locations -------------------------------------------
    root_dir: Path               # directory holding the training stage's artifacts
    base_model_path: Path        # checkpoint of the prepared base model (input)
    trained_model_path: Path     # where the fine-tuned checkpoint is written (output)
    training_data_path: Path     # root folder of the image dataset

    # --- hyper-parameters ------------------------------------------------
    params_num_epochs: int       # number of passes over the training split
    params_learning_rate: float  # optimizer learning rate
    params_batch_size: int       # samples per mini-batch

In [5]:
from classifier.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from classifier.utils.common import read_yaml, create_directories

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import datasets, models, transforms

In [21]:
class ConfigurationManager:
    """Loads the YAML config and params files and builds per-stage config objects."""

    def __init__(self, config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the model-training stage.

        Reads the ``training``, ``prepare_base_model`` and ``data_ingestion``
        sections of the config plus the hyper-parameters, and ensures the
        training root directory exists before returning.

        Returns:
            A populated ``TrainingConfig``.
        """
        # Pull out every section up front, before touching the filesystem.
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        ingestion_section = self.config.data_ingestion
        hyperparams = self.params

        create_directories([training_section.root_dir])

        # The image folders sit in a fixed sub-directory of the unzip location.
        dataset_dir = os.path.join(ingestion_section.unzip_dir, 'kidney-ct-scan-image')

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            base_model_path=Path(base_model_section.base_model_path),
            trained_model_path=Path(training_section.trained_model_path),
            training_data_path=Path(dataset_dir),
            params_num_epochs=hyperparams.EPOCHS,
            params_learning_rate=hyperparams.LEARNING_RATE,
            params_batch_size=hyperparams.BATCH_SIZE,
        )

In [60]:
class Training:
    """Fine-tunes the prepared ResNet-18 checkpoint on an image-folder dataset.

    Expected call order: ``get_base_model()`` -> ``train_val_generator()`` ->
    ``training()``.

    Attributes:
        config: Paths and hyper-parameters for this stage.
        loss: Average training loss of every epoch run so far.
        epoch: Epoch counter; restored from the base checkpoint and incremented
            after each training epoch, then written to the saved checkpoint.
        criterion: Cross-entropy loss.
        device: CUDA when ``torch.cuda.is_available()``, otherwise CPU.
    """

    # The annotation is quoted so that defining the class does not require
    # ``TrainingConfig`` to be resolvable at definition time.
    def __init__(self, config: "TrainingConfig"):
        """Store the configuration and set up loss function, device and bookkeeping."""
        self.config = config
        self.loss = []
        self.epoch = 0
        self.criterion = torch.nn.CrossEntropyLoss()
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    def get_base_model(self):
        """Load the base model created in the prepare_base_model step.

        Sets ``self.model`` (on ``self.device``), ``self.optimizer`` and
        ``self.epoch``.

        Raises:
            KeyError: If the checkpoint lacks ``'epoch'``, ``'model_state_dict'``
                or ``'optimizer_state_dict'``.
        """
        config = self.config

        model = models.resnet18()
        # Install the 2-output head first: the checkpoint weights are loaded into
        # it, and the optimizer below must be built over *these* parameters.
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)

        # map_location lets a checkpoint saved on another device load here.
        checkpoint = torch.load(config.base_model_path, map_location=self.device)
        model.load_state_dict(checkpoint['model_state_dict'])
        # Batches are moved to self.device during training, so the model must be too.
        model.to(self.device)

        # Build the optimizer only after the final layer has been replaced and the
        # model moved; otherwise it would keep references to the discarded head
        # and the new head would never be updated.
        optimizer = torch.optim.Adam(model.parameters(), lr=config.params_learning_rate)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # load_state_dict restores the checkpoint's hyper-parameters; re-apply the
        # configured learning rate so the params file is what actually drives training.
        for group in optimizer.param_groups:
            group['lr'] = config.params_learning_rate

        self.epoch = checkpoint['epoch']
        self.model = model
        self.optimizer = optimizer

    def train_val_generator(self, seed=None):
        """Create training and validation data loaders; 80% training, 20% validation.

        Args:
            seed: Optional integer seed for the random split. ``None`` (default)
                keeps the split non-deterministic, as before.
        """
        config = self.config
        transform = transforms.Compose([
                    transforms.Resize((224, 224)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                ])

        # Load the dataset with ImageFolder
        dataset = datasets.ImageFolder(config.training_data_path, transform = transform)

        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size

        split_kwargs = {}
        if seed is not None:
            split_kwargs['generator'] = torch.Generator().manual_seed(seed)
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size], **split_kwargs)

        # Define the data loaders
        self.train_loader = DataLoader(train_dataset, batch_size=config.params_batch_size,
                                        shuffle=True)

        self.val_loader = DataLoader(val_dataset, batch_size=config.params_batch_size,
                                     shuffle=False)

    def _evaluate(self, data_loader):
        """Evaluate the model on ``data_loader``, print and return the accuracy.

        Args:
            data_loader: Iterable of ``(images, labels)`` batches.

        Returns:
            Accuracy in percent; ``0.0`` when the loader yields no samples.
        """
        model = self.model
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in data_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = model(images)
                # The arg-max of the raw outputs is the predicted class; no softmax needed.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        # Guard against an empty loader instead of dividing by zero.
        accuracy = 100 * correct / total if total else 0.0
        print(f'Validation Accuracy: {accuracy:.2f}%')
        return accuracy

    def _train_one_epoch(self, data_loader):
        """Train for one pass over ``data_loader`` and record the average loss.

        Args:
            data_loader: Iterable of ``(images, labels)`` batches.

        Returns:
            The average per-batch loss, which is also appended to ``self.loss``.

        Raises:
            ValueError: If ``data_loader`` yields no batches.
        """
        self.model.train()
        total_loss = 0
        num_batches = 0
        for images, labels in data_loader:
            images, labels = images.to(self.device), labels.to(self.device)
            outputs = self.model(images)
            loss = self.criterion(outputs, labels)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError('training data loader produced no batches')

        avg_loss = total_loss / num_batches
        self.loss.append(avg_loss)
        return avg_loss

    def training(self):
        """Run the configured number of epochs, validating after each, then save."""
        config = self.config
        for _ in range(config.params_num_epochs):
            self._train_one_epoch(self.train_loader)
            self._evaluate(self.val_loader)
            self.epoch += 1

        self.save_model()

    def save_model(self):
        """Write model state, optimizer state and the epoch counter to ``trained_model_path``."""
        torch.save({
            'epoch': self.epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict()
            }, self.config.trained_model_path)
    


In [61]:
if __name__ == '__main__':
    # Build the training configuration from the YAML config/params files.
    training_config = ConfigurationManager().get_training_config()
    trainer = Training(training_config)
    # Load the prepared base-model checkpoint together with its optimizer state.
    trainer.get_base_model()
    # Create the 80% / 20% training and validation data loaders.
    trainer.train_val_generator()
    # Train for the configured number of epochs, validating after each, then save.
    trainer.training()

[2024-08-03 21:28:21,460: INFO: common yaml file: config\config.yaml loaded successfully]
[2024-08-03 21:28:21,466: INFO: common yaml file: params.yaml loaded successfully]
[2024-08-03 21:28:21,466: INFO: common created directory at: artifacts]
[2024-08-03 21:28:21,476: INFO: common created directory at: artifacts/training]
