In [1]:
import os
from pathlib import Path
from dataclasses import dataclass

In [2]:
# Display the kernel's current working directory before changing it.
os.getcwd()

'c:\\Users\\jhasu\\Desktop\\Mlops_template\\experiments'

In [3]:
# Move the kernel's working directory up one level (presumably to the project
# root so that `project_name` imports and relative config paths resolve -- confirm).
# NOTE: this cell is not idempotent; every re-run climbs one more directory level,
# so run it exactly once per kernel session.
os.chdir('../')

In [4]:
# Display the working directory again to confirm the effect of the chdir.
os.getcwd()

'c:\\Users\\jhasu\\Desktop\\Mlops_template'

In [5]:
@dataclass(frozen=True)
class trainingEntity:
    """Immutable bundle of settings consumed by the model-training step.

    Instances are frozen, so fields cannot be reassigned after construction.
    Field order is part of the positional constructor signature and must not
    be rearranged.
    """

    # Path handed to ``torch.save`` for the trained weights.
    model_dir: Path
    # Number of full passes over the training data.
    epochs: int
    # Root folder of the training images (read with ``datasets.ImageFolder``).
    training_data: Path
    # Number of samples per optimisation step.
    batch_size: int
    # Optimiser step size; a fractional value, hence ``float`` rather than ``int``.
    learning_rate: float
    # Target size passed to ``transforms.Resize``.
    image_size: tuple
    # Number of output classes for the classifier head.
    num_classes: int

In [6]:
from project_name.Utills.utills import read_yaml
from project_name.contants import CONFIG_FILE_PATH, PARAMS_FILE_PATH


class ConfigManager:
    """Load the project configuration and hyper-parameter files.

    Both files are read with ``read_yaml`` when the manager is constructed,
    and the configured artifact root directory is created if it is missing.
    """

    def __init__(self,config = CONFIG_FILE_PATH,params = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifact root exists.

        Args:
            config: Path of the configuration YAML file.
            params: Path of the hyper-parameter YAML file.
        """
        self.config = read_yaml(config)
        self.params = read_yaml(params)
        os.makedirs(self.config.artifact_root, exist_ok=True)

    def get_training_entity(self)->trainingEntity:
        """Build the training settings from the loaded configuration.

        Paths come from the ``model_training`` section of the config file;
        hyper-parameters come from the params file.

        Returns:
            A ``trainingEntity`` populated from the two files.

        Raises:
            TypeError, ValueError: If ``learning_rate`` cannot be converted
                to ``float``.
        """
        config = self.config.model_training
        params = self.params

        # A YAML sequence arrives as a list; the entity declares a tuple, and a
        # list would also make the frozen dataclass unhashable. A scalar size
        # is passed through untouched.
        image_size = params.image_size
        if isinstance(image_size, (list, tuple)):
            image_size = tuple(image_size)

        # YAML 1.1 loaders read exponent notation without a decimal point
        # (e.g. ``1e-3``) as a string, so coerce explicitly before the value
        # reaches the optimiser.
        learning_rate = float(params.learning_rate)

        return trainingEntity(
            model_dir=Path(config.model_dir),
            training_data=Path(config.data_dir),
            epochs=params.epochs,
            batch_size=params.batch_size,
            learning_rate=learning_rate,
            image_size=image_size,
            num_classes=params.num_classes,
        )

In [None]:
import torch
from torch import nn, optim
from torchvision import transforms,datasets,models
from torch.utils.data import DataLoader
import mlflow
import mlflow.pytorch

class ModelTraining:
    """Train a ResNet-18 image classifier and record the run in MLflow."""

    def __init__(self,trainingEntity:trainingEntity):
        """Store the training settings.

        Args:
            trainingEntity: Settings object supplying data/model paths and
                hyper-parameters.
        """
        self.entity = trainingEntity

    def train(self):
        """Run the full training loop, log it to MLflow and save the weights.

        Steps:
            1. Load images from ``training_data`` with ``ImageFolder``,
               resized to ``image_size``.
            2. Train a randomly initialised ResNet-18 whose final layer is
               replaced to output ``num_classes`` logits, using Adam and
               cross-entropy loss.
            3. Log parameters once, and per-epoch loss/accuracy, to the
               "MLOPs Template" experiment.
            4. Log and register the model plus the class-to-index mapping,
               then write the ``state_dict`` to ``model_dir``.

        If no MLflow run is active, a run is started here and ended when
        logging finishes, so repeated calls produce separate runs. If the
        caller already has an active run, it is reused and left open.
        """
        # Local import: only this method needs it.
        from contextlib import nullcontext

        entity = self.entity

        transform = transforms.Compose([
            transforms.Resize(entity.image_size),
            transforms.ToTensor()
        ])

        dataset = datasets.ImageFolder(root=entity.training_data, transform=transform)
        dataloader = DataLoader(dataset, batch_size=entity.batch_size, shuffle=True)

        model = models.resnet18(weights=None)
        model.fc = nn.Linear(model.fc.in_features, entity.num_classes)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=entity.learning_rate)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

        model.train()

        mlflow.set_experiment("MLOPs Template")

        # Own the run only when the caller has not opened one; otherwise log
        # into the caller's run without closing it.
        if mlflow.active_run() is not None:
            run_scope = nullcontext()
        else:
            run_scope = mlflow.start_run()

        with run_scope:
            # Parameters are constant for the whole run: log them once, not
            # once per epoch.
            mlflow.log_param("Batch_size", entity.batch_size)
            mlflow.log_param("Learning rate", entity.learning_rate)
            mlflow.log_param("Epochs", entity.epochs)
            mlflow.log_param("Image size", entity.image_size)

            for epoch in range(entity.epochs):
                # total_loss is the SUM of per-batch losses (not the mean),
                # so its scale depends on the number of batches.
                total_loss = 0.0
                correct = 0
                total = 0

                for images, labels in dataloader:
                    images, labels = images.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    _, predicted = torch.max(outputs, 1)
                    correct += (predicted == labels).sum().item()
                    total += labels.size(0)

                acc = correct / total
                print(f"Epoch {epoch+1}: Loss={total_loss:.4f}, Accuracy={acc:.4f}")

                mlflow.log_metric("loss", total_loss, step=epoch)
                mlflow.log_metric("accuracy", acc, step=epoch)

                print(f"Epoch {epoch+1}/{entity.epochs}, Loss: {total_loss:.4f}")

            # Build the example input from the configured image size instead
            # of a hard-coded 224x224. A scalar (or one-element) size is
            # treated as a square example.
            size = entity.image_size
            dims = (size,) if isinstance(size, int) else tuple(size)
            height, width = dims[0], dims[-1]
            input_example = torch.rand(1, 3, height, width).numpy()
            mlflow.pytorch.log_model(
                model,
                artifact_path="model",
                input_example=input_example,
                registered_model_name="MLOPs Template",
            )

            mlflow.log_dict(dataset.class_to_idx, "class_to_idx.json")

        # torch.save does not create missing directories, so make sure the
        # checkpoint's parent folder exists first.
        Path(entity.model_dir).parent.mkdir(parents=True, exist_ok=True)
        torch.save(model.state_dict(), entity.model_dir)
        print(f"Model saved to {entity.model_dir}")

In [29]:
# Driver cell: load the configuration, build the training settings and train.
# ConfigManager() reads both YAML files from their default paths and creates
# the artifact root directory.
config= ConfigManager()
entity= config.get_training_entity()
component = ModelTraining(entity)
# Runs every epoch, logs to MLflow and writes the weights to entity.model_dir.
component.train()



Epoch 1: Loss=5.3281, Accuracy=0.8923
Epoch 1/5, Loss: 5.3281
Epoch 2: Loss=2.8727, Accuracy=0.9333
Epoch 2/5, Loss: 2.8727
Epoch 3: Loss=1.0246, Accuracy=0.9769
Epoch 3/5, Loss: 1.0246
Epoch 4: Loss=0.8776, Accuracy=0.9795
Epoch 4/5, Loss: 0.8776
Epoch 5: Loss=1.4267, Accuracy=0.9667
Epoch 5/5, Loss: 1.4267




Model saved to artifact\model
