In [1]:
import os

In [2]:
%pwd

'/home/joshua/Desktop/Refactory/SwahiliNewsClassifier/research'

In [3]:
# Step from the notebook folder up to the project root so that the package
# imports and the relative config paths used below resolve.
# Guarded on the folder name so that re-running this cell (or starting the
# kernel in the project root) does not climb one directory too far.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'/home/joshua/Desktop/Refactory/SwahiliNewsClassifier'

In [12]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters for the training stage.

    Paths:
        root_dir: Root directory where training-related data is stored or processed.
        trained_model_path: Filepath where the trained model is saved.
        training_data: Directory or filepath where the training data is located.

    Epoch counts:
        params_epochs_1: Epochs for fine-tuning the language model.
        params_epochs_2: Epochs for classifier fit 1.
        params_epochs_3: Epochs for classifier fit 2.
        params_epochs_4: Epochs for classifier fit 3.
        params_epochs_5: Epochs for classifier fit 4.

    Batch sizes:
        params_batch_size_1: Batch size for the first data loader.
        params_batch_size_2: Batch size for the second data loader.

    Learning rates:
        params_learning_rate_1: Learning rate for the language model.
        params_learning_rate_2: Learning rate for classifier fit 1.
        params_learning_rate_3: Learning rate for classifier fit 2.
        params_learning_rate_4: Learning rate for classifier fit 3.
        params_learning_rate_5: Learning rate for classifier fit 4.
    """

    # --- filesystem locations ---
    root_dir: Path
    trained_model_path: Path
    training_data: Path

    # --- epochs per training phase ---
    params_epochs_1: int
    params_epochs_2: int
    params_epochs_3: int
    params_epochs_4: int
    params_epochs_5: int

    # --- batch sizes per data loader ---
    params_batch_size_1: int
    params_batch_size_2: int

    # --- learning rates per training phase ---
    params_learning_rate_1: float
    params_learning_rate_2: float
    params_learning_rate_3: float
    params_learning_rate_4: float
    params_learning_rate_5: float



In [5]:
from swahiliNewsClassifier.constants import *
from swahiliNewsClassifier.utils.common import read_yaml, create_directories

In [13]:
from pathlib import Path

class ConfigurationManager:
    """Reads the project configuration and parameter files for the training stage.

    Both YAML files are read on construction, and the configured artifacts
    root is handed to ``create_directories``.

    Attributes:
        config: Result of ``read_yaml`` on the configuration file.
        params: Result of ``read_yaml`` on the parameters file.

    Methods:
        get_training_config(): Builds the ``TrainingConfig`` for the training stage.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both files and set up the artifacts root directory.

        Args:
            config_filepath: Path of the configuration file. Defaults to CONFIG_FILE_PATH.
            params_filepath: Path of the parameters file. Defaults to PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` from the loaded configuration and parameters.

        The training data location is ``dataset/train`` under the data-ingestion
        unzip directory. The training root directory is passed to
        ``create_directories`` before the config object is built.

        Returns:
            TrainingConfig: Paths taken from the ``training`` section of the
            configuration, plus the epoch, batch-size and learning-rate values
            taken from the parameters file.
        """
        training_section = self.config.training
        hyperparams = self.params

        train_data_dir = os.path.join(self.config.data_ingestion.unzip_dir, "dataset/train")

        stage_root = Path(training_section.root_dir)
        create_directories([stage_root])

        return TrainingConfig(
            root_dir=stage_root,
            trained_model_path=Path(training_section.trained_model_path),
            training_data=Path(train_data_dir),
            params_epochs_1=hyperparams.EPOCHS_1,
            params_epochs_2=hyperparams.EPOCHS_2,
            params_epochs_3=hyperparams.EPOCHS_3,
            params_epochs_4=hyperparams.EPOCHS_4,
            params_epochs_5=hyperparams.EPOCHS_5,
            params_batch_size_1=hyperparams.BATCH_SIZE_1,
            params_batch_size_2=hyperparams.BATCH_SIZE_2,
            params_learning_rate_1=hyperparams.LEARNING_RATE_1,
            params_learning_rate_2=hyperparams.LEARNING_RATE_2,
            params_learning_rate_3=hyperparams.LEARNING_RATE_3,
            params_learning_rate_4=hyperparams.LEARNING_RATE_4,
            params_learning_rate_5=hyperparams.LEARNING_RATE_5,
        )

In [11]:
import torch
import fastai
from fastai.text.all import *
import pandas as pd
import numpy as np
from functools import partial
import io
import os
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'torch._C'

In [None]:
class SwahiliNewsClassifier:
    """Training pipeline: fine-tune a language model, then train a text classifier.

    Intended call order:
        1. ``preprocess_data()``            - load the data and split it.
        2. ``prepare_language_model()``     - fine-tune an AWD_LSTM language model
                                              and save its encoder.
        3. ``prepare_classifier_learner()`` - load that encoder into a classifier
                                              and train it while unfreezing layers.

    Attributes:
        config: The training configuration (paths and hyperparameters).
        df_trn / df_val: Train / validation frames produced by ``preprocess_data``.
        df_lm: ``content`` column of train + validation, used for the language model.
        dls_lm / learn_lm: Language-model dataloaders and learner.
        dls_classifier / learn_classifier: Classifier dataloaders and learner.
    """

    # Encoder checkpoint name shared by the language-model save and the classifier
    # load; both must use the same name or the classifier cannot find the weights.
    _LM_ENCODER_NAME = 'finetuned'

    def __init__(self, training_config: "TrainingConfig"):
        """Store the configuration; all data and learners are created lazily.

        Args:
            training_config: Object exposing the ``TrainingConfig`` fields.
        """
        self.config = training_config
        self.df_trn = None
        self.df_val = None
        self.df_lm = None
        self.dls_lm = None
        self.learn_lm = None
        self.dls_classifier = None
        self.learn_classifier = None

    def _load_training_frame(self):
        """Return the training data as a DataFrame.

        ``config.training_data`` is a path, not a frame, so it has to be read
        before it can be split. A ready-made DataFrame is passed through as is.

        Raises:
            FileNotFoundError: If the path is a directory containing no CSV files.
        """
        source = self.config.training_data
        if isinstance(source, pd.DataFrame):
            return source

        data_path = Path(source)
        if data_path.is_dir():
            # NOTE(review): assumes the training directory holds CSV file(s) - confirm.
            csv_files = sorted(data_path.glob("*.csv"))
            if not csv_files:
                raise FileNotFoundError(f"No CSV files found in training data directory: {data_path}")
            return pd.concat([pd.read_csv(csv_file) for csv_file in csv_files], ignore_index=True)
        return pd.read_csv(data_path)

    def preprocess_data(self):
        """Load the training data and create the train / validation / LM frames.

        The split is 70/30, stratified on ``category``, with a fixed random state.

        Raises:
            ValueError: If the data lacks the ``content`` or ``category`` column.
        """
        training_frame = self._load_training_frame()
        missing = [col for col in ('content', 'category') if col not in training_frame.columns]
        if missing:
            raise ValueError(f"Training data is missing required column(s): {missing}")

        self.df_trn, self.df_val = train_test_split(
            training_frame,
            stratify=training_frame['category'],
            test_size=0.3,
            random_state=123,
        )
        self.df_lm = pd.concat([self.df_trn, self.df_val], axis=0)[['content']]

    def prepare_language_model(self, lm_epochs=None, learning_rate=None, bs=None):
        """Fine-tune the language model on all text and save its encoder.

        Args:
            lm_epochs: Fine-tuning epochs. Defaults to ``config.params_epochs_1``.
            learning_rate: Learning rate. Defaults to ``config.params_learning_rate_1``.
            bs: Batch size. Defaults to ``config.params_batch_size_1``.

        Raises:
            RuntimeError: If ``preprocess_data()`` has not been called yet.
        """
        if self.df_lm is None:
            raise RuntimeError("preprocess_data() must be called before prepare_language_model().")

        # Defaults are resolved here because `self` does not exist when the
        # `def` line (and therefore its default expressions) is evaluated.
        cfg = self.config
        lm_epochs = cfg.params_epochs_1 if lm_epochs is None else lm_epochs
        learning_rate = cfg.params_learning_rate_1 if learning_rate is None else learning_rate
        bs = cfg.params_batch_size_1 if bs is None else bs

        # Language Model
        dblock = DataBlock(
            blocks=TextBlock.from_df('content', is_lm=True),
            get_x=ColReader('text'),
            splitter=RandomSplitter(0.1))
        # `bs` must be a keyword: the second positional parameter of
        # DataBlock.dataloaders is `path`, not the batch size.
        self.dls_lm = dblock.dataloaders(self.df_lm, bs=bs)
        self.learn_lm = language_model_learner(self.dls_lm, AWD_LSTM, drop_mult=0.3, metrics=[accuracy]).to_fp16()
        self.learn_lm.lr_find()
        self.learn_lm.fine_tune(lm_epochs, learning_rate)
        self.learn_lm.save_encoder(self._LM_ENCODER_NAME)

    def prepare_classifier_learner(self, bs=None, layer_1_epochs=None,
                                   layer_2_epochs=None, layer_3_epochs=None,
                                   layer_4_epochs=None, layer_1_lr=None,
                                   layer_2_lr=None, layer_3_lr=None,
                                   layer_4_lr=None):
        """Train the classifier on top of the fine-tuned encoder.

        Training runs in four phases: the initial frozen fit, ``freeze_to(-2)``,
        ``freeze_to(-3)``, then fully unfrozen.

        Args:
            bs: Batch size. Defaults to ``config.params_batch_size_2``.
            layer_1_epochs .. layer_4_epochs: Epochs per phase. Default to
                ``config.params_epochs_2`` .. ``config.params_epochs_5``.
            layer_1_lr .. layer_4_lr: Learning rate (upper bound of the slice for
                phases 2-4). Default to ``config.params_learning_rate_2`` ..
                ``config.params_learning_rate_5``.

        Raises:
            RuntimeError: If ``preprocess_data()`` or ``prepare_language_model()``
                has not been called yet.
        """
        if self.df_trn is None:
            raise RuntimeError("preprocess_data() must be called before prepare_classifier_learner().")
        if self.dls_lm is None:
            raise RuntimeError("prepare_language_model() must be called before prepare_classifier_learner().")

        cfg = self.config
        bs = cfg.params_batch_size_2 if bs is None else bs
        layer_1_epochs = cfg.params_epochs_2 if layer_1_epochs is None else layer_1_epochs
        layer_2_epochs = cfg.params_epochs_3 if layer_2_epochs is None else layer_2_epochs
        layer_3_epochs = cfg.params_epochs_4 if layer_3_epochs is None else layer_3_epochs
        layer_4_epochs = cfg.params_epochs_5 if layer_4_epochs is None else layer_4_epochs
        layer_1_lr = cfg.params_learning_rate_2 if layer_1_lr is None else layer_1_lr
        layer_2_lr = cfg.params_learning_rate_3 if layer_2_lr is None else layer_2_lr
        layer_3_lr = cfg.params_learning_rate_4 if layer_3_lr is None else layer_3_lr
        layer_4_lr = cfg.params_learning_rate_5 if layer_4_lr is None else layer_4_lr

        # Classifier Learner
        # Reuse the language model's vocab and sequence length so the saved
        # encoder weights line up with the classifier's embedding.
        blocks = (TextBlock.from_df('content', seq_len=self.dls_lm.seq_len, vocab=self.dls_lm.vocab), CategoryBlock())
        dls = DataBlock(
            blocks=blocks,
            get_x=ColReader('text'),
            get_y=ColReader('category'),
            splitter=RandomSplitter(0.2))
        # Keyword `bs=` for the same reason as in prepare_language_model.
        self.dls_classifier = dls.dataloaders(self.df_trn, bs=bs)
        self.learn_classifier = text_classifier_learner(self.dls_classifier, AWD_LSTM, metrics=[accuracy]).to_fp16()
        # Load the encoder under the same name prepare_language_model saved it with.
        self.learn_classifier.load_encoder(self._LM_ENCODER_NAME)
        self.learn_classifier.lr_find()
        self.learn_classifier.fit_one_cycle(layer_1_epochs, layer_1_lr)
        self.learn_classifier.freeze_to(-2)
        self.learn_classifier.fit_one_cycle(layer_2_epochs, slice(1e-3/(2.6**4), layer_2_lr))
        self.learn_classifier.freeze_to(-3)
        self.learn_classifier.fit_one_cycle(layer_3_epochs, slice(5e-3/(2.6**4), layer_3_lr))
        self.learn_classifier.unfreeze()
        self.learn_classifier.fit_one_cycle(layer_4_epochs, slice(1e-3/(2.6**4), layer_4_lr))
        # NOTE(review): save_encoder stores only the encoder weights, and fastai
        # resolves this name relative to the learner's model directory - confirm
        # this is the intended artifact and location for the trained classifier.
        encoder_path_2 = os.path.join(self.config.trained_model_path, 'SwahiliNewsclassifier')
        self.learn_classifier.save_encoder(encoder_path_2)


In [None]:
# Run the training stage end to end: build the config, then split the data,
# fine-tune the language model and train the classifier, in that order.
# No try/except wrapper: catching an exception only to re-raise it unchanged
# adds nothing, and the notebook already shows the full traceback on failure.
config = ConfigurationManager()
training_config = config.get_training_config()
# Passed positionally: the constructor's parameter is named `training_config`,
# so the former `config=` keyword raised a TypeError.
training = SwahiliNewsClassifier(training_config)
training.preprocess_data()
training.prepare_language_model()
training.prepare_classifier_learner()