In [1]:
import os
# Move the working directory up one level; the relative config/artifact paths
# used by the later cells are resolved against whatever directory this lands in.
# NOTE: the target is relative to the *current* working directory, so executing
# this cell again without restarting the kernel moves up one more level.
os.chdir("../")
%pwd

'e:\\parth\\DonorsChoose-Application-Screening-app'

In [2]:
# model_training:
#   root_dir: artifacts/model_training
#   local_train_file: artifacts/data_transform/train.npz

# PENALTY:l2
# C:0.1
# MAX_ITER:2000
# SOLVER:sag
# CLASS_WEIGHT:None

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Read-only settings for the model-training step.

    The first two fields are filesystem locations; the remaining four are
    hyperparameters that are forwarded to ``LogisticRegression`` when the
    model is built.
    """

    # --- locations -------------------------------------------------------
    root_dir: Path          # directory where the trained model is written
    local_train_file: Path  # .npz file holding the training matrix

    # --- LogisticRegression hyperparameters --------------------------------
    penalty: str
    C: float
    max_iter: int
    solver: str

In [7]:
from donorschoose.constants import *
import os
from donorschoose.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the project's YAML config and params files and builds step configs.

    Both files are parsed with ``read_yaml``; the resulting objects are kept on
    ``self.config`` and ``self.params`` and accessed by attribute.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
        ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the config YAML (defaults to
                ``CONFIG_FILE_PATH``).
            params_filepath: Location of the params YAML (defaults to
                ``PARAMS_FILE_PATH``).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Assemble the settings for the model-training step.

        Creates the step's ``root_dir`` as a side effect.

        Returns:
            ModelTrainingConfig: paths taken from the ``model_training``
            section of the config file and hyperparameters taken from the
            params file.
        """
        config = self.config.model_training
        params = self.params
        create_directories([config.root_dir])

        return ModelTrainingConfig(
            # The dataclass declares these two fields as ``Path``; coerce the
            # raw YAML values so the declared and actual types agree.
            root_dir=Path(config.root_dir),
            local_train_file=Path(config.local_train_file),
            penalty=params.PENALTY,
            C=params.C,
            max_iter=params.MAX_ITER,
            solver=params.SOLVER,
        )

# data_Config = ConfigurationManager()
# data_Config =data_Config.get_model_training_config()
# data_Config

In [8]:
import os
from donorschoose import logger
from pathlib import Path
from donorschoose.utils.common import read_csv
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.linear_model import LogisticRegression
import joblib

class ModelTraining:
    """Loads the training matrix and fits/saves a logistic-regression model."""

    def __init__(self, config: ModelTrainingConfig):
        """Store the step configuration; no data is loaded yet.

        Args:
            config: Paths and hyperparameters for this step.
        """
        self.config = config
        self.X_train = None
        self.y_train = None

    def read_files(self):
        """Load the training set from ``config.local_train_file``.

        The ``.npz`` archive must contain the arrays ``data``, ``row``, ``col``
        and ``shape``, which are reassembled into a sparse matrix. Every column
        except the last becomes ``self.X_train``; the last column becomes
        ``self.y_train``. Both are stored as dense NumPy arrays.

        Also ensures ``config.root_dir`` exists. Returns nothing; any error
        raised while reading or converting the file propagates to the caller.
        """
        os.makedirs(self.config.root_dir, exist_ok=True)
        train_file_path = Path(self.config.local_train_file)

        # np.load on an .npz returns a lazy NpzFile that holds the file open;
        # the with-block closes the handle once the arrays have been read.
        with np.load(train_file_path) as train_data:
            sparse_matrix = coo_matrix(
                (train_data['data'], (train_data['row'], train_data['col'])),
                shape=train_data['shape'],
            )

        # CSR supports the column slicing used below (COO does not).
        sparse_matrix = sparse_matrix.tocsr()

        # NOTE: .toarray() densifies the data; memory grows with rows * cols.
        self.X_train = sparse_matrix[:, :-1].toarray()
        self.y_train = sparse_matrix[:, -1].toarray().flatten()

        logger.info(f"Shape of X_train: {self.X_train.shape}")
        logger.info(f"Shape of y_train: {self.y_train.shape}")

    def train_log_reg(self):
        """Fit ``LogisticRegression`` on the loaded data and persist it.

        The model is written with ``joblib.dump`` to
        ``<config.root_dir>/logistic_regression_model.pkl``.

        Raises:
            ValueError: If ``read_files`` has not populated the training data.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError(
                "Training data is not loaded; call read_files() before train_log_reg()."
            )

        # NOTE(review): no random_state is passed, so a stochastic solver such
        # as 'sag' may give slightly different coefficients on each run.
        logreg = LogisticRegression(
            penalty=self.config.penalty,
            C=self.config.C,
            max_iter=self.config.max_iter,
            solver=self.config.solver,
        )
        logreg.fit(self.X_train, self.y_train)

        model_filename = Path(self.config.root_dir, "logistic_regression_model.pkl")
        joblib.dump(logreg, model_filename)
        logger.info(f"model saved succesfully {model_filename}")
        

In [9]:
# Run the training step end to end: load configuration, read the training
# matrix, then fit the model and write it to the step's root directory.
data_Config = ConfigurationManager()
Model_training = ModelTraining(data_Config.get_model_training_config())
Model_training.read_files()     # fills X_train / y_train on the instance
Model_training.train_log_reg()  # fits LogisticRegression and saves it with joblib

[2024-02-18 09:52:29,974: INFO: common: YAML file 'config\config.yaml' loaded successfully]
[2024-02-18 09:52:29,978: INFO: common: YAML file 'params.yaml' loaded successfully]
[2024-02-18 09:52:29,981: INFO: common: created directory at: artifacts]
[2024-02-18 09:52:29,984: INFO: common: created directory at: artifacts/model_training]
[2024-02-18 09:52:30,001: INFO: 1308082293: Shape of X_train: (700, 1181)]
[2024-02-18 09:52:30,003: INFO: 1308082293: Shape of y_train: (700,)]
[2024-02-18 09:52:44,739: INFO: 1308082293: model saved succesfully artifacts\model_training\logistic_regression_model.pkl]
