In [25]:
import os

In [26]:
%pwd

'c:\\Users\\RICH-FILES\\Desktop\\ml'

In [27]:
# Move the working directory up one level, relative to wherever the kernel started.
# NOTE(review): not idempotent — re-running this cell climbs another level each time.
# A later cell changes directory again to the project path.
os.chdir("../")

In [28]:
%pwd

'c:\\Users\\RICH-FILES\\Desktop'

In [29]:
# Project root that the rest of the notebook runs from (config/params/schema YAML
# files are resolved relative to this directory).
# The default is the original author's local checkout; set the
# CLIENT_SUBSCRIPTION_PROJECT_ROOT environment variable to run the notebook on
# another machine without editing this cell.
proj_link = os.environ.get(
    "CLIENT_SUBSCRIPTION_PROJECT_ROOT",
    'c:\\Users\\RICH-FILES\\Desktop\\ml\\client-subscription-prediction',
)
os.chdir(proj_link)


In [30]:
# create root entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle for the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    The dataclass does not validate or convert values; annotations are
    documentation only.
    """

    # --- artifact locations ---
    root_dir: Path            # directory the trained artifacts are written into
    train_data_path: Path     # CSV file read as the training set
    test_data_path: Path      # CSV file read as the evaluation set
    model_name: str           # file name (joined onto root_dir) for the saved classifier
    preprocessor_name: str    # file name (joined onto root_dir) for the saved preprocessor

    # --- data schema ---
    target_column: str        # name of the label column in both CSV files

    # --- LogisticRegression hyper-parameters (passed through unchanged) ---
    class_weight: str
    max_iter: int
    penalty: str              # regularisation name such as "l2" — a string, not an int
    C: float
    solver: str
    random_state: int
    

In [31]:
#create configuration manager 
from clientClassifier.constants import *
from clientClassifier.utils.common import read_yaml, create_directories

In [32]:
class ConfigurationManager:
    """Reads the project's YAML files and turns them into stage config objects."""

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the config, params and schema files and create the artifacts root.

        Args:
            config_file_path: YAML file holding per-stage paths and names.
            params_file_path: YAML file holding model hyper-parameters.
            schema_filepath: YAML file describing the dataset schema.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        self.schema = read_yaml(schema_filepath)

        # Make sure the top-level artifacts directory is present before any stage runs.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ModelTrainerConfig for the training stage.

        Pulls paths and file names from the ``model_trainer`` config section,
        hyper-parameters from the ``Logistic_Regression`` params section and the
        label column name from the schema's ``TARGET_COLUMN`` entry. The stage's
        ``root_dir`` is created as a side effect.

        Returns:
            A populated ModelTrainerConfig.
        """
        trainer_section = self.config.model_trainer
        lr_params = self.params.Logistic_Regression
        target_schema = self.schema.TARGET_COLUMN

        # The trainer writes its artifacts here, so the directory must exist up front.
        create_directories([trainer_section["root_dir"]])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            preprocessor_name=trainer_section.preprocessor_name,
            target_column=target_schema.name,
            class_weight=lr_params.class_weight,
            max_iter=lr_params["max_iter"],
            penalty=lr_params["penalty"],
            C=lr_params["C"],
            solver=lr_params["solver"],
            random_state=lr_params["random_state"],
        )

In [None]:
#define model trainer components
import os
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
from clientClassifier import logger
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from pathlib import Path




In [34]:
class ModelTrainer:
    """Fits, evaluates and saves a logistic-regression pipeline from a config."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the training configuration.

        Args:
            config: paths, target column name and hyper-parameters for the run.
        """
        self.config = config

    def train(self):
        """Train on the configured CSV, evaluate on the test CSV and save artifacts.

        Steps:
            1. Read the train and test CSV files.
            2. Split each into features and the target column.
            3. Scale int64/float64 columns and one-hot encode object columns.
            4. Fit a LogisticRegression on the transformed training data.
            5. Log the classification report, confusion matrix and accuracy
               computed on the test set.
            6. Save the fitted preprocessor and the fitted classifier as two
               separate joblib files under ``config.root_dir``.

        Returns:
            None. Results are logged and artifacts are written to disk.
        """
        logger.info("Training model")

        # load train and test data
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        # separate features and target variable
        target = self.config.target_column
        X_train = train_data.drop(columns=[target])
        y_train = train_data[target]

        X_test = test_data.drop(columns=[target])
        y_test = test_data[target]

        # Column lists are derived from the training frame only, so the same
        # columns are selected from the test frame at prediction time.
        # NOTE(review): columns whose dtype is neither int64/float64 nor object
        # are not listed in either transformer — confirm that is intended.
        numerical_features = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()
        categorical_features = X_train.select_dtypes(include=['object']).columns.tolist()

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numerical_features),
                # handle_unknown='ignore': categories unseen during fit do not raise at predict time
                ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
            ]
        )

        # create a pipeline with preprocessing and model
        pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('classifier', LogisticRegression(
                class_weight=self.config.class_weight,
                max_iter=self.config.max_iter,
                penalty=self.config.penalty,
                C=self.config.C,
                solver=self.config.solver,
                random_state=self.config.random_state,
            )),
        ])

        # fit on the training data, then predict on the held-out test data
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        # evaluate the model
        report = classification_report(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)
        accuracy = accuracy_score(y_test, y_pred)

        logger.info(f"Classification Report:\n{report}")
        logger.info(f"Confusion Matrix:\n{cm}")
        logger.info(f"Accuracy: {accuracy}")

        # Save the two fitted steps as separate files. Each file is written
        # exactly once (the model was previously dumped twice to the same path).
        model = pipeline.named_steps['classifier']
        fitted_preprocessor = pipeline.named_steps['preprocessor']
        model_path = os.path.join(self.config.root_dir, self.config.model_name)
        preprocessor_path = os.path.join(self.config.root_dir, self.config.preprocessor_name)
        joblib.dump(fitted_preprocessor, preprocessor_path)
        joblib.dump(model, model_path)

        logger.info(f"Model saved at: {model_path}")
        logger.info(f"Preprocessor saved at: {preprocessor_path}")

In [35]:
# Run the training stage end to end: load configuration, build the trainer, train.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(model_trainer_config)
    model_trainer.train()
except Exception as e:
    # Record the full traceback in the project log, then let the failure surface
    # in the notebook. A bare `raise` re-raises the active exception as-is.
    logger.exception(e)
    raise



[2025-04-12 12:47:29,457: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-12 12:47:29,460: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-12 12:47:29,466: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-04-12 12:47:29,468: INFO: common: created directory at: artifacts]
[2025-04-12 12:47:29,470: INFO: common: created directory at: artifacts/model_trainer]
[2025-04-12 12:47:29,471: INFO: 385929610: Training model]
[2025-04-12 12:47:29,585: INFO: 385929610: Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.74      0.82       807
           1       0.20      0.54      0.29        98

    accuracy                           0.72       905
   macro avg       0.56      0.64      0.56       905
weighted avg       0.85      0.72      0.76       905
]
[2025-04-12 12:47:29,586: INFO: 385929610: Confusion Matrix:
[[595 212]
 [ 45  53]]]
[2025-04-12 12:47:29,587: INFO: 385929610: 