In [1]:
import os

In [2]:
%pwd

'/Users/ravina/Desktop/CustomerChurnPrediction/research'

In [3]:
os.chdir('../')

In [4]:
%pwd

'/Users/ravina/Desktop/CustomerChurnPrediction'

In [5]:
from dataclasses import dataclass 
from pathlib import Path

@dataclass(frozen=True)  #this is not python class but dataclass, here you can define the veriables without using self keyword.
class ModelTrainerConfig:
    root_dir:Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    n_estimators: int
    min_samples_split: int
    max_depth: int
    criterion: str
    random_state: int
    target_column: str
    #n_estimators=100, random_state=42
    

In [6]:
from CustomerChurnPrediction.constants import *
from CustomerChurnPrediction.utils.common import read_yaml,create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])
        
    def get_model_trainer_config(self)-> ModelTrainerConfig:
        config=self.config.model_trainer
        params=self.params.RandomForestClassifier
        #params=self.params.XGBClassifier
        schema=self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_trainer_config=ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            n_estimators=params.n_estimators,
            min_samples_split=params.min_samples_split,
            max_depth=params.max_depth,
            criterion=params.criterion,
            random_state=params.random_state,
            target_column=schema.name
    
        )

        return model_trainer_config
    #'min_samples_split', 'max_depth', and 'criterion'

In [8]:
import pandas as pd
import os
from CustomerChurnPrediction import logger
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.utils.validation import check_X_y
from sklearn.utils.multiclass import type_of_target
import joblib #we can also use pickle to save the model here, but joblib is better than pickle.

In [9]:
class ModelTrainer:
    def __init__(self,config: ModelTrainerConfig):
        self.config=config
    
    def train(self):
        train_data=pd.read_csv(self.config.train_data_path)
        test_data=pd.read_csv(self.config.test_data_path)

        train_x=train_data.drop([self.config.target_column],axis=1)
        test_x=test_data.drop([self.config.target_column],axis=1)
        train_y=train_data[[self.config.target_column]]
        test_y=test_data[[self.config.target_column]]
        print("shape of train_x is:",train_x)

        train_x, train_y = check_X_y(train_x, train_y.values.ravel(), multi_output=True)
        test_x, test_y = check_X_y(test_x, test_y.values.ravel(), multi_output=True)

        # Check the type of target variable (classification or regression)
        target_type = type_of_target(train_y)
        if target_type not in ['binary', 'multiclass']:
            raise ValueError(f"Unsupported target variable type: {target_type}. Model supports binary or multiclass classification.")


        #lr=ElasticNet(alpha=self.config.alpha,l1_ratio=self.config.l1_ratio,random_state=32)
        rfc=RandomForestClassifier(n_estimators=self.config.n_estimators,min_samples_split = self.config.min_samples_split,max_depth= self.config.max_depth, criterion = self.config.criterion,  random_state=self.config.random_state)
        rfc.fit(train_x,train_y)
        joblib.dump(rfc,os.path.join(self.config.root_dir,self.config.model_name))
        """
        xgb = XGBClassifier(
        subsample=0.7,
        reg_lambda=0.5,
        reg_alpha=0.3,
        n_estimators=self.config.n_estimators,
        min_child_weight=5,
        max_depth=6,
        learning_rate=0.1,
        gamma=0.5,
        colsample_bytree=0.1,
        random_state=self.config.random_state
            )
        xgb.fit(train_x,train_y)
        joblib.dump(xgb,os.path.join(self.config.root_dir,self.config.model_name))


        """
        



In [10]:
try: 
    config=ConfigurationManager()
    model_trainer_config=config.get_model_trainer_config()
    model_trainer_config=ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2024-04-21 19:49:46,598:INFO:yaml file:config/config.yaml loaded successfully]
[2024-04-21 19:49:46,601:INFO:yaml file:params.yaml loaded successfully]
[2024-04-21 19:49:46,604:INFO:yaml file:Schema.yaml loaded successfully]
[2024-04-21 19:49:46,604:INFO:created directory at:artifacts]
[2024-04-21 19:49:46,605:INFO:created directory at:artifacts/model_trainer]
shape of train_x is:        CreditScore       Age    Tenure   Balance  EstimatedSalary  Geography  \
0        -0.138179  0.260825 -0.890057  0.489133        -1.010560          1   
1        -0.812520 -0.282875 -0.350204  1.216528         0.487473          2   
2        -0.419342 -1.426110  1.032908 -1.225848         1.146645          0   
3        -1.441449  0.169917 -1.199920  0.678867         1.405501          0   
4         0.046263 -1.281586  1.724464 -1.225848        -0.842833          0   
...            ...       ...       ...       ...              ...        ...   
12734     0.444615 -0.130923 -1.041760  1.330258       