In [12]:
import os
%pwd  # this tell us which path we are currently working , so based on the below output path we are working under the research file
os.chdir("C:\datascience End to End Projects\End-to-End-Heart-Disease-Application-")  #  but i would like to work with main ProjectML_with_MLFlow file , so for getting i step back in path inorder to enter the main project file i used this command os.chdir("../")
%pwd

'C:\\datascience End to End Projects\\End-to-End-Heart-Disease-Application-'

In [13]:
# Entity class for the model-trainer stage: it holds the model_trainer settings read from config.yaml plus the target column name taken from schema.yaml.
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings consumed by the model-trainer stage.

    In this notebook the fields are filled in by
    ``ConfigurationManager.get_model_trainer_config`` from the ``model_trainer``
    section of the config file and the ``TARGET_COLUMN`` entry of the schema.
    """
    root_dir: Path  # directory of this stage; the pickled model is written inside it
    train_data_path: Path  # CSV file read as the training set
    test_data_path: Path  # CSV file read as the test set
    model_name: str  # file name (inside root_dir) the fitted pipeline is pickled to
    target_column: str  # name of the label column, taken from TARGET_COLUMN.name in the schema
    

In [14]:
from PROJECTML.constants import *
from PROJECTML.utils.common import read_yaml, create_directories

In [15]:
# The same ConfigurationManager template is reused for every pipeline stage (data ingestion, data validation, data transformation, model trainer, etc.).
class ConfigurationManager:
    """Read the project's YAML files and build per-stage configuration objects.

    This notebook version loads the main config and the schema only (params.yaml
    is not read here) and exposes the model-trainer configuration.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Parse both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: location of the main config YAML.
            schema_filepath: location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes below the artifacts root, so create it up front.
        artifacts_dirs = [self.config.artifacts_root]
        create_directories(artifacts_dirs)

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` for the model-trainer stage.

        Reads the ``model_trainer`` section of the config and the
        ``TARGET_COLUMN`` entry of the schema, creates the stage's root
        directory, and returns the assembled configuration object.
        """
        trainer_section = self.config.model_trainer
        target_schema = self.schema.TARGET_COLUMN

        # The stage directory has to exist before the model file is written to it.
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            target_column=target_schema.name,  # label column name declared in the schema
        )

In [39]:
# Packages needed to build the ModelTrainer component.
import pandas as pd
import matplotlib.pyplot as plt
import os
from PROJECTML import logger
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
#from sklearn.metrics import plot_roc_curve
import joblib # here iam saving the model because i want to save the data
from src.PROJECTML.config.configuration import ConfigurationManager
from src.PROJECTML.components.data_transformation import DataTransformation
import pickle

# ModelTrainer takes a ModelTrainerConfig and exposes train() (load and split the data) and model() (fit, evaluate and save the pipeline).
class ModelTrainer:
    """Load the prepared train/test data, fit the pipeline, report metrics and save it.

    Typical use::

        trainer = ModelTrainer(config=model_trainer_config)
        trainer.train()
        trainer.model()
    """

    # Label column used when the config does not name a column present in the data.
    DEFAULT_TARGET_COLUMN = 'HeartDisease'

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the stage configuration.

        Args:
            config: object providing ``train_data_path``, ``test_data_path``,
                ``root_dir``, ``model_name`` and (optionally) ``target_column``.
        """
        self.config = config

    def _target_column(self, frame):
        """Return the label column to split on for ``frame``.

        Prefers ``config.target_column`` when that column exists in the data and
        otherwise falls back to ``DEFAULT_TARGET_COLUMN``, which keeps the
        previous hard-coded behaviour.
        """
        configured = getattr(self.config, "target_column", None)
        if configured and configured in frame.columns:
            return configured
        return self.DEFAULT_TARGET_COLUMN

    @staticmethod
    def _positive_class_scores(estimator, features):
        """Return continuous scores suitable for ROC AUC.

        Uses the second column of ``predict_proba`` when available (assumes a
        binary problem whose positive class is listed second), then
        ``decision_function``, and only as a last resort the hard ``predict``
        labels.
        """
        if hasattr(estimator, "predict_proba"):
            return estimator.predict_proba(features)[:, 1]
        if hasattr(estimator, "decision_function"):
            return estimator.decision_function(features)
        return estimator.predict(features)

    def train(self):
        """Read the train and test CSV files and split features from the label.

        Sets ``train_data``, ``test_data``, ``x_train``, ``y_train``, ``x_test``
        and ``y_test`` on the instance.
        """
        self.train_data = pd.read_csv(self.config.train_data_path)
        self.test_data = pd.read_csv(self.config.test_data_path)

        target = self._target_column(self.train_data)
        self.x_train = self.train_data.drop(columns=[target])
        self.y_train = self.train_data[target]
        self.x_test = self.test_data.drop(columns=[target])
        self.y_test = self.test_data[target]

    def model(self):
        """Fit the pipeline, print evaluation metrics and pickle the fitted pipeline.

        The pipeline comes from ``DataTransformation.pipeline_creation()``. The
        fitted pipeline is written to ``<root_dir>/<model_name>``. If ``train()``
        has not been called yet it is called first so the data splits exist.
        """
        if not hasattr(self, "x_train"):
            self.train()

        config = ConfigurationManager()
        data_transformation_config = config.get_data_transformation_config()
        data_transformation = DataTransformation(config=data_transformation_config)
        pipe = data_transformation.pipeline_creation()
        print(f"got  the pipeline_object {pipe}")

        pipe.fit(self.x_train, self.y_train)
        prediction = pipe.predict(self.x_test)
        # ROC AUC needs ranking scores; hard 0/1 predictions collapse the curve to
        # a single operating point and are not comparable with the 'roc_auc'
        # cross-validation scorer used below.
        test_scores = self._positive_class_scores(pipe, self.x_test)

        cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
        print("Accuracy : ", '{0:.2%}'.format(accuracy_score(self.y_test, prediction)))
        print("Cross Validation Score : ", '{0:.2%}'.format(cross_val_score(pipe, self.x_train, self.y_train, cv=cv, scoring='roc_auc').mean()))
        print("ROC_AUC Score : ", '{0:.2%}'.format(roc_auc_score(self.y_test, test_scores)))

        # Persist the fitted pipeline inside the model-trainer stage directory.
        with open(os.path.join(self.config.root_dir, self.config.model_name), 'wb') as model_pkl_file:
            pickle.dump(pipe, model_pkl_file)


In [41]:
# Run the model-trainer stage end to end: build the config, load the data,
# then fit, evaluate and save the pipeline.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    # NOTE(review): from here on the name refers to the ModelTrainer instance, not
    # the config object; the name is kept in case later cells reference it.
    model_trainer_config = ModelTrainer(config=model_trainer_config)

    model_trainer_config.train()  # read the train/test CSVs and split off the label
    model_trainer_config.model()  # fit the pipeline, print metrics, pickle the model
except Exception as e:
    # Record the failure in the project log, then re-raise with the original traceback.
    logger.exception(e)
    raise

[2024-02-27 20:24:08,455: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-27 20:24:08,458: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-27 20:24:08,461: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-02-27 20:24:08,463: INFO: common: created directory at: artifacts]
[2024-02-27 20:24:08,466: INFO: common: created directory at: artifacts/model_trainer]
[2024-02-27 20:24:08,491: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-27 20:24:08,496: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-27 20:24:08,504: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-02-27 20:24:08,507: INFO: common: created directory at: artifacts]
[2024-02-27 20:24:08,510: INFO: common: created directory at: artifacts/data_transformation]
(734, 11)
(734,)
(184, 11)
(184,)
Index values of x_train: ['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS', 'RestingECG', 'MaxHR', '

[2024-02-27 20:24:08,529: INFO: data_transformation: <<<<<<<---------------------Started feature scalling Pipeline ------------------------>>>>>>>>>>>>>>>>]
ColumnTransformer(remainder='passthrough',
                  transformers=[('minmax_scale', MinMaxScaler(), [7]),
                                ('standard_scale', StandardScaler(),
                                 [0, 3, 5])])
[2024-02-27 20:24:08,535: INFO: data_transformation: <<<<<<<---------------------Started creating Model Pipeline ------------------------>>>>>>>>>>>>>>>>]
LogisticRegression(C=10, max_iter=2000, random_state=0)
[2024-02-27 20:24:08,537: INFO: data_transformation: <<<<<<<---------------------Started Combining all Pipeline into one pipeline------------------------>>>>>>>>>>>>>>>>]
[2024-02-27 20:24:08,538: INFO: data_transformation: <<<<<<<---------------------Making all Pipeline into one pipeline------------------------>>>>>>>>>>>>>>>>]
Pipeline(steps=[('columntransformer-1',
                 ColumnTransform