In [1]:
import os
%pwd  # this tell us which path we are currently working , so based on the below output path we are working under the research file
os.chdir("C:\datascience End to End Projects\End-to-End-Heart-Disease-Application-main")  #  but i would like to work with main ProjectML_with_MLFlow file , so for getting i step back in path inorder to enter the main project file i used this command os.chdir("../")
%pwd

'C:\\datascience End to End Projects\\End-to-End-Heart-Disease-Application-main'

In [2]:
# Entity class for the model-trainer stage: it holds the model_trainer settings read from config.yaml plus the target column name taken from schema.yaml.
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-trainer stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config`` and
    consumed by ``ModelTrainer``.
    """
    root_dir: Path  # output directory of this stage; the fitted model file is saved inside it
    train_data_path: Path  # CSV file read as the training set
    test_data_path: Path  # CSV file read as the test set
    model_name: str  # file name of the saved model, joined onto root_dir
    target_column: str  # name of the label column, taken from TARGET_COLUMN.name in the schema file

In [3]:
from PROJECTML.constants import *
from PROJECTML.utils.common import read_yaml, create_directories

In [4]:
# The same ConfigurationManager template is reused for every pipeline stage (data ingestion, data validation, data transformation, model trainer, etc.).
class ConfigurationManager:
    """Load the project YAML files and build per-stage configuration objects.

    The constructor reads the main config file and the schema file and makes
    sure the top-level artifacts directory exists.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: location of the main config file
                (defaults to CONFIG_FILE_PATH).
            schema_filepath: location of the schema file
                (defaults to SCHEMA_FILE_PATH).
        """
        # read_yaml is a project helper; its result is used with attribute
        # access below (self.config.artifacts_root, self.schema.TARGET_COLUMN).
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the configuration for the model-trainer stage.

        Reads the ``model_trainer`` section of the config file and the
        ``TARGET_COLUMN`` entry of the schema file, and creates the stage's
        root directory.

        Returns:
            ModelTrainerConfig: settings for ``ModelTrainer``. The path fields
            are converted to ``Path`` so they match the dataclass annotations.
        """
        trainer_section = self.config.model_trainer
        target_schema = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=Path(trainer_section.root_dir),
            train_data_path=Path(trainer_section.train_data_path),
            test_data_path=Path(trainer_section.test_data_path),
            model_name=trainer_section.model_name,
            # The label column name is stored under TARGET_COLUMN.name in the schema.
            target_column=target_schema.name,
        )

In [5]:
# Packages needed to build the model trainer component.
import pandas as pd
import matplotlib.pyplot as plt
import os
from PROJECTML import logger
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
#from sklearn.metrics import plot_roc_curve
import joblib # here iam saving the model because i want to save the data


# ModelTrainer takes a ModelTrainerConfig and uses it to load the data, fit the model, and save it.
class ModelTrainer:
    """Fit, evaluate and save a logistic-regression classifier.

    Typical use is ``train()`` followed by ``model()``. ``model()`` also loads
    the data itself when ``train()`` has not been called yet.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Keep the stage configuration; no files are read here."""
        self.config = config

    def train(self):
        """Load the train and test CSV files and split off the target column.

        Despite its name, this method does not fit anything: it only populates
        ``train_x``, ``train_y``, ``test_x`` and ``test_y`` on the instance.

        Raises:
            FileNotFoundError: if either CSV file named in the config is missing.
            KeyError: if the target column is absent from a file.
        """
        target = self.config.target_column
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        # Features are every column except the target; the target is the label.
        self.train_x = train_data.drop(columns=[target])
        self.test_x = test_data.drop(columns=[target])
        self.train_y = train_data[target]
        self.test_y = test_data[target]

    def model(self):
        """Fit the classifier, print evaluation metrics and save the model.

        Prints test accuracy, the mean cross-validated ROC AUC on the training
        set, and the test ROC AUC, then writes the fitted estimator to
        ``<root_dir>/<model_name>`` with joblib.
        """
        # Load the data on demand so calling model() first does not fail with
        # an AttributeError on the missing train/test attributes.
        if getattr(self, "train_x", None) is None:
            self.train()

        self.classifier_lr = LogisticRegression(random_state = 0, C=10, penalty= 'l2')
        self.classifier_lr.fit(self.train_x,self.train_y)
        prediction = self.classifier_lr.predict(self.test_x)

        # ROC AUC has to be computed from scores, not from hard 0/1 predictions;
        # with hard labels the curve collapses to a single threshold.
        # Assumes a binary target, where column 1 holds the probability of the
        # class with the greater label — TODO confirm against the dataset.
        positive_scores = self.classifier_lr.predict_proba(self.test_x)[:, 1]

        cv = RepeatedStratifiedKFold(n_splits = 10,n_repeats = 3,random_state = 1)
        cv_auc = cross_val_score(self.classifier_lr,self.train_x,self.train_y,cv = cv,scoring = 'roc_auc').mean()

        print("Accuracy : ",'{0:.2%}'.format(accuracy_score(self.test_y,prediction)))
        print("Cross Validation Score : ",'{0:.2%}'.format(cv_auc))
        print("ROC_AUC Score : ",'{0:.2%}'.format(roc_auc_score(self.test_y,positive_scores)))

        # Save the fitted model inside the stage's root directory, creating the
        # directory first in case it does not exist yet.
        os.makedirs(self.config.root_dir, exist_ok=True)
        joblib.dump(self.classifier_lr, os.path.join(self.config.root_dir, self.config.model_name))

In [6]:
# Run the model-trainer stage end to end.
# The train/test CSV files named in config.yaml must already exist; the recorded
# run below failed with FileNotFoundError for
# artifacts\feature_selection\Modified_train.csv.
# Errors are left to propagate: the previous try/except only re-raised the same
# exception, so it added nothing.
config = ConfigurationManager()  # loads the YAML files and creates the artifacts directory
model_trainer_config = config.get_model_trainer_config()  # settings for this stage
model_trainer = ModelTrainer(config=model_trainer_config)  # separate name: the config object is not overwritten
model_trainer.train()  # load the train/test data
model_trainer.model()  # fit, evaluate and save the classifier

[2024-02-22 01:58:57,989: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-22 01:58:57,994: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-02-22 01:58:57,998: INFO: common: created directory at: artifacts]
[2024-02-22 01:58:58,001: INFO: common: created directory at: artifacts/model_trainer]


FileNotFoundError: [Errno 2] No such file or directory: 'artifacts\\feature_selection\\Modified_train.csv'