In [1]:
import os

In [2]:
# Display the kernel's current working directory.
%pwd

'c:\\Users\\kk061\\OneDrive\\Desktop\\python\\Mlflow Main\\Heart Stroke Prediction\\notebook'

In [18]:
import pandas as pd
from pathlib import Path

# The artifacts path is relative to the project root, but this cell is placed
# above the cell that changes into the project root. Fall back to the parent
# directory so the cell works both on a fresh top-to-bottom run (cwd is still
# the notebook folder) and when re-run after the chdir.
train_csv = Path('artifacts/data_transformation/train.csv')
if not train_csv.exists():
    train_csv = Path('..') / train_csv

df = pd.read_csv(train_csv)

# Per-column count of missing values in the transformed training set.
df.isnull().sum()

cat__gender_Female                       0
cat__gender_Male                         0
cat__gender_Other                        0
cat__ever_married_No                     0
cat__ever_married_Yes                    0
cat__work_type_Govt_job                  0
cat__work_type_Never_worked              0
cat__work_type_Private                   0
cat__work_type_Self-employed             0
cat__work_type_children                  0
cat__Residence_type_Rural                0
cat__Residence_type_Urban                0
cat__smoking_status_Unknown              0
cat__smoking_status_formerly smoked      0
cat__smoking_status_never smoked         0
cat__smoking_status_smokes               0
remainder__age                           0
remainder__hypertension                  0
remainder__heart_disease                 0
remainder__avg_glucose_level             0
remainder__bmi                           0
stroke                                 964
dtype: int64

In [3]:
os.chdir('../')
%pwd

'c:\\Users\\kk061\\OneDrive\\Desktop\\python\\Mlflow Main\\Heart Stroke Prediction'

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings for the model-training stage.

    Attributes:
        root_dir: Output directory of the stage; the trained model and the
            fitted scaler are written here.
        train_data_path: CSV file holding the training split.
        test_data_path: CSV file holding the test split.
        model_name: File name (joined onto ``root_dir``) for the saved model.
        target_column: Name of the label column in both CSV files.
    """

    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    target_column: str

In [8]:
from src.heartstrokeprediction.constants import *
from src.heartstrokeprediction.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects.

    The parsed config, params and schema documents are kept on the instance as
    ``self.config``, ``self.params`` and ``self.schema``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and prepare the artifacts root.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the params YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes below this directory, so make sure it exists.
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the configuration for the model-training stage.

        Ensures the stage's output directory exists as a side effect.

        Returns:
            ModelTrainerConfig populated from the ``model_trainer`` section of
            the config file and the ``TARGET_COLUMN`` entry of the schema.
        """
        trainer_cfg = self.config.model_trainer
        target_schema = self.schema.TARGET_COLUMN

        # No hyper-parameters are forwarded to ModelTrainerConfig, so the
        # params file is intentionally not consulted here; a missing section
        # there must not prevent this config from being built.
        create_directories([trainer_cfg.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_cfg.root_dir,
            train_data_path=trainer_cfg.train_data_path,
            test_data_path=trainer_cfg.test_data_path,
            model_name=trainer_cfg.model_name,
            target_column=target_schema.name,
        )

In [21]:
import pandas as pd
import os
from src.heartstrokeprediction import logger
from sklearn.linear_model import LogisticRegression
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report


In [22]:
class ModelTrainer:
    """Fit, evaluate and persist a standard-scaled logistic-regression model."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the stage configuration.

        Args:
            config: Paths, model file name and target column for this stage.
        """
        self.config = config

    def train(self):
        """Train the classifier, print train/test reports and save artifacts.

        Reads the train and test CSV files, standardises the features with a
        scaler fitted on the training split only, fits a logistic regression,
        prints a classification report for each split, and writes the model
        and the scaler into ``config.root_dir`` with joblib.

        Raises:
            ValueError: If the target column contains missing values in either
                split.
        """
        target = self.config.target_column

        # Load the training and testing data
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        # Rows without a label cannot be used for fitting or scoring; fail
        # early with a message that names the split and the column.
        for split_name, frame in (("train", train_data), ("test", test_data)):
            missing = int(frame[target].isna().sum())
            if missing:
                raise ValueError(
                    f"{missing} missing value(s) in target column "
                    f"'{target}' of the {split_name} data"
                )

        # Split features (X) and target (y). The target is selected as a 1-D
        # Series; a single-column DataFrame makes scikit-learn emit a
        # DataConversionWarning when fitting.
        train_x = train_data.drop(columns=[target])
        test_x = test_data.drop(columns=[target])
        train_y = train_data[target]
        test_y = test_data[target]

        # Standard scaling - statistics come from the training set only so no
        # information from the test set leaks into the model.
        scaler = StandardScaler()
        train_x_scaled = scaler.fit_transform(train_x)
        test_x_scaled = scaler.transform(test_x)

        # NOTE(review): the classes are heavily imbalanced and the recorded
        # run of this notebook predicts no positive cases at all; consider
        # class_weight="balanced" or resampling. Left unchanged here because
        # it alters the saved model.
        lr = LogisticRegression(random_state=42)
        lr.fit(train_x_scaled, train_y)

        # Evaluate the model
        train_predictions = lr.predict(train_x_scaled)
        test_predictions = lr.predict(test_x_scaled)

        # zero_division=0 reports the same numbers as the default but without
        # an UndefinedMetricWarning for every class that is never predicted.
        print("Training Classification Report:")
        print(classification_report(train_y, train_predictions, zero_division=0))

        print("Testing Classification Report:")
        print(classification_report(test_y, test_predictions, zero_division=0))

        # Save the trained model together with the scaler it depends on
        model_path = os.path.join(self.config.root_dir, self.config.model_name)
        scaler_path = os.path.join(self.config.root_dir, "scaler.pkl")
        joblib.dump(lr, model_path)
        joblib.dump(scaler, scaler_path)

        print(f"Model saved to {model_path}")
        print(f"Scaler saved to {scaler_path}")


In [23]:
# Run the model-training stage end to end. Errors propagate unchanged: the
# previous `except Exception as e: raise e` wrapper handled nothing and only
# added an extra frame to the traceback.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2025-04-09 02:55:28,107: INFO: common: YAML file: config\config.yaml loaded successfully]
[2025-04-09 02:55:28,109: INFO: common: YAML file: params.yaml loaded successfully]
[2025-04-09 02:55:28,112: INFO: common: YAML file: schema.yaml loaded successfully]
[2025-04-09 02:55:28,115: INFO: common: created directory at: artifacts]
[2025-04-09 02:55:28,117: INFO: common: created directory at: artifacts/model_trainer]
Training Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98      3417
           1       0.00      0.00      0.00       160

    accuracy                           0.96      3577
   macro avg       0.48      0.50      0.49      3577
weighted avg       0.91      0.96      0.93      3577

Testing Classification Report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97      1444
           1       0.00      0.00      0.00        89

    accuracy               

  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
