In [None]:
import os

In [None]:
%pwd

In [None]:
# Change the current working directory to the project root.
# Guarded so that re-running this cell (or running a later copy of it in the
# same kernel) does not keep climbing above the project root: only step up
# while the kernel is still inside the notebook/ folder.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir("../")

In [None]:
%pwd

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    from the ``data_ingestion`` section of the loaded config. The values are
    passed through as read from the config, so they may be plain strings
    rather than ``Path`` objects (``DataIngestion`` wraps them in ``Path``).
    """
    # Stage directory; created before this config is built.
    root_dir: Path
    # File that DataIngestion.copy_data_file copies from.
    source_data_file: Path
    # Destination of the copy; the copy is skipped if it already exists.
    local_data_file: Path

In [None]:
from defaultMlProj.constants.constant import *
from defaultMlProj.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Loads the project's YAML files and hands out the data-ingestion config."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Read the config and params files, then ensure the artifacts root exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion directory and return the stage's frozen config."""
        ingestion = self.config.data_ingestion

        # The stage directory must exist before any component writes into it
        create_directories([ingestion.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion.root_dir,
            source_data_file=ingestion.source_data_file,
            local_data_file=ingestion.local_data_file,
        )

In [None]:
import os
import shutil
from defaultMlProj import logger
from defaultMlProj.utils.common import get_size


In [None]:
class DataIngestion:
    """Copies the raw data file into the ingestion stage's location."""

    def __init__ (self, config: DataIngestionConfig):
        self.config = config

    def copy_data_file(self):
        """Copy ``source_data_file`` to ``local_data_file`` unless it already exists.

        An existing destination is left untouched (the copy is skipped), so a
        changed source file is NOT picked up until the destination is removed.

        Raises:
            FileNotFoundError: if the source file does not exist.
            Exception: any other error raised while creating the destination
                directory or copying; it is logged and re-raised unchanged.
        """
        source = Path(self.config.source_data_file)
        destination = Path(self.config.local_data_file)

        try:
            logger.info(f"Starting data ingestion: copying {source} to {destination}")

            # Check the source first so a bad path does not leave an empty
            # destination directory tree behind.
            if not source.exists():
                raise FileNotFoundError(f"Source file {source.absolute()} does not exist")

            destination.parent.mkdir(parents=True, exist_ok=True)

            if destination.exists():
                logger.info(f"File destination {destination} already exists. Skipping copy.")
            else:
                shutil.copy(source, destination)
                logger.info(f"File copied successfully: {source} to {destination}")

        except Exception as e:
            logger.exception(f"Error occurred while copying data file: {e}")
            # Bare raise keeps the original traceback intact
            raise

In [None]:
from pathlib import Path

# Sanity check: report the working directory and whether the raw CSV is
# reachable at the relative path used below.
source_path = Path("notebook") / "data" / "default.csv"

for label, value in (
    ("Current working directory:", Path(".").absolute()),
    ("Expected source path:", source_path.absolute()),
    ("Does file exist?", source_path.exists()),
):
    print(label, value)

In [None]:
# Run the data-ingestion stage end to end: load the config, build the
# component, copy the file.
# No try/except here: the previous wrapper only re-raised the same exception,
# and copy_data_file already logs failures before propagating them.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.copy_data_file()

##### Stage two: Data Validation

In [None]:
import os

In [None]:
%pwd

In [None]:
# Step up to the project root only while still inside notebook/, so this cell
# is safe to re-run and safe after an earlier chdir in the same kernel.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir("../")

In [None]:
%pwd

In [None]:
import pandas as pd

In [None]:
# First look at the ingested file: parsed as tab-separated (sep="\t"), path is
# relative to the project root, so the chdir cell above must have run.
df = pd.read_csv(r"artifacts/data_ingestion/default.csv", sep="\t")
df.head()

In [None]:
df.shape

In [None]:
df.info()

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable settings for the data-validation stage."""
    # Stage directory; created by ConfigurationManager.get_data_validation_config.
    root_dir: Path
    # Path of the text file the validation result is written to.
    STATUS_FILE: str
    # Expected schema; DataValidation compares its keys against the CSV's column names.
    all_schema: dict

In [None]:
from defaultMlProj.constants.constant import *
from defaultMlProj.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Loads the config, params and schema YAML files for the validation stage."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH
    ):
        """Read the three YAML files and ensure the artifacts root exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Create the validation directory and return the stage's frozen config.

        Returns:
            DataValidationConfig carrying the stage directory, the status-file
            path and the ``columns`` section of the schema file.
        """
        config = self.config.data_validation
        schema = self.schema.columns

        create_directories([config.root_dir])

        data_validation_config = DataValidationConfig(
            root_dir=config.root_dir,
            STATUS_FILE=config.STATUS_FILE,
            # all_schema is a required field: omitting it made the constructor
            # raise TypeError, so the schema that was loaded was never used.
            all_schema=schema,
        )

        return data_validation_config

In [None]:
import os
from defaultMlProj import logger
from defaultMlProj.entity.config_entity import DataValidationConfig
import pandas as pd

class DataValidation:
    """Checks the column names of the data file against the expected schema."""

    def __init__ (self, config: DataValidationConfig):
        self.config =config

    def validate_all_columns(self) -> bool:
        """Validate every column name of the CSV against ``config.all_schema``.

        The result is True only when every column found in the data is a key
        of the schema. The status is written to ``config.STATUS_FILE`` once,
        after all columns have been checked, so it reflects the whole file
        rather than just the last column inspected.

        Returns:
            bool: True if no unexpected column was found, False otherwise.

        Raises:
            Exception: any error while reading the data or writing the status
                file is logged and re-raised unchanged.
        """
        try:
            logger.info("Starting data validation: validating all columns")

            # NOTE(review): root_dir is the stage directory that the
            # configuration manager creates; reading it as a CSV looks
            # unintended, but DataValidationConfig has no data-file field.
            # Confirm which path (and which separator) should be used here.
            df = pd.read_csv(self.config.root_dir)

            expected_cols = set(self.config.all_schema.keys())
            unexpected_cols = [col for col in df.columns if col not in expected_cols]

            # Valid only when nothing falls outside the schema
            validation_status = not unexpected_cols
            if unexpected_cols:
                logger.info(f"Columns not present in schema: {unexpected_cols}")

            with open(self.config.STATUS_FILE, 'w') as f:
                f.write(f"Validation status: {validation_status}\n")

            logger.info(f"Data validation completed with status: {validation_status}")

            return validation_status
        except Exception as e:
            logger.exception(f"Error occurred during data validation: {e}")
            # Bare raise keeps the original traceback intact
            raise





In [None]:
# Run the data-validation stage: load the config, build the component, validate.
# No try/except here: the previous wrapper only re-raised the same exception,
# and validate_all_columns already logs failures before propagating them.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_config()
data_validation = DataValidation(config=data_validation_config)
data_validation.validate_all_columns()

#### Note: the validation stage is skipped in my defaultMlProj

##### Workflows
1. Update config.yaml
2. Update schema.yaml
3. Update params.yaml
4. Update entity
5. Update the configuration manager in src config
6. Update the components
7. Update the pipeline
8. Update the main.py
9. Update the app.py

#### Data transformation stage

In [None]:
import os
from pathlib import Path


# NOTE(review): the class name is misspelled ("Transformaton"); a correctly
# spelled DataTransformationConfig with the same fields is defined further
# down in this file. This cell also does not import `dataclass` itself and
# relies on an earlier cell having done so.
@dataclass(frozen=True)
class DataTransformatonConfig:
    """Immutable settings for the data-transformation stage (early draft)."""
    # Stage directory; created by get_data_transformation_config.
    root_dir: Path
    # Location of the input data, taken from the data_transformation config section.
    data_path: Path


In [None]:
from defaultMlProj.constants.constant import *
from defaultMlProj.utils.common import read_yaml, create_directories


class ConfigurationManager:
    """Loads the project's YAML files and hands out the data-transformation config."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Read the config and params files, then ensure the artifacts root exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformatonConfig:
        """Create the transformation directory and return the stage's frozen config."""
        transformation = self.config.data_transformation

        # The stage directory must exist before any component writes into it
        create_directories([transformation.root_dir])

        return DataTransformatonConfig(
            root_dir=transformation.root_dir,
            data_path=transformation.data_path,
        )

#### Data Transformation stage

In [None]:
import os

In [None]:
%pwd

In [None]:
# Step up to the project root only while still inside notebook/, so this cell
# is safe to re-run and safe after an earlier chdir in the same kernel.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir("../")

In [None]:
%pwd

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage."""
    # Stage directory; DataTransformation writes train.csv and test.csv here.
    root_dir: Path
    # Input data file; DataTransformation reads it as a tab-separated CSV.
    data_path: Path

In [None]:
from defaultMlProj.constants.constant import *
from defaultMlProj.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Loads the project's YAML files and hands out the data-transformation config."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Read the config and params files, then ensure the artifacts root exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Create the transformation directory and return the stage's frozen config."""
        transformation = self.config.data_transformation

        # The stage directory must exist before train/test files are written into it
        create_directories([transformation.root_dir])

        return DataTransformationConfig(
            root_dir=transformation.root_dir,
            data_path=transformation.data_path,
        )

        

In [None]:
# components/data_transformation.py

import os
import pandas as pd
from pathlib import Path
from defaultMlProj import logger
from sklearn.model_selection import train_test_split


class DataTransformation:
    """Splits the ingested data into train and test CSV files."""

    def __init__(self, config: DataTransformationConfig, target_column: str = "default_risk_score"):
        """
        Args:
            config: stage settings (input ``data_path`` and output ``root_dir``).
            target_column: name of the column to predict; defaults to the
                value that used to be hard-coded here.
        """
        self.config = config
        self.target_column = target_column

    def train_test_split(self, test_size: float = 0.2, random_state: int = 42):
        """Split the data and write ``train.csv`` / ``test.csv`` into ``root_dir``.

        The input is read as a tab-separated file. Both outputs keep the
        target as their last column and are written with ``to_csv`` defaults
        (comma-separated, no index).

        Args:
            test_size: fraction (or count) of rows for the test set, passed
                straight to scikit-learn's ``train_test_split``.
            random_state: seed for the shuffle, for reproducible splits.

        Returns:
            tuple: ``(train_df, test_df)`` as written to disk.

        Raises:
            ValueError: if the target column is missing from the data.
            Exception: any other error is logged and re-raised unchanged.
        """
        try:
            logger.info("Starting data transformation: train-test split")

            # Read inside the try so a missing or unreadable file is logged
            # like every other failure of this stage.
            df = pd.read_csv(self.config.data_path, sep='\t')
            logger.info(f"Full dataset shape: {df.shape}")

            # Validate target column exists
            if self.target_column not in df.columns:
                raise ValueError(f"Target column '{self.target_column}' not found in data. Columns: {list(df.columns)}")

            # Separate features and target
            X = df.drop(columns=[self.target_column])
            y = df[self.target_column]

            logger.info(f"Feature matrix X shape: {X.shape}")
            logger.info(f"Target vector y shape: {y.shape}")

            # Inside this method the bare name resolves to the scikit-learn
            # function imported at module level, not to this method.
            X_train, X_test, y_train, y_test = train_test_split(
                X, y,
                test_size=test_size,
                random_state=random_state
            )

            logger.info(f"Train features shape: {X_train.shape}, Train target shape: {y_train.shape}")
            logger.info(f"Test features shape: {X_test.shape}, Test target shape: {y_test.shape}")

            # Recombine features and target so each saved file is self-contained;
            # assign() returns a new frame and appends the target as last column.
            train_df = X_train.assign(**{self.target_column: y_train.values})
            test_df = X_test.assign(**{self.target_column: y_test.values})

            # Save to CSV
            train_csv_path = os.path.join(self.config.root_dir, "train.csv")
            test_csv_path = os.path.join(self.config.root_dir, "test.csv")

            train_df.to_csv(train_csv_path, index=False)
            test_df.to_csv(test_csv_path, index=False)

            logger.info(f"Train dataset saved to {train_csv_path}")
            logger.info(f"Test dataset saved to {test_csv_path}")

            return train_df, test_df

        except Exception as e:
            logger.exception(f"Error occurred during train-test split: {e}")
            # Bare raise keeps the original traceback intact
            raise

In [None]:
# Run the data-transformation stage: load the config, build the component, split.
# No try/except here: the previous wrapper only re-raised the same exception,
# and the component already logs failures before propagating them.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
data_transformation.train_test_split()

#### Model Trainer Stage

In [None]:
import os

In [None]:
%pwd

In [None]:
# Step up to the project root only while still inside notebook/, so this cell
# is safe to re-run and safe after an earlier chdir in the same kernel.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir("../")

In [None]:
%pwd

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage."""
    # Stage directory; created by ConfigurationManager.get_model_trainer_config.
    root_dir: Path
    # Training set; ModelTrainer reads it as a comma-separated CSV.
    train_data_path: Path
    # Test set; ModelTrainer reads it as a comma-separated CSV.
    test_data_path: Path
    # Despite the name, ModelTrainer treats this as the full output path of
    # the saved model (its parent directory is created, then joblib.dump writes to it).
    model_name: str

In [None]:
from defaultMlProj.constants.constant import *
from defaultMlProj.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Loads the project's YAML files and hands out the model-trainer config."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Read the config and params files, then ensure the artifacts root exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Create the trainer directory and return the stage's frozen config."""
        trainer = self.config.model_trainer

        # The stage directory must exist before the model is saved into it
        create_directories([trainer.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer.root_dir,
            train_data_path=trainer.train_data_path,
            test_data_path=trainer.test_data_path,
            model_name=trainer.model_name,
        )

    def get_params(self):
        """Return the object loaded from the params file in ``__init__``."""
        return self.params

In [None]:
import os
import pandas as pd
import numpy as np
import joblib
import xgboost
from defaultMlProj import logger
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression 
from sklearn.ensemble import StackingRegressor    
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
class ModelTrainer:
    """Builds the candidate regressors, selects the best by cross-validated R2 and saves it."""

    def __init__(self, config: ModelTrainerConfig, params):
        self.config = config
        self.params = params
        self.target_column = params.target_column

    def create_model(self):
        """Instantiate every candidate model from ``params.model_params``.

        Returns:
            dict: model name -> unfitted estimator. The four base models come
            first; 'Stacking Regressor', which stacks those four under a
            linear-regression final estimator, is added last.

        Raises:
            Exception: any construction error is logged and re-raised.
        """
        try:
            logger.info("Started creating models")
            hp = self.params.model_params

            # Linear regression and KNN sit behind a StandardScaler pipeline;
            # the two tree-based models are used bare.
            models = {
                'LinearRegression': Pipeline([
                    ('scaler', StandardScaler()),
                    ('regressor', LinearRegression(
                        fit_intercept=hp.linear_regression.fit_intercept
                    )),
                ]),
                'KNN': Pipeline([
                    ('scaler', StandardScaler()),
                    ('regressor', KNeighborsRegressor(
                        n_neighbors=hp.knn.n_neighbors,
                        weights=hp.knn.weights,
                        algorithm=hp.knn.algorithm
                    )),
                ]),
                'DecisionTree': DecisionTreeRegressor(
                    criterion=hp.decision_tree.criterion,
                    max_depth=hp.decision_tree.max_depth,
                    min_samples_split=hp.decision_tree.min_samples_split,
                    min_samples_leaf=hp.decision_tree.min_samples_leaf,
                    random_state=hp.decision_tree.random_state
                ),
                'RandomForest': RandomForestRegressor(
                    n_estimators=hp.random_forest.n_estimators,
                    criterion=hp.random_forest.criterion,
                    max_depth=hp.random_forest.max_depth,
                    min_samples_split=hp.random_forest.min_samples_split,
                    min_samples_leaf=hp.random_forest.min_samples_leaf,
                    random_state=hp.random_forest.random_state
                ),
            }

            # Snapshot the base learners before the stack itself joins the dict
            stack_members = list(models.items())
            models['Stacking Regressor'] = StackingRegressor(
                estimators=stack_members,
                final_estimator=LinearRegression(
                    fit_intercept=hp.linear_regression.fit_intercept
                ),
                cv=hp.stacking_regressor.cv,
                n_jobs=hp.stacking_regressor.n_jobs
            )

            logger.info(f"Models created: {list(models.keys())}")
            return models

        except Exception as e:
            logger.exception(f"Error occurred while creating models: {e}")
            raise e

    def train_and_evaluate(self):
        """Cross-validate all candidates, refit the winner and persist it.

        Each model is scored with K-fold R2 on the training set (fold settings
        come from ``params.cv_settings``). The model with the highest mean
        score is fitted on the full training set, evaluated on the test set
        and dumped with joblib to ``config.model_name``.

        Returns:
            tuple: ``(best_model, test_r2, test_rmse)``.

        Raises:
            Exception: any failure is logged and re-raised.
        """
        logger.info("Starting model training with external parameters")
        try:
            # Both files are comma-separated
            train_df = pd.read_csv(self.config.train_data_path, sep=',')
            test_df = pd.read_csv(self.config.test_data_path, sep=',')

            logger.info(f"Train data shape: {train_df.shape}, Test data shape: {test_df.shape}")

            target = self.target_column
            X_train, y_train = train_df.drop(columns=[target]), train_df[target]
            X_test, y_test = test_df.drop(columns=[target]), test_df[target]

            logger.info(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
            logger.info(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

            candidates = self.create_model()

            # Fold configuration is driven entirely by params
            cv_cfg = self.params.cv_settings
            folds = KFold(
                n_splits=cv_cfg.n_splits,
                shuffle=cv_cfg.shuffle,
                random_state=cv_cfg.random_state
            )

            results = {}
            for name, estimator in candidates.items():
                try:
                    fold_scores = cross_val_score(estimator, X_train, y_train, cv=folds, scoring='r2')
                    results[name] = fold_scores
                    logger.info(f"{name} R2 = {fold_scores.mean():.4f} (+/- {fold_scores.std() * 2:.4f})")
                except Exception as e:
                    logger.exception(f"Failed to evaluate {name}: {e}")
                    raise e

            # Winner = highest mean cross-validated R2 (first one wins a tie)
            mean_r2 = {name: fold_scores.mean() for name, fold_scores in results.items()}
            best_name = max(mean_r2, key=mean_r2.get)
            best_model = candidates[best_name].fit(X_train, y_train)

            # Held-out evaluation of the refitted winner
            y_pred = best_model.predict(X_test)
            test_r2 = r2_score(y_test, y_pred)
            test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

            logger.info(f"Best model: {best_name} | Test R2 : {test_r2:.4f}, RMSE : {test_rmse:.4f}")

            # Make sure the target folder exists, then persist the model
            output_dir = Path(self.config.model_name).parent
            output_dir.mkdir(parents=True, exist_ok=True)
            joblib.dump(best_model, self.config.model_name)
            logger.info(f"Model saved to {self.config.model_name}")

            return best_model, test_r2, test_rmse
        except Exception as e:
            logger.exception(f"Error occurred during model training and evaluation: {e}")
            raise e

In [None]:
# Run the model-training stage: load config and params, train, evaluate, save.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    params = config.get_params()
    model_trainer = ModelTrainer(config=model_trainer_config, params=params)
    model_trainer.train_and_evaluate()
except Exception as e:
    # logger.exception records the failure at ERROR level with its traceback;
    # logger.info would bury it among routine progress messages.
    logger.exception(f"Error in model training pipeline: {e}")
    raise

In [None]:
# The train file is produced with DataFrame.to_csv defaults (comma-separated)
# and ModelTrainer reads it with sep=','; parsing it as tab-separated would
# collapse every column into one, so use the same separator here.
train_df = pd.read_csv(model_trainer_config.train_data_path, sep=',')
print("Columns in train_df:", train_df.columns.tolist())

#### Model Evaluation Stage

In [1]:
import os

In [2]:
%pwd

'd:\\End-to-end-Default-Risk-Pred-mlProject\\notebook'

In [3]:
# Step up to the project root only while still inside notebook/, so this cell
# is safe to re-run and safe after an earlier chdir in the same kernel.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir("../")

In [4]:
%pwd

'd:\\End-to-end-Default-Risk-Pred-mlProject'

In [13]:
# Configure DagsHub for MLflow tracking
# NOTE(review): the repo owner, repo name, tracking URI and experiment name
# are hard-coded here, while the ModelEvaluation component below sets the
# tracking URI and experiment again from config (mlflow_uri / experiment_name).
# Confirm both places stay in sync, or keep only one source of truth.
import dagshub
import mlflow

dagshub.init(repo_owner='klan86at', repo_name='Default-risk-prediction', mlflow=True)

# Point MLflow at the DagsHub-hosted tracking server and select the experiment
mlflow.set_tracking_uri("https://dagshub.com/klan86at/Default-risk-prediction.mlflow")
mlflow.set_experiment("DefaultRiskPrediction")

[2025-07-25 15:26:53,581: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-07-25 15:26:53,590: INFO: helpers: Accessing as klan86at]
[2025-07-25 15:26:54,444: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/repos/klan86at/Default-risk-prediction "HTTP/1.1 200 OK"]
[2025-07-25 15:26:55,325: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-07-25 15:26:55,335: INFO: helpers: Initialized MLflow to track repo "klan86at/Default-risk-prediction"]


[2025-07-25 15:26:55,339: INFO: helpers: Repository klan86at/Default-risk-prediction initialized!]


2025/07/25 15:26:55 INFO mlflow.tracking.fluent: Experiment with name 'DefaultRiskPrediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/73d4320aaa634e8498ebadec4f5a8af9', creation_time=1753446414716, experiment_id='0', last_update_time=1753446414716, lifecycle_stage='active', name='DefaultRiskPrediction', tags={}>

In [14]:
from pathlib import Path
from dataclasses import dataclass

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings for the model-evaluation stage."""
    # Stage directory; created by the configuration manager and again by evaluate_model.
    root_dir: Path
    # Test set; ModelEvaluation reads it as a comma-separated CSV.
    test_data_path: Path
    # Serialized model that ModelEvaluation loads with joblib.
    model_path: Path
    # Path of the JSON file the metrics are written to (used as a full path, not a bare name).
    metric_file_name: str
    # Tracking URI handed to mlflow.set_tracking_uri.
    mlflow_uri: str
    # Experiment name handed to mlflow.set_experiment.
    experiment_name: str

In [15]:
from defaultMlProj.constants.constant import *
from defaultMlProj.utils.common import read_yaml, create_directories, save_json

In [16]:
# ModelEvaluation Configuration Manager

class ConfigurationManager:
    """Loads the project's YAML files and hands out the model-evaluation config."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Read the config and params files, then ensure the artifacts root exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Create the evaluation directory and return the stage's frozen config."""
        evaluation = self.config.model_evaluation

        # The stage directory must exist before the metrics file is written into it
        create_directories([evaluation.root_dir])

        return ModelEvaluationConfig(
            root_dir=evaluation.root_dir,
            test_data_path=evaluation.test_data_path,
            model_path=evaluation.model_path,
            metric_file_name=evaluation.metric_file_name,
            mlflow_uri=evaluation.mlflow_uri,
            experiment_name=evaluation.experiment_name,
        )

    def get_params(self):
        """Return the object loaded from the params file in ``__init__``."""
        return self.params


In [20]:
# Model evaluation component
import os
import joblib
import json
import pandas as pd
import mlflow
import mlflow.sklearn
from defaultMlProj import logger
from pathlib import Path
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from defaultMlProj.entity.config_entity import ModelEvaluationConfig
from defaultMlProj.utils.common import read_yaml, create_directories

class ModelEvaluation:
    """Scores the saved model on the test set and records the run in MLflow."""

    def __init__(self, config: ModelEvaluationConfig, params):
        self.config = config
        self.params = params
        self.target_column = params.target_column

    def evaluate_model(self):
        """Evaluate the persisted model and log parameters, metrics and artifacts.

        Steps: load the comma-separated test set, load the model with joblib,
        compute R2 / RMSE / MAE, write them to ``config.metric_file_name`` as
        JSON, then log params, metrics, the model file and the metrics file
        to MLflow under ``config.experiment_name``.

        Returns:
            dict: ``{"r2_score", "rmse", "mae"}`` as plain floats. ``rmse`` is
            the square root of the mean squared error, matching what the
            training stage reports.

        Raises:
            Exception: any failure is logged and re-raised unchanged.
        """
        try:
            logger.info("Starting model evaluation with Mlflow logging")

            os.makedirs(self.config.root_dir, exist_ok=True)

            # Load test data
            test_df = pd.read_csv(self.config.test_data_path, sep=',')
            X_test = test_df.drop(columns=[self.target_column])
            y_test = test_df[self.target_column]

            logger.info(f"Test data shape: {test_df.shape}")

            # Load trained model
            model = joblib.load(self.config.model_path)
            logger.info(f"Model loaded from {self.config.model_path}")

            # Make predictions
            y_pred = model.predict(X_test)

            # mean_squared_error returns the MSE; take the square root so the
            # value stored under "rmse" really is a root-mean-squared error.
            mse = mean_squared_error(y_test, y_pred)
            r2 = float(r2_score(y_test, y_pred))
            rmse = float(mse) ** 0.5
            mae = float(mean_absolute_error(y_test, y_pred))

            metrics = {
                "r2_score": r2,
                "rmse": rmse,
                "mae": mae
            }

            # The metrics file may live outside root_dir; make sure its folder exists
            Path(self.config.metric_file_name).parent.mkdir(parents=True, exist_ok=True)
            with open(self.config.metric_file_name, 'w') as f:
                json.dump(metrics, f, indent=4)

            logger.info(f"Metrics saved to {self.config.metric_file_name}")

            # Set up MLflow
            mlflow.set_tracking_uri(self.config.mlflow_uri)
            mlflow.set_experiment(self.config.experiment_name)

            with mlflow.start_run():
                # Log the parameters
                self.log_params_flattened(self.params.model_params)
                mlflow.log_param("target_column", self.target_column)
                mlflow.log_param("cv_splits", self.params.cv_settings.n_splits)

                # Log the metrics
                mlflow.log_metric("r2_score", r2)
                mlflow.log_metric("rmse", rmse)
                mlflow.log_metric("mae", mae)

                # Upload the model file that was just loaded. It is already on
                # disk, so it is not re-serialized over itself first.
                model_artifact_path = Path(self.config.model_path)
                mlflow.log_artifact(model_artifact_path, "model")
                logger.info(f"Model logged to MLflow as artifact: {model_artifact_path}")

                # Log artifacts
                mlflow.log_artifact(self.config.metric_file_name)
                logger.info(f"Model and metrics logged to mlflow under experiment '{self.config.experiment_name}'")

            return metrics
        except Exception as e:
            logger.exception(f"Error occurred during model evaluation: {e}")
            # Bare raise keeps the original traceback intact
            raise

    def log_params_flattened(self, params, parent_key=''):
        """Recursively log parameters to MLflow, flattening nested dictionaries.

        Nested keys are joined with dots, e.g. ``{"knn": {"weights": "x"}}``
        is logged as the parameter ``knn.weights``.

        Args:
            params: mapping of parameter names to values or nested dicts.
            parent_key: dotted prefix accumulated from the enclosing levels.
        """
        for key, value in params.items():
            new_key = f"{parent_key}.{key}" if parent_key else key
            if isinstance(value, dict):
                self.log_params_flattened(value, new_key)
            else:
                mlflow.log_param(new_key, value)



In [21]:
# Model evaluation pipeline: load config and params, evaluate, log to MLflow.
try:
    config = ConfigurationManager()
    model_eval_config = config.get_model_evaluation_config()
    params = config.get_params()
    model_eval = ModelEvaluation(config=model_eval_config, params=params)
    metrics = model_eval.evaluate_model()
    logger.info(f"Model evaluation completed with metrics: {metrics}")
except Exception as e:
    # logger.exception records the failure at ERROR level with its traceback;
    # logger.info would bury it among routine progress messages.
    logger.exception(f"Error in model evaluation pipeline: {e}")
    raise

[2025-07-25 15:47:19,010: INFO: common: YAML file config\config.yaml loaded successfully.]
[2025-07-25 15:47:19,016: INFO: common: YAML file params.yaml loaded successfully.]
[2025-07-25 15:47:19,018: INFO: common: Created directory: artifacts]
[2025-07-25 15:47:19,021: INFO: common: Created directory: artifacts/model_evaluation]
[2025-07-25 15:47:19,022: INFO: 2561630473: Starting model evaluation with Mlflow logging]
[2025-07-25 15:47:19,029: INFO: 2561630473: Test data shape: (160, 10)]
[2025-07-25 15:47:19,033: INFO: 2561630473: Model loaded from artifacts/model_trainer/model.joblib]
[2025-07-25 15:47:19,040: INFO: 2561630473: Metrics saved to artifacts/model_evaluation/metrics.json]
[2025-07-25 15:47:51,370: INFO: 2561630473: Model logged to MLflow as artifact: artifacts\model_trainer\model.joblib]
[2025-07-25 15:47:52,494: INFO: 2561630473: Model and metrics logged to mlflow under experiment 'DefaultRiskPrediction']
🏃 View run unleashed-lark-34 at: https://dagshub.com/klan86at/De

In [None]:
# pip install dagshub mlflow

# import dagshub
# dagshub.init(repo_owner='klan86at',
#              repo_name='Default-risk-prediction',
#              mlflow=True)

# import mlflow
# with mlflow.start_run():
#   mlflow.log_param('parameter name', 'value')
#   mlflow.log_metric('metric name', 1)