In [4]:
import pandas as pd
import numpy as np
import os
from sklearn.tree import DecisionTreeRegressor
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories
from config import *
from mlProject.constants import *
from dataclasses import dataclass
from mlProject.constants import *
from pathlib import Path
import joblib

In [2]:
# Show the kernel's current working directory before it is changed below.
%pwd

'c:\\Users\\S Kumar\\project\\end_to_end_project_ml\\research'

In [3]:
# Move the kernel from the notebook folder up to the project root so that the
# relative paths used by the project (config\config.yaml, artifact/...) resolve.
# The guard keeps this cell idempotent: re-running it, or starting the kernel
# already at the project root, no longer climbs one directory too far.
# NOTE(review): "research" is the notebook folder name shown by %pwd above.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

'C:\\Users\\S Kumar\\project\\end_to_end_project_ml'

In [5]:
@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Field order is part of the interface: the dataclass-generated
    ``__init__`` also accepts these values positionally.
    """

    # Directory in which the trained model file is written.
    root_dir: Path

    # CSV files read with ``pd.read_csv`` (despite the ``_dir`` suffix):
    # training features / targets, then test features / targets.
    X_data_train_dir: Path
    Y_data_train_dir: Path
    X_data_test_dir: Path
    Y_data_test_dir: Path

    # File name of the serialized model inside ``root_dir``.
    model_name: str

    # Hyperparameters forwarded to ``DecisionTreeRegressor``.
    max_depth: int
    min_samples_leaf: int
    

    

In [6]:
class ConfiguarationManager:
    """Load the project's YAML files and build stage-specific config objects.

    Attributes:
        config_file: Parsed contents of the main config YAML.
        param_file: Parsed contents of the hyperparameter YAML.
        schema_file: Parsed contents of the schema YAML (loaded here but not
            used by ``get_model_config``).
    """

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        param_file_path=PARAMS_FILE_PATH,
        schema_file_path=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_file_path: Location of the main config YAML.
            param_file_path: Location of the hyperparameter YAML.
            schema_file_path: Location of the schema YAML.
        """
        self.config_file = read_yaml(config_file_path)
        self.param_file = read_yaml(param_file_path)
        self.schema_file = read_yaml(schema_file_path)

        create_directories([self.config_file.artifacts_root])

    def get_model_config(self) -> ModelTrainingConfig:
        """Assemble the model-training settings from the loaded YAML data.

        Path-like entries come from the ``model_training`` section of the
        config file; hyperparameters come from the ``DecisionTreeRegressor``
        section of the params file.

        Returns:
            A ``ModelTrainingConfig`` whose path fields are real
            ``pathlib.Path`` objects, matching the dataclass annotations.
        """
        config = self.config_file.model_training
        param = self.param_file.DecisionTreeRegressor
        # The dataclass declares these fields as Path; coerce the raw YAML
        # values so the declared types and the runtime types agree.
        return ModelTrainingConfig(
            root_dir=Path(config.root_dir),
            X_data_train_dir=Path(config.X_data_train_dir),
            Y_data_train_dir=Path(config.Y_data_train_dir),
            X_data_test_dir=Path(config.X_data_test_dir),
            Y_data_test_dir=Path(config.Y_data_test_dir),
            model_name=config.model_name,
            max_depth=param.max_depth,
            min_samples_leaf=param.min_samples_leaf,
        )

In [8]:
class ModelTraning:
    """Fit a ``DecisionTreeRegressor`` on the configured CSV data and save it.

    Attributes:
        config: The ``ModelTrainingConfig`` this trainer was built from.
        model: The (initially unfitted) ``DecisionTreeRegressor``.
        X_train, Y_train: Training features / targets read from CSV.
        X_test, Y_test: Test features / targets read from CSV. They are not
            used by ``train_model``; they are kept for later evaluation.
    """

    def __init__(self, config: ModelTrainingConfig, random_state=42):
        """Create the output directory, build the estimator and load the data.

        Args:
            config: Paths and hyperparameters for this training run.
            random_state: Seed forwarded to ``DecisionTreeRegressor`` so that
                repeated runs produce the same tree. Pass ``None`` to get the
                previous unseeded behaviour.
        """
        create_directories([config.root_dir])
        self.config = config
        # scikit-learn permutes the features at every split, so without a
        # fixed seed two fits on identical data may produce different trees.
        self.model = DecisionTreeRegressor(
            max_depth=self.config.max_depth,
            min_samples_leaf=self.config.min_samples_leaf,
            random_state=random_state,
        )
        self.X_train = pd.read_csv(self.config.X_data_train_dir)
        self.Y_train = pd.read_csv(self.config.Y_data_train_dir)
        self.X_test = pd.read_csv(self.config.X_data_test_dir)
        self.Y_test = pd.read_csv(self.config.Y_data_test_dir)

    def train_model(self):
        """Fit the model on the training data and dump it with joblib.

        The model is written to ``<config.root_dir>/<config.model_name>``.
        """
        self.model.fit(self.X_train, self.Y_train)
        model_path = os.path.join(self.config.root_dir, self.config.model_name)
        joblib.dump(self.model, model_path)

In [9]:
# Load the YAML-backed settings, then build the trainer and run it.
config = ConfiguarationManager()
# NOTE: despite its name, `model_trainer_config` holds the ModelTraning
# instance; later cells read `.config`, `.X_test` and `.Y_test` from it.
model_trainer_config = ModelTraning(config=config.get_model_config())
model_trainer_config.train_model()

[2025-04-25 15:11:45,617]: INFO:common:yaml file: config\config.yaml loaded successfully]
[2025-04-25 15:11:45,619]: INFO:common:yaml file: params.yaml loaded successfully]
[2025-04-25 15:11:45,622]: INFO:common:yaml file: schema.yaml loaded successfully]
[2025-04-25 15:11:45,624]: INFO:common:created directory at: artifact]
[2025-04-25 15:11:45,625]: INFO:common:created directory at: artifact/model_training]


In [10]:
import joblib

In [11]:
# Reload the model that train_model() saved, from the same root_dir/model_name.
saved_model_path = os.path.join(
    model_trainer_config.config.root_dir,
    model_trainer_config.config.model_name,
)
model = joblib.load(saved_model_path)

In [12]:
# Predict on the test features that the trainer loaded from X_data_test_dir.
test_features = model_trainer_config.X_test
y_pred = model.predict(test_features)

In [13]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [14]:
# Mean squared error of the reloaded model's predictions on the test targets.
mse = mean_squared_error(model_trainer_config.Y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.5537756173975965


In [15]:
# R^2 of the reloaded model on the test set (shown as the cell's output).
r2_score(y_true=model_trainer_config.Y_test, y_pred=y_pred)

0.3982649962441974

In [18]:
# Re-read the four CSV splits into standalone frames for the scaling
# experiment below; the paths are the same ones the trainer was built with.
data_cfg = model_trainer_config.config
x_train = pd.read_csv(data_cfg.X_data_train_dir)
y_train = pd.read_csv(data_cfg.Y_data_train_dir)
x_test = pd.read_csv(data_cfg.X_data_test_dir)
y_test = pd.read_csv(data_cfg.Y_data_test_dir)

In [19]:
from sklearn.preprocessing import StandardScaler

In [20]:
# Scaler for the experiment below; it is fitted on the training features only.
scaler=StandardScaler()

In [21]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted transform to both splits.
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [22]:
from sklearn.tree import DecisionTreeRegressor
# Fresh tree for the scaled-feature experiment, using the same max_depth and
# min_samples_leaf as the trained model. This rebinds `model`, replacing the
# estimator loaded from disk earlier.
# random_state is pinned because scikit-learn permutes features at every
# split, so an unseeded tree is not guaranteed to be identical across runs.
model = DecisionTreeRegressor(
    max_depth=model_trainer_config.config.max_depth,
    min_samples_leaf=model_trainer_config.config.min_samples_leaf,
    random_state=42,
)

In [23]:
# Fit on the scaled training features and predict the scaled test features.
# fit() returns the estimator itself, so the two calls can be chained.
# This overwrites the earlier `y_pred`.
y_pred = model.fit(x_train_scaled, y_train).predict(x_test_scaled)

In [24]:
# R^2 for the tree trained on scaled features. The targets come from the
# trainer object; `y_test` above was read from the same Y_data_test_dir file.
r2_score(y_true=model_trainer_config.Y_test, y_pred=y_pred)

0.3982649962441974