In [1]:
%pwd

'/Users/melihaltin/Documents/Development/data-science/Lung-Cancer/Lung-Cancer/research'

In [2]:
import os 

In [3]:
# Step up from the notebook folder to the project root so that relative paths
# used later in the notebook resolve. The guard makes the cell idempotent:
# re-running it no longer climbs one more directory each time.
# NOTE(review): assumes the notebook lives in a folder named `research`
# (as shown by the %pwd output) — confirm if the folder is ever renamed.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable settings for the model-training stage.

    Attributes:
        root_dir: Directory belonging to the training stage; the trained
            model file is written beneath it.
        train_data: Location of the CSV file that holds the training data.
    """

    root_dir: Path
    train_data: Path

In [16]:
from Lung_Cancer.constants import *
from Lung_Cancer.utils.common import read_yaml , create_directories


class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    On construction the config, params and schema files are read and the
    artifacts root directory listed in the config is created.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root.

        Args:
            config_filepath: Path to the main configuration YAML.
            params_filepath: Path to the parameters YAML.
            schema_filepath: Path to the schema YAML.
        """
        # NOTE(review): read_yaml is assumed to return an object that supports
        # attribute access (e.g. `.artifacts_root`) — confirm in utils.common.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_training_config(self) -> "ModelTrainingConfig":
        """Build the model-training configuration.

        Creates the stage's root directory as a side effect.

        Returns:
            ModelTrainingConfig with ``root_dir`` and ``train_data`` taken from
            the ``model_training`` section of the loaded config.
        """
        # Keep the section in a local variable. Overwriting `self.config` with
        # the sub-section would corrupt the manager: a second call (or any other
        # getter reading `self.config`) would no longer see the full config.
        training_section = self.config.model_training

        create_directories([training_section.root_dir])
        return ModelTrainingConfig(
            root_dir=Path(training_section.root_dir),
            train_data=Path(training_section.train_data),
        )
    
        

In [6]:
import pandas as pd 
from sklearn.ensemble import RandomForestClassifier
import joblib

In [14]:
class ModelTrainer:
    """Trains a random-forest classifier on the configured CSV and saves it."""

    def __init__(self, config):
        """Resolve the training configuration from a configuration manager.

        Args:
            config: Object exposing ``get_model_training_config()``, whose
                result provides ``train_data`` and ``root_dir``.
        """
        self.config = config
        self.model_training_config = config.get_model_training_config()
        self.train_data = self.model_training_config.train_data

    def train(self, target_column='LUNG_CANCER', random_state=42):
        """Fit a RandomForestClassifier on the training CSV.

        Args:
            target_column: Name of the label column; every other column is
                used as a feature.
            random_state: Seed forwarded to the classifier so repeated runs
                produce the same model. Pass ``None`` for unseeded behaviour.

        Returns:
            The fitted RandomForestClassifier.

        Raises:
            KeyError: If ``target_column`` is not present in the CSV.
        """
        train_df = pd.read_csv(self.train_data)

        # Fail early with a readable message instead of pandas' generic
        # "not found in axis" error raised by drop().
        if target_column not in train_df.columns:
            raise KeyError(
                f"Target column {target_column!r} not found in {self.train_data}"
            )

        X_train = train_df.drop(columns=[target_column])
        y_train = train_df[target_column]

        model = RandomForestClassifier(random_state=random_state)
        model.fit(X_train, y_train)

        return model

    def save_model(self, model):
        """Serialize ``model`` with joblib into the stage's root directory.

        Args:
            model: The object to persist.

        Returns:
            Path of the written ``model.joblib`` file.
        """
        model_path = self.model_training_config.root_dir / 'model.joblib'
        joblib.dump(model, model_path)

        return model_path
    
        
        

In [17]:

# Run the training stage end to end: load config, train, persist the model.
try:
    config_manager = ConfigurationManager()
    config = config_manager
    model_trainer = ModelTrainer(config)
    model = model_trainer.train()
    model_path = model_trainer.save_model(model)
    print('Model trained successfully and saved at ', model_path)

except Exception as e:
    print('Training failed')
    print(e)
    # Re-raise so the cell shows the full traceback and is marked as failed;
    # printing only the message hides where the error came from and lets a
    # "Run All" continue as if training had succeeded.
    raise

[2024-04-22 15:41:50,992]- Lung_Cancer.logging - INFO - yaml file: config/config.yaml loaded successfully
[2024-04-22 15:41:50,994]- Lung_Cancer.logging - INFO - yaml file: params.yaml loaded successfully
[2024-04-22 15:41:50,996]- Lung_Cancer.logging - INFO - yaml file: schema.yaml loaded successfully
[2024-04-22 15:41:50,997]- Lung_Cancer.logging - INFO - created directory at: artifacts
[2024-04-22 15:41:50,998]- Lung_Cancer.logging - INFO - created directory at: artifacts/model_training
Model trained successfully and saved at  artifacts/model_training/model.joblib
