In [1]:
from dataclasses import dataclass
from pathlib import Path

In [2]:
import os
# The project root is machine-specific. Set the ML_PROJECT_ROOT environment
# variable to point the notebook at another checkout; when it is not set the
# original location is used, so existing setups keep working unchanged.
os.chdir(os.environ.get("ML_PROJECT_ROOT", "E:/Documents/d_s/full_stack/ML_project"))

In [3]:
# Display the current working directory to confirm the chdir above took effect.
os.getcwd()

'E:\\Documents\\d_s\\full_stack\\ML_project'

In [4]:
from xmlrpc.client import Boolean


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are built by ConfigurationManager.get_data_ingestion_config and
    consumed by DataIngestion.

    NOTE(review): the directory fields are annotated as ``Path`` but
    ConfigurationManager fills them with ``os.path.join`` results, i.e. plain
    strings. Dataclasses do not enforce annotations, so this works, but the
    annotations and the actual values disagree.
    """

    # Timestamped artifact directory of this ingestion run.
    root_dir: Path
    # Directory that contains the source data file.
    source_data_path: Path
    # File name of the source data; also reused as the file name of the raw
    # copy and of the train/test outputs.
    source_file: str
    # Directory (under root_dir) that receives the raw copy of the source file.
    raw_dir: str
    # Directory (under root_dir) that receives the training split.
    ingested_train_dir: Path
    # Directory (under root_dir) that receives the test split.
    ingested_test_dir: Path

@dataclass(frozen=True)
class DataIngestionArtifact:
    """Immutable result record returned by DataIngestion.test_train_spilt.

    NOTE(review): the path fields are annotated as ``Path`` but are filled with
    ``os.path.join`` results (plain strings) by DataIngestion.
    """

    # Location of the exported training split.
    train_file_path: Path
    # Location of the exported test split.
    test_file_path: Path
    # Status flag; DataIngestion sets it to True when it builds the artifact.
    is_ingested: bool
    # Human-readable status text for the stage.
    message: str

In [5]:
from Insurance.constants import *
from Insurance.utils import read_yaml, create_directories

In [6]:


class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage configuration objects.

    The parsed config, model and schema files are kept on the instance as
    ``config``, ``model`` and ``schema``; ``time_stamp`` names the per-run
    sub-directory used for stage artifacts.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        model_filepath=MODEL_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
        time_stamp=CURRENT_TIME_STAMP,
    ):
        """Load the three YAML files and set up the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            model_filepath: Path of the model YAML.
            schema_filepath: Path of the schema YAML.
            time_stamp: Label of the current run; used as a directory name.
        """
        self.config = read_yaml(config_filepath)
        self.model = read_yaml(model_filepath)
        self.schema = read_yaml(schema_filepath)
        self.time_stamp = time_stamp

        # Hand the artifacts root to create_directories (presumably it creates
        # the directory when missing -- the helper is defined outside this file).
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings for the current run.

        The stage directory is ``<data_ingestion.root_dir>/<time_stamp>``; it is
        passed to create_directories before the config object is assembled, and
        the raw/train/test directories are all placed beneath it.

        Returns:
            DataIngestionConfig populated from the ``data_ingestion`` section
            of the main configuration.
        """
        ingestion_settings = self.config.data_ingestion
        stage_dir = os.path.join(ingestion_settings.root_dir, self.time_stamp)
        create_directories([stage_dir])

        def under_stage_dir(sub_dir):
            # Resolve a configured sub-directory relative to this run's stage dir.
            return os.path.join(stage_dir, sub_dir)

        return DataIngestionConfig(
            root_dir=stage_dir,
            source_data_path=ingestion_settings.source_data_path,
            source_file=ingestion_settings.source_file,
            raw_dir=under_stage_dir(ingestion_settings.raw_dir),
            ingested_train_dir=under_stage_dir(ingestion_settings.ingested_train_dir),
            ingested_test_dir=under_stage_dir(ingestion_settings.ingested_test_dir),
        )

In [7]:
from distutils.command.config import config
import os,shutil, sys
from  Insurance.exception import InsuranceException
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from Insurance.logger import logging


class DataIngestion:
    """Copies the source CSV into the artifact tree and writes a stratified train/test split.

    Typical use is ``DataIngestion(config).initiate_data_ingestion()``, which
    runs the copy step followed by the split step and returns a
    DataIngestionArtifact describing the exported files.
    """

    def __init__(self, config: DataIngestionConfig):
        """Store the stage settings and derive the location of the raw copy.

        Args:
            config: Settings of the data-ingestion stage.
        """
        self.config = config

        # Raw copy of the source file; the split step reads from this path.
        self.raw_data_file = os.path.join(self.config.raw_dir, self.config.source_file)

    def get_insurance_data(self):
        """Copy the source file into the raw directory of this run.

        Raises:
            InsuranceException: If the copy (or creating its target directory)
                fails; the original error is chained as the cause.
        """
        try:
            source_file_path = os.path.join(
                self.config.source_data_path, self.config.source_file
            )

            os.makedirs(os.path.dirname(self.raw_data_file), exist_ok=True)
            shutil.copy(source_file_path, self.raw_data_file)

        except Exception as e:
            raise InsuranceException(e, sys) from e

    def test_train_spilt(self) -> DataIngestionArtifact:
        """Split the raw data 80/20, stratified by expense band, and export both parts.

        The ``expenses`` column is bucketed into five bands that serve only as
        the stratification label; the label is never written to the exported
        files. The split is seeded (``random_state=42``) so it is repeatable.

        (The method name keeps its original spelling so existing callers
        continue to work.)

        Returns:
            DataIngestionArtifact with the train and test file locations.

        Raises:
            InsuranceException: If reading, splitting or writing fails; the
                original error is chained as the cause.
        """
        try:
            df = pd.read_csv(self.raw_data_file)

            # Stratification label, kept separate from the frame so nothing has
            # to be dropped again before export. With pandas' default
            # right-closed bins, an expense that is missing or not greater than
            # 0 gets no band (NaN).
            expense_band = pd.cut(
                df["expenses"],
                bins=[0.0, 1500.0, 3000.0, 10000.0, 60000.0, np.inf],
                labels=[1, 2, 3, 4, 5],
            )

            logging.info("Splitting data into train and test")

            split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

            # n_splits=1 yields exactly one (train, test) pair. The indices are
            # positional, so rows are selected with .iloc rather than by label.
            train_index, test_index = next(split.split(df, expense_band))
            strat_train_set = df.iloc[train_index]
            strat_test_set = df.iloc[test_index]

            train_file_path = os.path.join(
                self.config.ingested_train_dir, self.config.source_file
            )
            test_file_path = os.path.join(
                self.config.ingested_test_dir, self.config.source_file
            )

            os.makedirs(self.config.ingested_train_dir, exist_ok=True)
            logging.info(f"Exporting training datset to file: [{train_file_path}]")
            strat_train_set.to_csv(train_file_path, index=False)

            os.makedirs(self.config.ingested_test_dir, exist_ok=True)
            logging.info(f"Exporting test dataset to file: [{test_file_path}]")
            strat_test_set.to_csv(test_file_path, index=False)

            data_injestion_artifact = DataIngestionArtifact(
                train_file_path=train_file_path,
                test_file_path=test_file_path,
                is_ingested=True,
                message="Data Injestion stage is completed.",
            )
            logging.info(f"Data Ingestion artifact:[{data_injestion_artifact}]")

            return data_injestion_artifact

        except Exception as e:
            raise InsuranceException(e, sys) from e

    def initiate_data_ingestion(self) -> DataIngestionArtifact:
        """Run the full stage: copy the source file, then split and export it.

        Returns:
            DataIngestionArtifact produced by the split step.

        Raises:
            InsuranceException: If either step fails.
        """
        try:
            self.get_insurance_data()
            return self.test_train_spilt()
        except InsuranceException:
            # Both steps already wrap their failures; re-raise untouched so the
            # error is not nested inside a second InsuranceException.
            raise
        except Exception as e:
            raise InsuranceException(e, sys) from e

    def __del__(self):
        # NOTE(review): this runs whenever the instance is garbage-collected,
        # including after a failed run, so the "completed" line does not by
        # itself indicate success.
        logging.info(f"{'>>'*20}Data Ingestion log completed.{'<<'*20} \n\n")
            


In [8]:
# Run the ingestion stage end to end: load the configuration, build the stage
# settings, then copy and split the data. The resulting names stay at notebook
# level so later cells can inspect them.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_injestion_artifact = data_ingestion.initiate_data_ingestion()

except InsuranceException:
    # Already a project exception (raised by the ingestion stage); re-raise it
    # unchanged instead of nesting it inside another InsuranceException.
    raise
except Exception as e:
    raise InsuranceException(e, sys) from e

In [9]:
# Display the artifact returned by the ingestion run in the previous cell.
data_injestion_artifact

DataIngestionArtifact(train_file_path='artifacts/data_ingestion\\2022-10-09-15-39-44\\train\\insurance.csv', test_file_path='artifacts/data_ingestion\\2022-10-09-15-39-44\\test\\insurance.csv', is_ingested=True, message='Data Injestion stage is completed.')

In [10]:
from Insurance.entity.config_entity import DataIngestionConfig

ModuleNotFoundError: No module named 'Insurance.entity.config_entity'