In [4]:
import os
import sys
from src.exception import CustomException
from src.logger import logging
import pandas as pd 
from sklearn.model_selection import train_test_split
from dataclasses import dataclass

In [5]:
@dataclass
class DataIngestionConfig:
    """Output file locations used by the data-ingestion step.

    Each field is a path string; the defaults place all three files in an
    ``artifacts`` directory relative to the current working directory.
    """

    # Where the training split is written.
    train_data_path: str = os.path.join("artifacts", 'train.csv')
    # Where the test split is written. (Previously misspelled ``test_data_pat``,
    # which made every ``test_data_path`` lookup raise AttributeError.)
    test_data_path: str = os.path.join('artifacts', 'test.csv')
    # Where the unmodified copy of the source dataset is written.
    raw_data_path: str = os.path.join('artifacts', 'data.csv')

    @property
    def test_data_pat(self) -> str:
        """Deprecated alias for ``test_data_path`` (old misspelled name)."""
        return self.test_data_path

    @test_data_pat.setter
    def test_data_pat(self, value: str) -> None:
        # Keep assignments through the old name working.
        self.test_data_path = value


In [8]:
# Build a config instance with its default paths so it can be inspected in the cells below.
ingestion_config = DataIngestionConfig()

In [9]:
# Display the configured raw-data path (last expression is rendered as the cell output).
ingestion_config.raw_data_path

'artifacts\\data.csv'

In [11]:
# Create the directory that will contain the train CSV; exist_ok=True makes re-running this cell safe.
os.makedirs(os.path.dirname(ingestion_config.train_data_path),exist_ok = True)

In [14]:
# Load the source dataset and save a copy at the configured raw-data path.
# NOTE(review): this is an absolute, machine-specific path — the cell only runs
# where that exact location exists; consider a path relative to the project.
df = pd.read_csv(r'D:\test_project\ml-project\notebook\data\stud.csv')
# index=False leaves the DataFrame index out of the CSV; header=True writes column names.
df.to_csv(ingestion_config.raw_data_path,index = False, header= True)
#df.head(2)

In [6]:
class DataIngestion:
    """Read a source CSV, save a raw copy, and write a train/test split.

    Output locations are taken from a ``DataIngestionConfig`` instance created
    in ``__init__`` and stored on ``self.ingestion_config``.
    """

    # Source used when no ``source_path`` is given; kept so existing
    # zero-argument calls behave as before.
    # NOTE(review): absolute, machine-specific path — prefer passing
    # ``source_path`` explicitly.
    DEFAULT_SOURCE_PATH = r'D:\test_project\ml-project\notebook\data\stud.csv'

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self, source_path=None, test_size=0.2, random_state=42):
        """Run the ingestion step and return the written file paths.

        Args:
            source_path: CSV file to read. Defaults to ``DEFAULT_SOURCE_PATH``.
            test_size: Passed to ``train_test_split`` as ``test_size``.
            random_state: Passed to ``train_test_split`` as ``random_state``.

        Returns:
            A tuple ``(train_data_path, test_data_path, raw_data_path)`` taken
            from the ingestion config.

        Raises:
            CustomException: Wraps any exception raised while reading,
                splitting, or writing the data.
        """
        logging.info("Entered the data ingestion method or component")
        try:
            cfg = self.ingestion_config
            if source_path is None:
                source_path = self.DEFAULT_SOURCE_PATH

            df = pd.read_csv(source_path)
            logging.info("read the dataset as dataframe")

            # Ensure the parent directory of every output exists, not just the
            # train file's. A bare filename has an empty dirname, and
            # os.makedirs('') raises, so those are skipped.
            for out_path in (cfg.raw_data_path, cfg.train_data_path, cfg.test_data_path):
                out_dir = os.path.dirname(out_path)
                if out_dir:
                    os.makedirs(out_dir, exist_ok=True)

            df.to_csv(cfg.raw_data_path, index=False, header=True)

            logging.info("Train test split initiated")
            train_set, test_set = train_test_split(
                df, test_size=test_size, random_state=random_state
            )
            train_set.to_csv(cfg.train_data_path, index=False, header=True)
            test_set.to_csv(cfg.test_data_path, index=False, header=True)
            logging.info("Ingestion of the data is completed")

            return (
                cfg.train_data_path,
                cfg.test_data_path,
                cfg.raw_data_path,
            )
        except Exception as e:
            # Chain explicitly so the original traceback stays attached.
            raise CustomException(e, sys) from e