In [1]:
from src.NEFT50.Utils import Create_Directory,read_yaml_file
from src.NEFT50.loggers import logger
from src.NEFT50.Exception import CustomException
from src.NEFT50.Constants import CONFIG_FILEPATH,PARAM_FILEPATH
import os,sys

In [2]:
from pathlib import Path
from dataclasses import dataclass
#step3)update the entity file --->is nothing we r defining the class variable
#which was used in yaml file and futhure taking rtn as function

@dataclass
class DataIngestionConfig():
    #defining class variable along with dtypes
    root_dir_path:Path
    train_test_path: Path
    raw_file_path:Path


In [3]:
#step4)update the configurationmanager file which was present in src/config/configuration.py
#In this file we are reading yaml file ,create directory and also 
#assigning the value to the class variable and taking rtn as function

class ConfigurationManager():
    #initializing the instance variable 
    def __init__(self,config_filepath=CONFIG_FILEPATH,param_filepath=PARAM_FILEPATH):
        #reading the yaml file
        self.config = read_yaml_file(config_filepath) #rtn value as configdictatonary
        self.param = read_yaml_file(param_filepath) #rtn value as configdictatonary
        print(self.config)

        #creating main directory in project structure
        Create_Directory([self.config.artifacts_root]) #it will create artifact directory

    #creating method to initialize value to dataingestion
    def data_ingestion(self) ->DataIngestionConfig:
        #initializing local variable
        config = self.config.data_ingestion #here we r accessing dataingestion block from yaml file

        #creating dataingestion root_dir_path
        Create_Directory([config.root_dir_path])

        #creating an object of DataIngestionConfig class and initialize class variable value to it 
        data_ingestion_config = DataIngestionConfig(
            root_dir_path=config.root_dir_path,
            train_test_path=config.train_test_path,
            raw_file_path = config.raw_file_path
         
        )
        return data_ingestion_config


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
#step5)update the components files!!! in this file 
class DataIngestion():
    #constructor method initialize the class variable to object
    def __init__(self,ingestionconfig:DataIngestionConfig):
        self.ingestionconfig = ingestionconfig
    
    def train_test_data(self):
        raw_data = self.ingestionconfig.raw_file_path

        logger.info(f"Loading the csv file {raw_data}")

        df_raw = pd.read_csv(raw_data)
        
        df_raw['date'] = pd.to_datetime(df_raw['date'])
        
        # Extract target column name
        df_raw['target'] = df_raw['close'].shift(-1) #shift(-1) moves the close column up by 1 row
        df_raw.dropna(inplace=True)

        logger.info(f"splitting the Raw dataset")

        train_df,test_df = train_test_split(df_raw,test_size=0.2,random_state=42)

        #saving training and testing data
        train_df.to_csv(
            os.path.join(self.ingestionconfig.train_test_path, "train.csv"),
            index=False,
            encoding='utf-8'
        )
        test_df.to_csv(
            os.path.join(self.ingestionconfig.train_test_path, "test.csv"),
            index=False,
            encoding='utf-8'
)



In [6]:
os.chdir('../')
%pwd

'd:\\NEFT50_RegressionModel'

In [7]:
#step6)update the training pipeline file
try:
    #creating an object of configurationmanager class
    cm = ConfigurationManager()

    data_ingestion_config = cm.data_ingestion()

    #creating an object of DataIngestion component class
    di = DataIngestion(ingestionconfig = data_ingestion_config)

    di.train_test_data()


except Exception as e:
    raise CustomException(e,sys)

[2025-10-06 15:16:14,091]-INFO-19-Reading the YAML file config\config.yaml
{'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir_path': 'artifacts/data_ingestion', 'train_test_path': 'artifacts/data_ingestion/', 'raw_file_path': 'D:\\NEFT50_RegressionModel\\INDIA VIX_minute.csv'}, 'data_transformation': {'root_dir_path': 'artifacts/data_transformation', 'save_obj_dirpath': 'artifacts/data_transformation/preprocessor.pkl', 'csv_dir_path': 'artifacts/data_ingestion/'}}
[2025-10-06 15:16:14,095]-INFO-23-YAML file read successfully: config\config.yaml
[2025-10-06 15:16:14,096]-INFO-19-Reading the YAML file param.yaml
{'test_key': 'test_value'}
[2025-10-06 15:16:14,098]-INFO-23-YAML file read successfully: param.yaml
{'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir_path': 'artifacts/data_ingestion', 'train_test_path': 'artifacts/data_ingestion/', 'raw_file_path': 'D:\\NEFT50_RegressionModel\\INDIA VIX_minute.csv'}, 'data_transformation': {'root_dir_path': 'artifacts/data_