In [1]:
from src.Wine.Utils import create_directory,read_yaml,download_data_from_s3
from src.Wine.loggers import logger
from src.Wine.Exception import CustomException
from src.Wine.Constants import *
import os,sys

In [2]:
from pathlib import Path
from dataclasses import dataclass
# Step 3) Update the entity file: define a dataclass whose fields mirror the keys
# used in the YAML config; it is later used as the return type of a config method.

@dataclass
class DataIngestionConfig:
    """Filesystem locations used by the data-ingestion stage.

    Instances are built by ``ConfigurationManager.data_ingestion`` from the
    ``data_ingestion`` section of the loaded config.
    """

    # Stage directory; created by ConfigurationManager.data_ingestion.
    root_dir_path: Path
    # Directory that receives train.csv and test.csv.
    train_test_path: Path
    # Forwarded to download_data_from_s3 as ``local_dir_path``.
    Zip_local_dir_path: Path
    # Forwarded to download_data_from_s3 as ``unzip_dir_path``; WineQT.csv is read from here.
    unzip_dir_path: Path


In [3]:
# Step 4) Update the configuration manager (src/config/configuration.py).
# It reads the YAML files, creates the required directories, assigns the config
# values to the entity's fields, and returns the populated entity from a method.

class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config entities."""

    def __init__(self,config_filepath=CONFIG_FILEPATH,param_filepath=PARAM_FILEPATH,schema_filepath=SCHEMA_FILEPATH):
        """Read the three YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Path of the main config YAML (stored as ``self.config``).
            param_filepath: Path of the parameters YAML (stored as ``self.param``).
            schema_filepath: Path of the schema YAML (stored as ``self.schema``).
        """
        # Load in the same order as before: config, then param, then schema.
        yaml_sources = (
            ("config", config_filepath),
            ("param", param_filepath),
            ("schema", schema_filepath),
        )
        for attribute, filepath in yaml_sources:
            setattr(self, attribute, read_yaml(filepath))

        # The loaded config exposes its keys as attributes; make sure the
        # artifacts root exists before any stage writes into it.
        create_directory([self.config.artifacts_root])

    def data_ingestion(self) ->DataIngestionConfig:
        """Create the ingestion directory and return the ingestion-stage config.

        Returns:
            DataIngestionConfig filled from the ``data_ingestion`` section of the
            loaded config.
        """
        section = self.config.data_ingestion

        # Only the stage root directory is created here.
        create_directory([section.root_dir_path])

        return DataIngestionConfig(
            root_dir_path=section.root_dir_path,
            train_test_path=section.train_test_path,
            Zip_local_dir_path=section.Zip_local_dir_path,
            unzip_dir_path=section.unzip_dir_path,
        )


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
# Step 5) Update the components file: this class downloads the raw data and splits it into train/test CSV files.
class DataIngestion():
    """Fetch the raw dataset and split it into train/test CSV files.

    Constructing an instance immediately calls ``download_data_from_s3`` with the
    configured locations, so creating the object has side effects.
    """

    def __init__(self,ingestionconfig:DataIngestionConfig):
        """Store the ingestion config and trigger the download.

        Args:
            ingestionconfig: Paths for this stage. ``unzip_dir_path`` and
                ``Zip_local_dir_path`` are forwarded to ``download_data_from_s3``.
        """
        self.ingestionconfig = ingestionconfig

        # NOTE(review): presumably downloads the archive and extracts it into
        # unzip_dir_path -- confirm against the helper's implementation.
        download_data_from_s3(
            unzip_dir_path=self.ingestionconfig.unzip_dir_path,
            local_dir_path=self.ingestionconfig.Zip_local_dir_path,
        )

    def train_test_data(self, test_size=0.2, random_state=42, raw_filename='WineQT.csv'):
        """Read the raw CSV, split it, and write ``train.csv`` / ``test.csv``.

        Args:
            test_size: Forwarded to ``train_test_split``; defaults to the
                previously hard-coded 0.2.
            random_state: Seed forwarded to ``train_test_split``; defaults to the
                previously hard-coded 42.
            raw_filename: Name of the CSV inside ``unzip_dir_path``; defaults to
                the previously hard-coded ``WineQT.csv``.

        Raises:
            FileNotFoundError: If the raw CSV is missing (raised by ``pd.read_csv``).
        """
        raw_data_file = os.path.join(self.ingestionconfig.unzip_dir_path, raw_filename)

        logger.info(f"Loading the csv file {raw_data_file}")
        df_raw = pd.read_csv(raw_data_file)

        logger.info("splitting the Raw dataset")
        train_df, test_df = train_test_split(
            df_raw, test_size=test_size, random_state=random_state
        )

        # The configuration manager only creates root_dir_path, so make sure the
        # output directory exists before writing. An empty path means "current
        # directory", which os.makedirs cannot create, so skip it in that case.
        output_dir = self.ingestionconfig.train_test_path
        if str(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): to_csv writes the DataFrame index as an extra unnamed first
        # column by default. Kept as-is for compatibility; consider index=False if
        # downstream readers should not see that column.
        train_df.to_csv(os.path.join(output_dir, "train.csv"))
        test_df.to_csv(os.path.join(output_dir, "test.csv"))

        logger.info(f"Saved train and test splits to {output_dir}")



In [6]:
# Move the working directory up one level, then display it with the %pwd magic.
# NOTE: the path is relative to the current directory, so re-running this cell
# moves up another level each time; restart the kernel before re-running.
os.chdir('../')
%pwd

'd:\\Wine_ML_AlGO\\WineQualityModel'

In [7]:
# Step 6) Update the training pipeline file.
try:
    # Load the YAML settings and build the ingestion-stage config entity.
    cm = ConfigurationManager()
    data_ingestion_config = cm.data_ingestion()

    # Constructing DataIngestion triggers the download; then write the split CSVs.
    di = DataIngestion(ingestionconfig=data_ingestion_config)
    di.train_test_data()
except Exception as exc:
    # Re-raise as the project's exception type, passing ``sys`` along with the error.
    raise CustomException(exc, sys)

[2024-09-26 21:23:38,893]-33-Reading yaml file config\config.yaml
[2024-09-26 21:23:38,915]-33-Reading yaml file param.yaml
[2024-09-26 21:23:38,918]-33-Reading yaml file schema.yaml
[2024-09-26 21:23:38,925]-46-Creating Directory
[2024-09-26 21:23:38,941]-50-Directory artifacts created
[2024-09-26 21:23:38,943]-46-Creating Directory
[2024-09-26 21:23:38,945]-50-Directory artifacts/data_ingestion created
[2024-09-26 21:23:39,159]-1278-Found credentials in shared credentials file: ~/.aws/credentials
WineQT.zip




Extracted all files to artifacts/data_ingestion/
[2024-09-26 21:23:42,805]-15-Loading the csv file artifacts/data_ingestion/WineQT.csv
[2024-09-26 21:23:43,020]-19-splitting the Raw dataset
