In [1]:
import os
# Display the kernel's current working directory before any chdir is done.
%pwd

'c:\\Users\\pachp\\Desktop\\projects\\customer_churn\\research'

In [2]:
os.chdir("../")
%pwd

'c:\\Users\\pachp\\Desktop\\projects\\customer_churn'

In [3]:
# Entity
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable set of filesystem locations used by the data-ingestion stage."""

    # Directory created for the ingestion stage before any file is written.
    root_dir: Path
    # Destination CSV for the complete, unsplit dataset.
    raw_data_path: Path
    # Destination CSV for the training portion of the split.
    train_data_path: Path
    # Destination CSV for the held-out test portion of the split.
    test_data_path: Path

In [4]:
from customer_churn.constants import *
from customer_churn.utils.common_utils import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects.

    On construction both YAML files are read and the top-level artifacts
    directory named by ``artifacts_root`` is created.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read the config and params files and create the artifacts root.

        Args:
            config_filepath: Location of the main YAML config file.
            params_filepath: Location of the YAML parameters file.
        """
        # read_yaml's result is accessed by attribute below; presumably a
        # ConfigBox -- confirm in common_utils.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion config from the ``data_ingestion`` section.

        The stage's root directory is created as a side effect.

        Returns:
            A ``DataIngestionConfig`` whose fields are ``Path`` objects.
        """
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        # The entity declares Path fields, so convert the raw YAML values
        # instead of passing them through unconverted.
        return DataIngestionConfig(
            root_dir=Path(config.root_dir),
            raw_data_path=Path(config.raw_data_path),
            train_data_path=Path(config.train_data_path),
            test_data_path=Path(config.test_data_path),
        )

In [6]:
import os
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
from box import ConfigBox
from box.exceptions import BoxValueError
from customer_churn import logger
from customer_churn.utils.common_utils import read_sql_data

In [7]:
class DataIngestion:
    """Fetch the dataset and persist it as raw, train and test CSV files."""

    def __init__(self, config: DataIngestionConfig):
        """Store the ingestion settings.

        Args:
            config: Paths of the raw, train and test CSV files to write.
        """
        self.config = config

    def initiate_data_ingestion(self, test_size=0.2, random_state=42):
        """Read the data, save it, and split it into train and test CSVs.

        Args:
            test_size: Passed to ``train_test_split``; the default of 0.2
                matches the previously hard-coded value.
            random_state: Seed passed to ``train_test_split``; the default
                of 42 matches the previously hard-coded value.

        Returns:
            Tuple ``(train_data_path, test_data_path)`` taken from the config.

        Raises:
            ValueError: If a ``BoxValueError`` is raised during ingestion;
                the original error is kept as ``__cause__``.
        """
        logger.info("Entered the Data Ingestion")
        try:
            df = read_sql_data()
            logger.info("Read the data as DataFrame from MySQL.")

            # Every output file needs an existing parent directory, not only
            # the raw file. A path without a directory part yields "", which
            # os.makedirs rejects, so that case is skipped.
            output_paths = (
                self.config.raw_data_path,
                self.config.train_data_path,
                self.config.test_data_path,
            )
            for output_path in output_paths:
                parent_dir = os.path.dirname(output_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

            df.to_csv(self.config.raw_data_path, index=False, header=True)
            logger.info("Reading Completed.")

            logger.info("train_test_split initiated...............")
            train_set, test_set = train_test_split(
                df, test_size=test_size, random_state=random_state
            )

            train_set.to_csv(self.config.train_data_path, index=False, header=True)
            test_set.to_csv(self.config.test_data_path, index=False, header=True)
            logger.info("data ingestion completed.")

            return (
                self.config.train_data_path,
                self.config.test_data_path,
            )

        except BoxValueError as exc:
            # Chain the original error so its details are not lost.
            raise ValueError("Error occured in Data Ingestion...............") from exc

In [8]:
# Run the data-ingestion stage end to end. Any exception propagates as-is;
# a try/except that only re-raises adds nothing, so none is used here.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
# The trailing semicolon keeps the returned path tuple out of the cell output.
data_ingestion.initiate_data_ingestion();

[2024-03-13 17:47:59,850: INFO: common_utils: yaml file: config\config.yaml loaded successfully]
[2024-03-13 17:47:59,856: INFO: common_utils: yaml file: params.yaml loaded successfully]
[2024-03-13 17:47:59,861: INFO: common_utils: created directory at: artifacts]
[2024-03-13 17:47:59,865: INFO: common_utils: created directory at: artifacts/data_ingestion]
[2024-03-13 17:47:59,868: INFO: 704152861: Entered the Data Ingestion]
[2024-03-13 17:47:59,870: INFO: common_utils: Reading SQL database started]
[2024-03-13 17:47:59,882: INFO: common_utils: Connection Established]


  df=pd.read_sql_query('Select * from churn',mydb)


   CustomerID  Count        Country       State         City  Zip Code  \
0  3668-QPYBK      1  United States  California  Los Angeles     90003   
1  9237-HQITU      1  United States  California  Los Angeles     90005   
2  9305-CDSKC      1  United States  California  Los Angeles     90006   
3  7892-POOKP      1  United States  California  Los Angeles     90010   
4  0280-XJGEX      1  United States  California  Los Angeles     90015   

                 Lat Long   Latitude   Longitude  Gender  ...        Contract  \
0  33.964131, -118.272783  33.964131 -118.272783    Male  ...  Month-to-month   
1   34.059281, -118.30742  34.059281 -118.307420  Female  ...  Month-to-month   
2  34.048013, -118.293953  34.048013 -118.293953  Female  ...  Month-to-month   
3  34.062125, -118.315709  34.062125 -118.315709  Female  ...  Month-to-month   
4  34.039224, -118.266293  34.039224 -118.266293    Male  ...  Month-to-month   

  Paperless Billing             Payment Method  Monthly Charges Tota