In [1]:
import os 

In [2]:
%pwd

'/Users/ravina/Desktop/CustomerChurnPrediction/research'

In [3]:
# Move from research/ up to the project root so that relative paths such as
# artifacts/... resolve. Guarded on the current directory name so that
# re-running this cell does not climb one more level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'/Users/ravina/Desktop/CustomerChurnPrediction'

In [5]:
import pandas as pd

In [6]:
# Load the ingested churn records (path is relative to the project root).
df = pd.read_csv(
    "artifacts/data_ingestion/Customer-Churn-Records.csv"
)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   RowNumber           10000 non-null  int64  
 1   CustomerId          10000 non-null  int64  
 2   Surname             10000 non-null  object 
 3   CreditScore         10000 non-null  int64  
 4   Geography           10000 non-null  object 
 5   Gender              10000 non-null  object 
 6   Age                 10000 non-null  int64  
 7   Tenure              10000 non-null  int64  
 8   Balance             10000 non-null  float64
 9   NumOfProducts       10000 non-null  int64  
 10  HasCrCard           10000 non-null  int64  
 11  IsActiveMember      10000 non-null  int64  
 12  EstimatedSalary     10000 non-null  float64
 13  Exited              10000 non-null  int64  
 14  Complain            10000 non-null  int64  
 15  Satisfaction Score  10000 non-null  int64  
 16  Card 

In [8]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited', 'Complain',
       'Satisfaction Score', 'Card Type', 'Point Earned'],
      dtype='object')

In [9]:
import sys 
from dataclasses import dataclass

import numpy as np
import pandas as pd

from sklearn.preprocessing import OneHotEncoder

In [10]:
from sklearn.pipeline import Pipeline


In [11]:
from dataclasses import dataclass 
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Read-only settings consumed by the data-validation stage.

    Declared as a frozen dataclass: the fields are plain annotated class
    attributes (no hand-written ``__init__``), and instances cannot be
    modified after construction.
    """

    # Directory created for this stage by ConfigurationManager.
    root_dir: Path
    # Path of the text file the validation status is written to.
    STATUS_FILE: str
    # Path handed to pd.read_csv by DataValidation (i.e. the CSV to validate).
    unzip_data_dir: Path
    # Mapping whose keys are the column names allowed in the data.
    all_schema: dict


    

In [12]:
from CustomerChurnPrediction.constants import *
from CustomerChurnPrediction.utils.common import read_yaml,create_directories

In [13]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects.

    Constructing an instance loads the config, params and schema files via
    ``read_yaml`` and calls ``create_directories`` for the configured
    artifacts root.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and ensure the artifacts root exists.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the params YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the data-validation settings from the loaded YAML.

        Reads the ``data_validation`` section of the config and the
        ``COLUMNS`` section of the schema, and calls ``create_directories``
        for the stage's ``root_dir`` before returning.

        Returns:
            A ``DataValidationConfig`` populated from those sections.
        """
        validation_section = self.config.data_validation
        column_schema = self.schema.COLUMNS

        create_directories([validation_section.root_dir])

        return DataValidationConfig(
            root_dir=validation_section.root_dir,
            STATUS_FILE=validation_section.STATUS_FILE,
            unzip_data_dir=validation_section.unzip_data_dir,
            all_schema=column_schema,
        )


In [14]:
import os 
from CustomerChurnPrediction import logger


In [15]:
class DataValidation:
    """Checks that the ingested CSV only contains columns known to the schema."""

    def __init__(self, config: "DataValidationConfig"):
        """Store the stage configuration.

        Args:
            config: Object exposing ``unzip_data_dir`` (CSV path),
                ``STATUS_FILE`` (status output path) and ``all_schema``
                (mapping keyed by allowed column name).
        """
        self.config = config

    def validate_all_columns(self) -> bool:
        """Validate the CSV's column names against the schema keys.

        The status is computed over *all* columns and written to
        ``STATUS_FILE`` exactly once. A single unexpected column anywhere in
        the file makes the result ``False``; it is not overridden by later
        columns that do match.

        Only one direction is checked: columns listed in the schema but
        absent from the CSV are not reported.

        Returns:
            ``True`` when every column in the CSV is a key of ``all_schema``,
            ``False`` otherwise.

        Raises:
            Any exception raised by ``pd.read_csv`` or by opening the status
            file propagates unchanged.
        """
        data = pd.read_csv(self.config.unzip_data_dir)
        allowed_columns = set(self.config.all_schema.keys())

        unexpected_columns = [
            col for col in data.columns if col not in allowed_columns
        ]
        validation_status = not unexpected_columns

        # Same message format for both outcomes so the file is easy to parse.
        with open(self.config.STATUS_FILE, "w") as f:
            f.write(f"validation status: {validation_status}")

        return validation_status

In [16]:
# Run the data-validation stage end to end. Errors propagate as-is (a
# try/except that only re-raises adds nothing), and ending the cell on the
# call lets the notebook display the returned validation status.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_config()
data_validation = DataValidation(config=data_validation_config)
data_validation.validate_all_columns()

[2024-03-24 19:16:09,408:INFO:yaml file:config/config.yaml loaded successfully]
[2024-03-24 19:16:09,410:INFO:yaml file:params.yaml loaded successfully]
[2024-03-24 19:16:09,412:INFO:yaml file:Schema.yaml loaded successfully]
[2024-03-24 19:16:09,413:INFO:created directory at:artifacts]
[2024-03-24 19:16:09,413:INFO:created directory at:artifacts/data_validation]
