In [42]:
import os 

In [43]:
%pwd

'C:\\Users\\RICH-FILES\\Desktop\\ml\\AI-powered-Bank-Product-Recommender-Chatbot'

In [44]:
os.chdir("../.")

In [45]:
%pwd

'C:\\Users\\RICH-FILES\\Desktop\\ml'

In [46]:
project_dir = "C:/Users/RICH-FILES/Desktop/ml/AI-powered-Bank-Product-Recommender-Chatbot"
os.chdir(project_dir)

In [47]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """
    Data Transformation Configuration
    """
    root_dir: Path
    transformed_data_file: Path
    customer_path: Path
    product_path: Path

In [48]:
from BankProducts.constants import *
from BankProducts.utils.common import read_yaml, create_directories

In [49]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,    
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH,
        ):
       
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)        
    
        create_directories([self.config.artifacts_root])
        
    def get_data_transformation_config(self)-> DataTransformationConfig:
        """
        Returns Data Transformation Configuration
        """
        config = self.config.data_transformation
        
        create_directories([config.artifacts_root])
        
        data_transformation_config = DataTransformationConfig(
            root_dir=config.root_dir,
            transformed_data_dir= config.transformed_data_dir,
            product_path= Path(config.product_path),
            customer_path= Path(config.customer_path)
            )
        
        
        return data_transformation_config
    

In [50]:
import os  
from BankProducts import logger
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
class DataTransformation:
    def __init__(self, config: DataTransformationConfig):
        self.config = config
        self.data = None
        self.transformed_data = None
        
       
    def join_dataset(self):
        """
        Join the customer and product datasets
        """
        try:
            customer_data = pd.read_csv(self.config.customer_path)
            product_data = pd.read_csv(self.config.product_path)
            
            # Example join operation
            joined_data = pd.merge(customer_data, product_data, on="customer_id")
            
            # Save the joined data
            output_path = os.path.self.config.root_dir
            joined_data.to_csv(output_path, index=False)
        except Exception as e:
            print(f"Error joining datasets: {e}")
            
        

    def load_data(self):
        """
        Load data from the specified path
        """
        try:
            self.data = pd.read_csv(self.config.root_dir)
            logger.info("Data loaded successfully")
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    def transform_data(self):
        """
        Transform the data as per the requirements
        """
        try:
            # Example transformation: splitting data into train and test sets
            train_data, test_data = train_test_split(self.data, test_size=0.2)
            self.transformed_data = {
                "train": train_data,
                "test": test_data
            }
            logger.info("Data transformed successfully")
        except Exception as e:
            logger.error(f"Error transforming data: {e}")
            raise

    def save_transformed_data(self):
        """
        Save the transformed data to the specified path
        """
        try:
            for key, value in self.transformed_data.items():
                output_path = os.path.join(self.config.transformed_data_dir, f"{key}_data.csv")
                value.to_csv(output_path, index=False)      
                logger.info(f"Transformed data saved to {output_path}")
        except Exception as e:
            logger.error(f"Error saving transformed data: {e}")
            raise
        
    
        

In [52]:
customers  = pd.read_csv("artifacts/data_ingestion/raw_data/bank_customers.csv")
customers.head()
print(customers.shape)
print(customers.columns)    
print(customers.dtypes)
print(customers.isnull().sum())
print(customers.describe())         
print(customers.nunique())   

customers.info()    

(7000, 10)
Index(['customer_id', 'name', 'age', 'gender', 'occupation', 'annual_income',
       'marital_status', 'credit_score', 'existing_products',
       'financial_goals'],
      dtype='object')
customer_id           object
name                  object
age                    int64
gender                object
occupation            object
annual_income        float64
marital_status        object
credit_score           int64
existing_products     object
financial_goals       object
dtype: object
customer_id             0
name                    0
age                     0
gender                  0
occupation              0
annual_income           0
marital_status          0
credit_score            0
existing_products    1802
financial_goals         0
dtype: int64
               age  annual_income  credit_score
count  7000.000000    7000.000000   7000.000000
mean     43.668857  107953.817249    574.838143
std      15.313798   53750.112291    159.857563
min      18.000000   15000.7900

In [53]:
products = pd.read_csv("artifacts/data_ingestion/raw_data/product_catalog.csv")
products.head() 
print(products.shape)
print(products.columns)
print(products.dtypes)
print(products.isnull().sum())
print(products.describe())
print(products.nunique())
print(products.info())
print(products.isnull().sum())
print(products.describe())  

(5, 3)
Index(['product_name', 'description', 'eligibility'], dtype='object')
product_name    object
description     object
eligibility     object
dtype: object
product_name    0
description     0
eligibility     0
dtype: int64
           product_name                                        description  \
count                 5                                                  5   
unique                5                                                  5   
top     Savings Account  A basic savings account with competitive inter...   
freq                  1                                                  1   

                             eligibility  
count                                  5  
unique                                 5  
top     All customers above 18 years old  
freq                                   1  
product_name    5
description     5
eligibility     5
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):


In [None]:
cust = pd.read_csv(customer_path)
cust.head()

NameError: name 'customer_path' is not defined