In [35]:
import os

In [36]:
%pwd

'C:\\Users\\RICH-FILES\\Desktop\\ml\\AI-powered-Bank-Product-Recommender-Chatbot'

In [37]:
# Step up one level from the current working directory. The move is relative,
# so re-running this cell climbs one more level each time.
os.chdir(os.pardir)

In [38]:
%pwd

'C:\\Users\\RICH-FILES\\Desktop\\ml'

In [39]:
# Project root that every later cell resolves relative paths against.
# Set BANK_PRODUCTS_PROJECT_DIR to run the notebook on another machine; when it
# is unset the original author's checkout location is used.
project_dir = os.environ.get(
    "BANK_PRODUCTS_PROJECT_DIR",
    "C:/Users/RICH-FILES/Desktop/ml/AI-powered-Bank-Product-Recommender-Chatbot",
)
os.chdir(project_dir)

In [None]:
from dataclasses import dataclass
from pathlib import Path


    
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data-ingestion stage.

    ``ConfigurationManager.get_data_ingestion_config`` fills every field from
    the ``data_ingestion`` section of the YAML configuration. The dataclass is
    frozen, so assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Stage directory as given by ``data_ingestion.root_dir`` in the config.
    root_dir: Path
    # Not referenced by the ingestion code in this notebook.
    # NOTE(review): presumably the folder for raw inputs -- confirm.
    raw_data_dir: Path
    # Not referenced by the ingestion code in this notebook.
    # NOTE(review): presumably the extracted dataset file -- confirm.
    data_file: Path
    # Not referenced by the ingestion code in this notebook.
    # NOTE(review): presumably the exported customer CSV -- confirm.
    customers_csv: Path
    # Download target for ``source_URL``; also the archive that gets unzipped.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path
    # Remote location the archive is downloaded from.
    source_URL: str
    

    

In [41]:
from BankProducts.constants import *
from BankProducts.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects.

    Args:
        config_filepath: Location of the main configuration YAML.
            Defaults to ``CONFIG_FILE_PATH``.
        params_filepath: Location of the parameters YAML.
            Defaults to ``PARAMS_FILE_PATH``.

    Attributes:
        config: Parsed main configuration, read with attribute access
            (``config.artifacts_root``, ``config.data_ingestion``).
        params: Parsed parameters file.

    Constructing the manager has a side effect: the ``artifacts_root``
    directory named in the configuration is created.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        #schema_filepath: str = SCHEMA_FILE_PATH,
        ):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        #self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the settings object for the data-ingestion stage.

        Reads the ``data_ingestion`` section of the configuration, makes sure
        the stage's ``root_dir`` exists, and wraps the path entries in
        ``pathlib.Path``.

        Returns:
            DataIngestionConfig: frozen settings for ``DataIngestion``.
        """
        config = self.config.data_ingestion

        # ``artifacts_root`` was already created in ``__init__``; what this
        # stage still needs is its own directory (previously the root was
        # simply created a second time and ``root_dir`` never was).
        create_directories([config.root_dir])

        return DataIngestionConfig(
            root_dir=Path(config.root_dir),
            raw_data_dir=Path(config.raw_data_dir),
            data_file=Path(config.data_file),
            customers_csv=Path(config.customers_csv),
            local_data_file=Path(config.local_data_file),
            unzip_dir=Path(config.unzip_dir),
            source_URL=config.source_URL,
        )

In [43]:
# Import necessary libraries
import pandas as pd
from pathlib import Path
from sqlalchemy import create_engine
from BankProducts import logger
import urllib.request as request
import zipfile
from dotenv import load_dotenv
import os
from BankProducts.utils.common import get_size
import sqlite3

In [44]:
class DataIngestion:
    """Data-ingestion stage: download, unzip, load into PostgreSQL, export CSV.

    The methods are meant to be called in this order:
    ``download_file`` -> ``extract_zip_file`` -> ``initiate_data_ingestion``
    -> ``extract_and_save_data``.

    Args:
        config: Paths and source URL for the stage.

    Database credentials are not part of ``config``; they are read from the
    ``DB_USER``, ``DB_PASSWORD``, ``DB_HOST``, ``DB_PORT`` and ``DB_NAME``
    environment variables (a ``.env`` file is loaded first).
    """

    # Environment variables needed to reach the PostgreSQL database.
    _DB_ENV_VARS = ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME")

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def _create_db_engine(self):
        """Create a SQLAlchemy engine from the ``DB_*`` environment variables.

        Returns:
            A ``postgresql+psycopg2`` engine.

        Raises:
            ValueError: if any of the required variables is not set. Without
                this check the literal text ``None`` would be interpolated
                into the connection URL.
        """
        load_dotenv()
        settings = {name: os.getenv(name) for name in self._DB_ENV_VARS}
        missing = [name for name, value in settings.items() if value is None]
        if missing:
            raise ValueError(
                f"Missing database environment variables: {', '.join(missing)}"
            )
        # NOTE(review): credentials are interpolated verbatim, so a password
        # containing URL-reserved characters must already be escaped in .env.
        return create_engine(
            "postgresql+psycopg2://"
            f"{settings['DB_USER']}:{settings['DB_PASSWORD']}"
            f"@{settings['DB_HOST']}:{settings['DB_PORT']}/{settings['DB_NAME']}"
        )

    def _resolve_csv_path(self) -> Path:
        """Pick the CSV file that ``initiate_data_ingestion`` should load.

        ``unzip_dir`` is created as a directory by ``extract_zip_file``, and
        ``pandas.read_csv`` cannot read a directory. When it is a directory
        the configured ``data_file`` is used instead; otherwise ``unzip_dir``
        is returned unchanged so a configuration that points it at a file
        keeps working.
        """
        unzip_path = Path(self.config.unzip_dir)
        if unzip_path.is_dir():
            # NOTE(review): assumes ``data_file`` names the extracted dataset
            # CSV -- confirm against config.yaml.
            return Path(self.config.data_file)
        return unzip_path

    def download_file(self):
        """Download ``source_URL`` to ``local_data_file`` unless it already exists."""
        if os.path.exists(self.config.local_data_file):
            logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}")
            return

        # urlretrieve opens the target for writing and does not create
        # missing parent directories itself.
        Path(self.config.local_data_file).parent.mkdir(parents=True, exist_ok=True)
        filename, headers = request.urlretrieve(
            url = self.config.source_URL,
            filename = self.config.local_data_file
        )
        logger.info(f"{filename} download! with following info: \n{headers}")

    def extract_zip_file(self):
        """Extract the archive at ``local_data_file`` into ``unzip_dir``.

        The target directory is created if needed. Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

    def initiate_data_ingestion(self):
        """Load the extracted CSV into the ``bank_transactions`` table.

        The table is replaced if it already exists. A missing CSV and a failed
        database write are both logged rather than raised; missing ``DB_*``
        variables raise ``ValueError``.
        """
        logger.info("Starting data ingestion process...")

        engine = self._create_db_engine()
        try:
            # Load CSV into a DataFrame
            try:
                df = pd.read_csv(self._resolve_csv_path())
            except FileNotFoundError:
                logger.error("CSV file not found. Please check the file path.")
                return

            # Load data into PostgreSQL
            try:
                df.to_sql("bank_transactions", engine, if_exists="replace", index=False)
                logger.info(" Dataset successfully loaded into PostgreSQL.")
            except Exception as e:
                logger.error(f" Failed to load data into PostgreSQL: {e}")
        finally:
            # Release pooled connections; each call builds its own engine.
            engine.dispose()

    def extract_and_save_data(self):
        """Read ``bank_transactions`` back from PostgreSQL and export it to CSV.

        Logs an error and returns without writing anything when the table does
        not exist. On success the rows are written to ``customer_data.csv`` in
        the current working directory.
        """
        # Imported here because the notebook's import cell only brings in
        # ``create_engine``. SQLAlchemy 2.x connections reject plain SQL
        # strings, so the existence check must be wrapped in ``text``.
        from sqlalchemy import text

        logger.info("Extracting and saving data...")

        # Query used to fetch the data from the PostgreSQL database.
        query = """        SELECT * FROM bank_transactions;
        """

        engine = self._create_db_engine()
        try:
            # to_regclass yields NULL when the relation does not exist.
            with engine.connect() as connection:
                result = connection.execute(text("SELECT to_regclass('bank_transactions')"))
                table_exists = result.scalar() is not None
            if not table_exists:
                logger.error("Table 'bank_transactions' does not exist in the database.")
                return

            logger.info("Fetching data from the 'bank_transactions' table...")

            # Load into a DataFrame
            df = pd.read_sql(query, engine)
        finally:
            engine.dispose()

        # Export to CSV
        # NOTE(review): the output name is hard-coded although the config
        # carries ``customers_csv`` -- confirm which location is intended.
        df.to_csv("customer_data.csv", index=False)

        print(" Data successfully exported to 'customer_data.csv'")

In [45]:
# Run the ingestion stage end to end. Order matters: the archive must be
# downloaded before it is unzipped, and the table must be loaded before it is
# read back and exported.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
    data_ingestion.initiate_data_ingestion()
    data_ingestion.extract_and_save_data()

except Exception as e:
    logger.exception(e)
    # Bare ``raise`` re-raises the active exception with its traceback intact.
    raise


[2025-06-01 19:26:59,588: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-01 19:26:59,597: INFO: common: yaml file: params.yaml loaded successfully]
[2025-06-01 19:26:59,599: INFO: common: created directory at: artifacts]
[2025-06-01 19:26:59,601: INFO: common: created directory at: artifacts]
[2025-06-01 19:26:59,603: ERROR: 1798020651: 'DataIngestionConfig' object has no attribute 'source_URL']
Traceback (most recent call last):
  File "C:\Users\RICH-FILES\AppData\Local\Temp\ipykernel_16136\1798020651.py", line 5, in <module>
    data_ingestion.download_file()
  File "C:\Users\RICH-FILES\AppData\Local\Temp\ipykernel_16136\2756961210.py", line 8, in download_file
    url = self.config.source_URL,
AttributeError: 'DataIngestionConfig' object has no attribute 'source_URL'


AttributeError: 'DataIngestionConfig' object has no attribute 'source_URL'