In [1]:
%pwd

'c:\\Users\\karthikeya\\Insurance_Premium_Prediction\\notebooks'

In [3]:
import os

# Step up one level only while the kernel is still inside the `notebooks`
# folder. An unconditional os.chdir("../") climbs one more directory every
# time this cell is re-run, which silently breaks the relative paths below.
# NOTE(review): the guard relies on the folder being named `notebooks`, as
# shown by the %pwd output above - adjust if the folder is renamed.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\karthikeya\\Insurance_Premium_Prediction'

In [None]:
import os
import sys
from src.logger import logger
import pandas as pd

from sklearn.model_selection import train_test_split
from dataclasses import dataclass
from src.utils import DatabaseHandler
import sqlite3
from src.db_paths import db_path, table_name, query

In [13]:
@dataclass
class DatabaseConfig:
    """Connection settings for the SQLite source database.

    Each field defaults to the same-named value imported from ``src.db_paths``.
    """
    # Handed to sqlite3.connect() by DatabaseHandler.
    database_path: str = db_path
    # Not read anywhere in the visible notebook cells.
    table_name: str = table_name
    # Handed to pd.read_sql_query() by DatabaseHandler.
    sql_query: str = query

In [None]:

class DatabaseHandler:
    """Open a SQLite connection and load the configured query into a DataFrame.

    NOTE(review): this definition shadows the ``DatabaseHandler`` imported from
    ``src.utils`` in the imports cell - confirm which one is meant to be used.
    """

    def __init__(self):
        """
        Initialize the database handler with the DatabaseConfig to the SQLite database.
        """
        self.db_config = DatabaseConfig()
        # Defined up front so disconnect() is safe to call before (or without)
        # a successful connection.
        self.connection = None
        self.cursor = None

    def initating_data_extraction_from_database(self) -> pd.DataFrame:
        """
        Connect to the configured database and run the configured SQL query.

        The connection is left open on return (and on a failed read); call
        ``disconnect()`` when done.

        Returns:
            pd.DataFrame: the result of ``sql_query``.

        Raises:
            sqlite3.Error: if the connection cannot be opened.
            Exception: whatever ``pd.read_sql_query`` raises for a failed read.
        """
        try:
            self.connection = sqlite3.connect(self.db_config.database_path)
            self.cursor = self.connection.cursor()
        except sqlite3.Error as e:
            # Log and propagate: carrying on without a connection would only
            # fail later with a less helpful error.
            logger.error(f"Error connecting to database: {e}")
            raise
        # logger.info / logger.error match the calls used elsewhere in this
        # notebook. NOTE(review): a stdlib Logger.log() needs a level as its
        # first argument - confirm what src.logger provides.
        logger.info("Successfully connected to the SQLite database.")

        try:
            return pd.read_sql_query(self.db_config.sql_query, self.connection)
        except Exception as e:
            # Re-raise instead of implicitly returning None, which would
            # violate the declared return type and break callers downstream.
            logger.error(f"Error reading SQLite to DataFrame: {e}")
            raise

    def disconnect(self) -> None:
        """
        Disconnect from the SQLite database.

        Does nothing when no connection is currently open.

        Raises:
            Exception: whatever closing the connection raises, after logging it.
        """
        if self.connection is None:
            return

        try:
            self.connection.close()
        except Exception as e:
            logger.error(f"Error closing SQLite database connection: {e}")
            raise

        # Drop the stale handles so a repeated disconnect() is a no-op.
        self.connection = None
        self.cursor = None
        logger.info("Disconnected from the SQLite database.")


In [None]:
@dataclass
class DataIngestionConfig:
    """Output locations for the CSV files written by DataIngestion.

    All defaults are relative paths under an ``artifacts`` directory, so they
    resolve against the current working directory.
    """
    # Where the training split is written.
    train_data_path: str = os.path.join("artifacts", "train_data.csv")
    # Where the test split is written.
    test_data_path: str = os.path.join("artifacts", "test_data.csv")
    # Where the full, unsplit data is written.
    raw_data_path: str = os.path.join("artifacts","raw_data.csv")

In [None]:
class DataIngestion:
    """Read the raw data from SQLite and write raw/train/test CSV files."""

    def __init__(self) -> None:
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self):
        """
        Extract the data, split it 70/30 and persist all three frames as CSV.

        Returns:
            tuple: ``(raw_data_path, train_data_path, test_data_path)``.

        Raises:
            ValueError: if the database handler returns no data.
            Exception: any other failure, re-raised after being logged.
        """
        logger.info("Entered the data ingestion method")

        try:
            logger.info("Establishing Connection with SQLite databse")
            db_handler = DatabaseHandler()
            try:
                raw_data = db_handler.initating_data_extraction_from_database()
                # Fail here with a clear message rather than deep inside
                # train_test_split if the handler produced nothing.
                if raw_data is None:
                    raise ValueError("No data was returned from the SQLite database")
                logger.info("Successfuly read the raw data as dataframe")
            finally:
                # Always release the connection, even when extraction fails.
                db_handler.disconnect()
                logger.info("Disconnected from SQLite database")

            logger.info("Train Test Split Initiated")

            train_set, test_set = train_test_split(raw_data, test_size=0.3, random_state=42)

            outputs = (
                (raw_data, self.ingestion_config.raw_data_path),
                (train_set, self.ingestion_config.train_data_path),
                (test_set, self.ingestion_config.test_data_path),
            )
            for frame, path in outputs:
                # to_csv does not create missing parent directories, so a
                # fresh checkout without `artifacts/` would otherwise fail.
                parent = os.path.dirname(path)
                if parent:
                    os.makedirs(parent, exist_ok=True)
                frame.to_csv(path, index=False, header=True)

            logger.info("Data ingestion is complete")

            return (
                self.ingestion_config.raw_data_path,
                self.ingestion_config.train_data_path,
                self.ingestion_config.test_data_path,
            )

        except Exception as e:
            # f-prefix added: the original logged the literal text "{e}".
            logger.error(f"Probelm initating the data ingestion method {e}")
            raise