In [1]:
%pwd

'c:\\Users\\karthikeya\\Insurance_Premium_Prediction\\notebooks'

In [2]:
import os
os.chdir("../")

In [3]:
%pwd

'c:\\Users\\karthikeya\\Insurance_Premium_Prediction'

In [None]:
import os
import sys
from src.logger import logger
import pandas as pd
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
from src.utils import DatabaseHandler
import sqlite3
from src.db_paths import db_path, table_name, query

In [5]:
@dataclass
class DatabaseConfig:
    database_path: str = db_path
    table_name: str = table_name
    sql_query: str = query

In [6]:

class DatabaseHandler:
    def __init__(self):
        """
        Initialize the database handler with the DatabaseConfig to the SQLite database.
        """
        self.db_config = DatabaseConfig()

    def initating_data_extraction_from_database(self) ->pd.DataFrame:
        try:
            self.connection = sqlite3.connect(self.db_config.database_path)
            self.cursor = self.connection.cursor()
            logger.info("Successfully connected to the SQLite database.")
        except sqlite3.Error as e:
            print(f"Error connecting to database: {e}")

        try:
            df= pd.read_sql_query(self.db_config.sql_query, self.connection)
            return df
        except Exception as e:
            logger.info(f"Error reading SQLite to DataFrame: {e}")

    def disconnect(self) -> None:
        """
        Disconnect from the SQLite database.
        """
        try:
            if self.connection:
                self.connection.close()
                logger.info("Disconnected from the SQLite database.")

        except Exception as e:
            logger.info(f"Error closing SQLite database connection: {e}")
            raise e


In [7]:
@dataclass
class DataIngestionConfig:
    train_data_path: str = os.path.join("artifacts", "train_data.csv")
    test_data_path: str = os.path.join("artifacts", "test_data.csv")
    raw_data_path: str = os.path.join("artifacts","raw_data.csv")

In [None]:
class DataIngestion:

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self):
        logger.info("Entered the data ingestion method")
    
        try :
            logger.info("Establishing Connection with SQLite databse")
            db_handler = DatabaseHandler()
            raw_data = db_handler.initating_data_extraction_from_database()
            
            print(raw_data)

            logger.info("Successfuly read the raw data as dataframe")

            db_handler.disconnect()
            logger.info("Disconnected from SQLite database")

            
            logger.info("Train Test Split Initiated")

            train_set,test_set = train_test_split(raw_data, test_size=0.3, random_state=42)
            os.makedirs(os.path.join("artifacts",'self.ingestion.config.raw_data_path'), exist_ok=True)
            raw_data.to_csv(self.ingestion_config.raw_data_path, index=False, header=True)
            train_set.to_csv(self.ingestion_config.train_data_path, index=False, header=True)
            test_set.to_csv(self.ingestion_config.test_data_path, index=False, header=True)

            logger.info("Data ingestion is complete")

            return(
                self.ingestion_config.raw_data_path,
                self.ingestion_config.train_data_path,
                self.ingestion_config.test_data_path
            )

        except Exception as e:
            logger.info("Probelm initating the data ingestion method {e}")
            raise e

In [11]:
DI = DataIngestion()
DI.initiate_data_ingestion()

[2024-11-15 15:35:04,470, INFO, 4205154732, Entered the data ingestion method ]
[2024-11-15 15:35:04,472, INFO, 4205154732, Establishing Connection with SQLite databse ]
[2024-11-15 15:35:04,474, INFO, 2633698196, Successfully connected to the SQLite database. ]
         age  gender    bmi children smoker     region      medical_history  \
0       46.0    male  21.45      5.0    yes  southeast             Diabetes   
1       25.0  female  25.38      2.0    yes  northwest             Diabetes   
2       38.0    male  44.88      2.0    yes  southwest                        
3       25.0    male  19.89      0.0     no  northwest                        
4       49.0    male  38.21      3.0    yes  northwest             Diabetes   
...      ...     ...    ...      ...    ...        ...                  ...   
999995  59.0    male  46.67      2.0     no  northeast  High blood pressure   
999996  33.0    male  36.83      2.0     no  northeast                        
999997  39.0    male  39.8

('artifacts\\raw_data.csv',
 'artifacts\\train_data.csv',
 'artifacts\\test_data.csv')