In [1]:
import os

In [2]:
%pwd

'/home/rohwid/Pacmann/lazada-id-reviews/notebooks'

In [3]:
# Change to the main (project root) directory so the rest of the notebook
# is executed from there.
# Guarded so that re-running this cell does not climb one more level each
# time: only move up while the working directory is still the `notebooks` folder.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
# Load KEY=VALUE pairs from the project's .env file into the process environment.
with open('.env') as f:
    os.environ.update(
        # Split on the first '=' only, so a value that itself contains '='
        # (e.g. a connection URI with query parameters) stays intact.
        line.strip().split('=', 1)
        for line in f
        # Blank lines and '#' comment lines carry no KEY=VALUE pair; skip them
        # instead of failing while unpacking.
        if line.strip() and not line.lstrip().startswith('#')
    )

In [5]:
%pwd

'/home/rohwid/Pacmann/lazada-id-reviews'

### Data Ingestion Config

This code will be applied in `src/LazadaIDReviews/entity/config_entity.py`.

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionSQLConfig:
    """Immutable settings for the SQL data-ingestion step.

    Instances are built by ``ConfigurationManager.get_data_ingestion_sql_config``
    and consumed by ``DataIngestionSQL``.
    """
    # Directory for the ingestion artifacts (created by the configuration manager).
    root_dir: Path
    # Database connection URI handed to SQLAlchemy's create_engine.
    source_URI: str
    # Name of the reviews table as given in the ingestion config
    # (presumably the source table to query -- confirm against the component).
    reviews_table: str
    # Destination CSV file for the reviews data.
    reviews_path: Path
    # Name of the items table as given in the ingestion config.
    items_table: str
    # Destination CSV file for the items data.
    items_path: Path
    # Name of the category table as given in the ingestion config.
    category_table: str
    # Destination CSV file for the category data.
    category_path: Path


### SQL Data Ingestion Config Manager

This code will be applied in `src/LazadaIDReviews/config/configurations.py`.

In [7]:
from LazadaIDReviews.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from LazadaIDReviews.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Read the project's YAML files and build typed configuration entities.

    On construction both YAML files are loaded and the artifacts root named in
    the main config is passed to ``create_directories``.
    """

    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """Load the configuration and parameter files.

        Args:
            config_filepath: Location of the main configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])
    
    def get_data_ingestion_sql_config(self) -> DataIngestionSQLConfig:
        """Build the SQL ingestion settings from the ``ingest_from_sql`` section.

        The ingestion root directory is passed to ``create_directories`` before
        the entity is returned. The database URI is not stored in the YAML
        file; it is read from the ``POSTGRES_URI`` environment variable.

        Returns:
            DataIngestionSQLConfig: the populated, immutable config entity.

        Raises:
            KeyError: if the ``POSTGRES_URI`` environment variable is not set.
        """
        data_ingest_config = self.config.ingest_from_sql

        create_directories([data_ingest_config.root_dir])

        config = DataIngestionSQLConfig(
            # Wrapped in Path like the other path fields, so the value matches
            # the ``root_dir: Path`` annotation on the entity.
            root_dir=Path(data_ingest_config.root_dir),
            source_URI=os.environ["POSTGRES_URI"],
            reviews_table=data_ingest_config.reviews_table,
            reviews_path=Path(data_ingest_config.reviews_path),
            items_table=data_ingest_config.items_table,
            items_path=Path(data_ingest_config.items_path),
            category_table=data_ingest_config.category_table,
            category_path=Path(data_ingest_config.category_path),
        )

        return config

### Perform data ingestion

This code will be applied in `src/LazadaIDReviews/components/data_ingestion.py`.

In [9]:
import pandas as pd

from sqlalchemy import create_engine 
from tqdm import tqdm

from LazadaIDReviews import logger

class DataIngestionSQL:
    """Copy the reviews, items and category tables from a SQL database to CSV files."""

    def __init__(self, config: DataIngestionSQLConfig):
        """Store the ingestion settings.

        Args:
            config: Connection URI, table names and output paths for this step.
        """
        self.config = config

    def sql_to_csv(self) -> None:
        """Read the configured tables from the database and dump each to its CSV path.

        All three tables are read first, over a single connection, and then
        written out without the DataFrame index.

        Raises:
            Exception: any error raised while connecting, querying or writing
                is logged and then re-raised unchanged.
        """
        try:
            engine = create_engine(self.config.source_URI)

            # The context manager closes the connection on success and on
            # failure, and avoids touching an unbound `conn` when connecting
            # itself is what failed.
            with engine.connect() as conn:
                # Table names come from the config rather than being hard-coded,
                # so the `*_table` settings actually take effect.
                logger.info("Querying reviews data from SQL Database.")
                df_reviews = pd.read_sql_table(self.config.reviews_table, conn)

                logger.info("Querying items data from SQL Database.")
                df_items = pd.read_sql_table(self.config.items_table, conn)

                logger.info("Querying category data from SQL Database.")
                df_category = pd.read_sql_table(self.config.category_table, conn)

            logger.info("Dump data from SQL Database to CSV.")
            df_reviews.to_csv(self.config.reviews_path, index=False)
            df_items.to_csv(self.config.items_path, index=False)
            df_category.to_csv(self.config.category_path, index=False)

            logger.info(f"Data dumped from SQL query into {self.config.root_dir} directory")
        except Exception as e:
            logger.error(e)
            # Bare raise keeps the original traceback intact.
            raise

### Run Ingest from SQL Database

This code will be applied in `src/LazadaIDReviews/pipeline/step_01_data_ingestion.py`.

In [10]:
# Entry point for the SQL ingestion step: build the config entity, then run
# the ingestion component.
try:
    # Loads the YAML files from the default CONFIG_FILE_PATH / PARAMS_FILE_PATH.
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_sql_config()
    
    # Queries the database tables and writes them out as CSV files.
    data_ingestion = DataIngestionSQL(config=data_ingestion_config)
    data_ingestion.sql_to_csv()
except Exception as e:
    # Log, then re-raise so the failure still surfaces in the notebook.
    # NOTE(review): sql_to_csv already logs its own errors before re-raising,
    # so a failure inside it is logged twice.
    logger.error(e)
    raise e

[2024-07-03 20:27:10,826: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-03 20:27:10,831: INFO: common: yaml file: metrics/params.yaml loaded successfully]
[2024-07-03 20:27:10,832: INFO: common: created directory at: artifacts]
[2024-07-03 20:27:10,834: INFO: common: created directory at: artifacts/data-ingestion]
[2024-07-03 20:27:12,702: INFO: 2982854695: Querying reviews data from SQL Database.]
[2024-07-03 20:27:35,471: INFO: 2982854695: Querying items data from SQL Database.]
[2024-07-03 20:27:39,889: INFO: 2982854695: Querying category data from SQL Database.]
[2024-07-03 20:27:42,149: INFO: 2982854695: Dump data from SQL Database to CSV.]
[2024-07-03 20:27:43,054: INFO: 2982854695: Data dumped from SQL query into artifacts/data-ingestion directory]
