In [14]:
%reload_ext autoreload
%autoreload 2
import os
import sys
from dotenv import load_dotenv, find_dotenv
from dataclasses import dataclass
from pathlib import Path
# Load environment variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())
# Append the folder named by the PROJECT_FOLDER environment variable to the
# import search path; presumably this is what lets the `src` package imported
# below be found — TODO confirm. os.getenv returns None when the variable is unset.
sys.path.append(os.getenv("PROJECT_FOLDER"))
from src.utils.common import logger, read_yaml, create_directories

@dataclass(frozen=True)
class DataLabelingConfig:
    """Immutable bundle of the paths used by the data-labeling stage."""

    # Directory of the data-labeling stage.
    root_dir: Path
    # Location of the input data to be labeled.
    source_path: Path
    # Directory that receives the labeled output.
    target_dir: Path

In [21]:
# os.chdir("..")

In [22]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage configuration objects."""

    def __init__(
        self,
        config_filepath: str = os.getenv("CONFIG_FILE_PATH"),
        params_filepath: str = os.getenv("PARAMS_FILE_PATH"),
        schema_filepath: str = os.getenv("SCHEMA_FILE_PATH"),
    ):
        """
        Read the config, params and schema YAML files and create the artifacts root.

        The default paths are looked up with `os.getenv` once, when this class
        is defined, not on every instantiation.

        Args:
            config_filepath (str): path to the configuration YAML file
            params_filepath (str): path to the parameters YAML file
            schema_filepath (str): path to the schema YAML file
        """
        # The generator is consumed left to right, so the files are read in
        # the order config -> params -> schema.
        self.config, self.params, self.schema = (
            read_yaml(Path(yaml_path))
            for yaml_path in (config_filepath, params_filepath, schema_filepath)
        )
        create_directories([self.config.artifacts_root])

    def get_data_labeling_config(self) -> DataLabelingConfig:
        """
        Build the data-labeling configuration and create its root directory.

        The values are handed over exactly as they appear in the loaded
        `data_labeling` section; no conversion is applied here.

        Returns:
            DataLabelingConfig: Configuration for data labeling
        """
        labeling_section = self.config.data_labeling
        create_directories([labeling_section.root_dir])
        return DataLabelingConfig(
            root_dir=labeling_section.root_dir,
            source_path=labeling_section.source_path,
            target_dir=labeling_section.target_dir,
        )

In [23]:
import pandas as pd

class DataLabeling:
    """Derives a binary `sentiment` label from the `overall` rating and saves it as CSV."""

    def __init__(self, config: "DataLabelingConfig"):
        """
        Instantiate `DataLabeling` class

        Args:
            config (DataLabelingConfig): configuration for data labeling
        """
        self.config = config

    def label_data(self, threshold: float = 3, output_filename: str = "sample_data.csv") -> None:
        """
        Label data

        Reads the CSV at `config.source_path`, adds a `sentiment` column that is
        1 where `overall` >= `threshold` and 0 otherwise, drops the `overall`
        column and writes the result (without the index) to
        `<config.target_dir>/<output_filename>`.

        Args:
            threshold (float): lowest `overall` value labeled as 1. Defaults to 3.
            output_filename (str): name of the CSV file written into
                `config.target_dir`. Defaults to "sample_data.csv".

        Raises:
            KeyError: if the source file has no `overall` column.
        """
        reviews = pd.read_csv(self.config.source_path)

        # Vectorized comparison instead of a per-row lambda.
        labeled = reviews.assign(
            sentiment=(reviews["overall"] >= threshold).astype(int)
        ).drop(columns=["overall"])

        # Path() accepts both str and Path, so the join works whichever type the
        # configuration carries (str + "/..." fails when target_dir is a Path).
        target_dir = Path(self.config.target_dir)
        # Make sure the output directory exists before writing into it.
        target_dir.mkdir(parents=True, exist_ok=True)
        labeled.to_csv(target_dir / output_filename, index=False)

In [24]:
# Run the data-labeling stage: load the configuration, build the stage object
# and write the labeled CSV.
try:
    configuration_manager = ConfigurationManager()
    data_labeling = DataLabeling(config=configuration_manager.get_data_labeling_config())
    data_labeling.label_data()
except Exception:
    # Log the message together with the full traceback, then re-raise so the
    # cell visibly fails instead of looking successful after a failed run.
    logger.exception("Data labeling failed")
    raise


2024-03-06 21:02:14,628 - sentiment-classifier-logger - INFO - yaml file: C:\Users\USER\Documents\GitHub\customer-product-reviews-sentiment-classifier\config\config.yaml loaded successfully
2024-03-06 21:02:14,630 - sentiment-classifier-logger - INFO - yaml file: C:\Users\USER\Documents\GitHub\customer-product-reviews-sentiment-classifier\params.yaml loaded successfully
2024-03-06 21:02:14,634 - sentiment-classifier-logger - INFO - yaml file: C:\Users\USER\Documents\GitHub\customer-product-reviews-sentiment-classifier\schema.yaml loaded successfully
2024-03-06 21:02:14,636 - sentiment-classifier-logger - INFO - Created directory at: artifacts
2024-03-06 21:02:14,637 - sentiment-classifier-logger - INFO - Created directory at: artifacts/data_labeling
