In [1]:
from pathlib import Path
import pandas as pd
import glob

from typing import List, Dict

In [2]:
# Working locations, all resolved against the directory the notebook runs in:
# CSV inputs are read from ./data and results are written to ./export.
current_dir = Path.cwd()
data_dir = current_dir.joinpath("data")
export_dir = current_dir.joinpath("export")

In [24]:
class ExtractFile:
    """Collect every ``*.csv`` file in a directory and load them as one DataFrame.

    Attributes:
        file_path: Directory that is searched (non-recursively) for CSV files.
        files: Sorted list of the CSV paths found at construction time.
    """

    def __init__(self, file_path: Path):
        """Record the directory and snapshot the CSV files it contains.

        Args:
            file_path: Directory to scan; a plain string is accepted and
                converted to ``Path``.
        """
        self.file_path = Path(file_path)
        # Materialise the glob: a bare generator would be exhausted after the
        # first call to ``result()``. Sorting makes the row order reproducible
        # regardless of filesystem enumeration order.
        self.files = sorted(self.file_path.glob("*.csv"))

    def result(self) -> pd.DataFrame:
        """Read all discovered CSV files and concatenate them row-wise.

        Returns:
            A single DataFrame with a fresh 0..n-1 index.

        Raises:
            ValueError: If no CSV file was found in ``file_path``.
        """
        if not self.files:
            # pd.concat would raise ValueError("No objects to concatenate");
            # keep the exception type but say which directory was empty.
            raise ValueError(f"No CSV files found in {self.file_path}")

        frames = [pd.read_csv(csv_file) for csv_file in self.files]
        return pd.concat(frames, ignore_index=True)
    
    
class TransformDF:
    """Clean the raw sales DataFrame and rank it by quantity sold.

    The input frame is copied, so the caller's DataFrame is never modified.
    """

    def __init__(self, input_df: pd.DataFrame):
        """Keep a private copy of ``input_df`` to transform."""
        self.df = input_df.copy()

    def _convert_str_num(self, input_str: str) -> int:
        """Convert a quantity label such as ``"1.2K sold"`` to an integer.

        Args:
            input_str: Label containing a number, an optional ``K`` (thousand)
                or ``M`` (million) suffix in either case, and an optional
                ``sold`` marker. Values that are already numeric are accepted
                and simply cast to ``int``.

        Returns:
            The quantity as a whole number.

        Raises:
            ValueError: If the remaining text is not a valid number.
        """
        # pandas yields numbers, not strings, when a CSV column is purely
        # numeric; those have no ``.replace`` and need no parsing.
        if not isinstance(input_str, str):
            return int(input_str)

        text = input_str.upper().replace("SOLD", "").strip()
        for suffix, multiplier in (("K", 1_000), ("M", 1_000_000)):
            if suffix in text:
                # round() rather than int(): float("1.005") * 1000 evaluates
                # to 1004.9999999999999, which int() would truncate to 1004.
                return round(float(text.replace(suffix, "")) * multiplier)
        return int(text)

    def _convert_dtype(self) -> None:
        """Replace the ``quantity_sold`` labels with integer quantities."""
        self.df["quantity_sold"] = self.df["quantity_sold"].apply(self._convert_str_num).astype(int)

    def result(self):
        """Return the cleaned frame sorted by ``quantity_sold``, highest first.

        The index is reset to 0..n-1 after sorting.
        """
        self._convert_dtype()
        self.df.sort_values(by="quantity_sold", inplace=True, ascending=False)
        self.df.reset_index(drop=True, inplace=True)
        return self.df
    
    
class ExtractExportDF:
    """Produce top-seller views of a sales DataFrame and optionally export them.

    The frame must have ``quantity_sold`` (sortable) and ``product_category``
    columns. Exports go to ``<cwd>/export`` and contain at most
    ``_head_num`` rows each.
    """

    def __init__(self, input_df: pd.DataFrame) -> None:
        """Keep a private copy of ``input_df`` and set the export defaults."""
        self.df = input_df.copy()

        # Number of top rows written per export / returned per category.
        self._head_num = 10
        self._export_dir = Path.cwd() / "export"

    def _ensure_export_dir(self) -> None:
        """Create the export directory if it does not exist yet."""
        self._export_dir.mkdir(parents=True, exist_ok=True)

    def get_by_qs(self, export: bool = False) -> pd.DataFrame:
        """Get the data ordered by quantity sold, highest first.

        Sorts the internal frame in place and resets its index.

        Args:
            export: When true, write the top ``_head_num`` rows to
                ``Data-by-QS.csv`` in the export directory.

        Returns:
            The full sorted frame (the internal frame itself, not a copy).
        """
        # A stable sort keeps tied rows in their existing order, so repeated
        # calls always produce the same ranking.
        self.df.sort_values(by="quantity_sold", inplace=True, ascending=False, kind="stable")
        self.df.reset_index(drop=True, inplace=True)

        if export:
            self._ensure_export_dir()
            # NOTE: the index column is written here (unlike the per-category
            # export); kept as-is so the existing file format does not change.
            self.df.head(self._head_num).to_csv(self._export_dir / "Data-by-QS.csv")
        return self.df

    def get_by_qs_cat(self, export: bool = False) -> List[pd.DataFrame]:
        """Get the top sellers of each product category.

        Args:
            export: When true, write each category's rows to
                ``<category>.csv`` in the export directory (without index).

        Returns:
            One DataFrame per category, each holding at most ``_head_num``
            rows ordered by quantity sold, highest first. Rows without a
            category are skipped.
        """
        # Rank on a local view so the result does not depend on whether
        # get_by_qs() was called first; the internal frame is left untouched.
        ranked = self.df.sort_values(by="quantity_sold", ascending=False, kind="stable")
        # NaN never compares equal to itself and cannot be used as a file
        # name, so missing categories are dropped up front.
        categories = ranked["product_category"].dropna().unique().tolist()

        if export:
            self._ensure_export_dir()

        per_category = []
        for category in categories:
            top_rows = ranked[ranked["product_category"] == category].head(self._head_num)
            per_category.append(top_rows)

            if export:
                top_rows.to_csv(self._export_dir / f"{category}.csv", index=False)

        return per_category

In [26]:
# Pipeline: load every CSV under data_dir, normalise and rank the rows,
# then write the overall and per-category top sellers to the export folder.
df = TransformDF(ExtractFile(data_dir).result()).result()

data_df = ExtractExportDF(df)
data_df.get_by_qs(export=True)
data_df.get_by_qs_cat(export=True)