In [1]:
import json
import os
import os.path as osp
import pandas as pd

from typing import Dict, List
from abc import ABC, ABCMeta, abstractmethod

In [2]:
# Relative locations of the two sample input files (one JSON, one CSV).
json_path, csv_path = (
    './DataSources/data/source1.json',
    './DataSources/data/source2.csv',
)

In [3]:
class JsonParser:
    """Parser that loads JSON input into a pandas DataFrame."""

    def __init__(self):
        """Stateless parser; there is nothing to configure."""

    def parse(self, filepath, **pd_kwargs):
        """Read ``filepath`` with ``pd.read_json``.

        Args:
            filepath: Location of the JSON input, forwarded unchanged to pandas.
            **pd_kwargs: Extra keyword arguments passed straight to
                ``pd.read_json``.

        Returns:
            The ``pandas.DataFrame`` produced by ``pd.read_json``.
        """
        frame = pd.read_json(filepath, **pd_kwargs)
        return frame
    

    
class CSVParser:
    """Parser that loads CSV input into a pandas DataFrame."""

    def __init__(self):
        """Stateless parser; there is nothing to configure."""

    def parse(self, filepath, **pd_kwargs):
        """Read ``filepath`` with ``pd.read_csv``.

        Args:
            filepath: Location of the CSV input, forwarded unchanged to pandas.
            **pd_kwargs: Extra keyword arguments passed straight to
                ``pd.read_csv``.

        Returns:
            The ``pandas.DataFrame`` produced by ``pd.read_csv``.
        """
        frame = pd.read_csv(filepath, **pd_kwargs)
        return frame
    
    
    
    


In [175]:
class DataSource:
    """A file location bundled with the pandas reader options used to load it."""

    def __init__(self, filepath, **pd_kwargs):
        """Store the file location and the reader keyword arguments.

        Args:
            filepath: Location of the file to load.
            **pd_kwargs: Keyword arguments later forwarded to the parser.
        """
        self.filepath, self.pd_kwargs = filepath, pd_kwargs

    def parse(self, parser):
        """Load the file with ``parser`` and wrap the result.

        Args:
            parser: Object exposing ``parse(filepath, **kwargs)``.

        Returns:
            A ``Dataframe`` wrapping whatever ``parser.parse`` returned.
        """
        raw_frame = parser.parse(self.filepath, **self.pd_kwargs)
        wrapped = Dataframe(raw_frame)
        return wrapped
    
    
    
    
class UK(DataSource):
    """Data source bound to the ``source1.json`` sample file."""

    def __init__(self, **kwargs):
        """Build the source, defaulting ``encoding`` to ``'utf-8'``.

        Args:
            **kwargs: Reader keyword arguments; an explicit ``encoding``
                supplied by the caller takes precedence over the default.
        """
        if 'encoding' not in kwargs:
            kwargs['encoding'] = 'utf-8'
        super().__init__('./DataSources/data/source1.json', **kwargs)

In [176]:
class ParserFactory:
    """Maps a file extension to the parser able to read that format."""

    def get_parser(self, format_):
        """Return a parser instance for the given file extension.

        Args:
            format_: File extension including the leading dot, e.g. ``'.csv'``
                or ``'.json'`` (the form returned by ``os.path.splitext``).

        Returns:
            A new ``CSVParser`` for ``'.csv'`` or ``JsonParser`` for ``'.json'``.

        Raises:
            ValueError: If ``format_`` is not one of the supported extensions.
        """
        if format_ in ['.csv']:
            return CSVParser()
        if format_ in ['.json']:
            return JsonParser()
        # Raise (rather than return) the error so that an unsupported format
        # fails here instead of later with an AttributeError on ``.parse``.
        raise ValueError(f'{format_} not supported.')

        
        
        
        
# class ObjectParser:
    
#     def parse(self, datasource: DataSource):     
#         extension = osp.splitext(datasource.filepath)[1]
#         parser = factory.get_parser(extension)
#         return datasource.parse(parser)
    
    
class Dataframe:
    """Thin chainable wrapper around a ``pandas.DataFrame``.

    Every transforming method rebinds ``self.df`` to a new frame and returns
    ``self`` so calls can be chained. The frame handed to the constructor is
    never modified in place.
    """

    def __init__(self, df):
        """Wrap ``df`` (a ``pandas.DataFrame``)."""
        self.df = df

    @property
    def columns(self):
        """Column index of the wrapped frame."""
        return self.df.columns

    def keep_cols(self, keep_cols):
        """Restrict the wrapped frame to the columns listed in ``keep_cols``.

        Returns:
            ``self``, to allow chaining.
        """
        self.df = self.df[keep_cols]
        return self

    def remove_nan(self, col):
        """Drop the rows whose value in ``col`` is missing.

        Returns:
            ``self``, to allow chaining.
        """
        # ``notna()`` states the intent directly instead of relying on the
        # "NaN != NaN" comparison trick.
        self.df = self.df[self.df[col].notna()]
        return self

    def concat_cols(self, concat_cols: List):
        """Add a column holding the space-joined values of ``concat_cols``.

        The new column is named by joining the source column names with
        ``'__'`` (e.g. ``['name', 'surname']`` -> ``'name__surname'``).

        Args:
            concat_cols: Names of the columns to join; their values must be
                strings because they are combined with ``' '.join``.

        Returns:
            ``self``, to allow chaining.
        """
        concat_string = '__'.join(concat_cols)
        joined = self.df[concat_cols].agg(' '.join, axis=1)
        # ``assign`` builds a new frame, so the caller's original DataFrame is
        # left untouched and no SettingWithCopyWarning is triggered when
        # ``self.df`` is a slice produced by ``keep_cols`` / ``remove_nan``.
        self.df = self.df.assign(**{concat_string: joined})
        return self
        
    

In [181]:
class DataSource:
    """Base class for file-backed sources.

    There is no ``__init__`` here: ``get_df`` reads ``self.filepath`` and
    ``self.pd_kwargs``, so subclasses must set both before calling it.
    """

    def get_df(self):
        """Load ``self.filepath`` with the parser matching its extension.

        Returns:
            A ``Dataframe`` wrapping the parsed pandas frame.
        """
        _, extension = osp.splitext(self.filepath)
        parser = ParserFactory().get_parser(extension)
        return Dataframe(parser.parse(self.filepath, **self.pd_kwargs))
    


class UK(DataSource):
    """Data source for the ``source1.json`` sample file."""

    def __init__(self, **pd_kwargs):
        """Configure the file location, reader options and column lists.

        Args:
            **pd_kwargs: Keyword arguments forwarded to the pandas reader.
        """
        self.filepath = './DataSources/data/source1.json'
        self.pd_kwargs = pd_kwargs
        # NOTE(review): ``keep_cols`` is stored but ``run`` never applies it.
        self.keep_cols = ['name', 'surname']
        self.concat_cols = ['name', 'surname']

    def run(self):
        """Load the file and add the joined ``name__surname`` column.

        Returns:
            The ``Dataframe`` wrapper holding the loaded and augmented data.
        """
        df_obj = self.get_df()
        # Use the attribute dedicated to concatenation instead of
        # ``keep_cols``; both hold the same columns, so the output is unchanged.
        df_obj.concat_cols(self.concat_cols)
        return df_obj
        
        


class US(DataSource):
    """Data source for the ``source2.csv`` sample file."""

    def __init__(self):
        """Configure the file location, reader options and column lists."""
        self.filepath = './DataSources/data/source2.csv'
        self.pd_kwargs = {}
        # NOTE(review): ``keep_cols`` is stored but ``run`` never applies it.
        self.keep_cols = ['first_name', 'last_name']
        # Previously ['name', 'surname'] (copied from the UK source), which
        # does not match the first_name / last_name columns of this file.
        self.concat_cols = ['first_name', 'last_name']

    def run(self):
        """Load the file and add the joined ``first_name__last_name`` column.

        Returns:
            The ``Dataframe`` wrapper holding the loaded and augmented data.
        """
        df_obj = self.get_df()
        # Read the concatenation columns from their dedicated attribute; it
        # now holds the same columns ``keep_cols`` supplied before.
        df_obj.concat_cols(self.concat_cols)
        return df_obj

In [184]:
# Run the UK pipeline and display the underlying pandas DataFrame.
UK(encoding='utf-8').run().df

Unnamed: 0,name,surname,name__surname
0,Thanos,Sdr,Thanos Sdr
1,Panagiotis,Sdr,Panagiotis Sdr


In [172]:
# Run the US pipeline and display the underlying pandas DataFrame.
US().run().df

Unnamed: 0,first_name,last_name,first_name__last_name
0,Bob,Ross,Bob Ross
1,Maria,Ross,Maria Ross
