In [48]:
from pathlib import Path
import json
import xml.etree.ElementTree as ET    

class JsonExtractor():
    """Read a JSON file from disk and expose its decoded content.

    The decoded document is kept on ``self.data`` and is also available
    through the read-only ``parsed_data`` property.
    """

    def __init__(self, file_path_json: Path):
        """Open ``file_path_json`` and decode it with ``json.load``.

        Args:
            file_path_json: Location of the JSON file to read.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        # JSON text is UTF-8 by specification; without an explicit encoding
        # open() falls back to the platform default, which can garble or
        # reject non-ASCII characters on some systems.
        with open(file_path_json, encoding="utf-8") as f:
            self.data = json.load(f)

    @property
    def parsed_data(self):
        """Return the object decoded from the JSON file."""
        return self.data
    
class XmlExtractor():
    """Parse an XML file from disk and expose the resulting element tree."""

    def __init__(self, file_xml_PATH: Path):
        """Parse the XML document found at ``file_xml_PATH``.

        The parsed ``ElementTree`` is stored on ``self.tree``.
        """
        # Build an empty tree first, then fill it from the file.
        document = ET.ElementTree()
        document.parse(file_xml_PATH)
        self.tree = document

    @property
    def parsed_data(self):
        """Return the ``ElementTree`` built from the XML file."""
        return self.tree

In [49]:
import os

# 1. The notebook's current working directory serves as the project root.
#    It is deliberately kept as a plain string (hence the _STR suffix).
project_dir_STR = os.getcwd()

# 2. Both data files are expected directly inside the project root.
movies_json_PATH = Path(project_dir_STR, "movies.json")
person_xml_relo_PATH = Path(project_dir_STR, "person.xml")

# 3. Instantiate the extractors directly (no factory yet); each one reads
#    and parses its file in the constructor.
movies_json_INSTANCE = JsonExtractor(movies_json_PATH)
person_xml_INSTANCE = XmlExtractor(person_xml_relo_PATH)

In [50]:
# 4. create factory
from pathlib import Path
def data_extractor_factory(file_path: Path):
    """Return the extractor that matches the extension of ``file_path``.

    Args:
        file_path: Location of the data file, as a ``Path`` or a plain string.

    Returns:
        A ``JsonExtractor`` for ``.json`` files or an ``XmlExtractor`` for
        ``.xml`` files. The extension is matched case-insensitively.

    Raises:
        ValueError: If the file has no extension or an unsupported one.
    """
    # Accept plain strings as well as Path objects.
    path = Path(file_path)
    # Path.suffix is empty for names without an extension, so a file that is
    # merely *called* "json" is no longer mistaken for a JSON document.
    ext = path.suffix[1:]
    kind = ext.lower()

    if kind == 'json':
        print("processing json")
        return JsonExtractor(path)
    elif kind == 'xml':
        print("processing xml")
        return XmlExtractor(path)
    elif not ext:
        raise ValueError(f"Do not compute 🤖 {path.name!r} (no file extension)")
    else:
        raise ValueError(f"Do not compute 🤖 .{ext!r}")


In [62]:
# Build the extractor through the factory; the "json" extension of
# movies_json_PATH makes it return a JsonExtractor.
movies_factory_json_INSTANCE = data_extractor_factory(movies_json_PATH)
# parsed_data hands back whatever json.load decoded from the file.
data_list = movies_factory_json_INSTANCE.parsed_data
# Display only the first record instead of dumping the whole data set.
data_list[0]


processing json


{'title': 'After Dark in Central Park',
 'year': 1900,
 'director': None,
 'cast': None,
 'genre': None}

In [63]:
# The "xml" extension of person_xml_relo_PATH makes the factory return an XmlExtractor.
person_factory_xml_INSTANCE = data_extractor_factory(person_xml_relo_PATH)
# parsed_data is the parsed ElementTree; getroot() displays its top-level element.
person_factory_xml_INSTANCE.parsed_data.getroot()

processing xml


<Element 'persons' at 0x7f9de0148310>

In [35]:
# data_instance = data_extractor_factory(file_path=movies_json_PATH)
# data_instance = data_extractor_factory(file_path=person_xml_relo_PATH)

# The failure is the point of this cell: an unsupported extension must be
# rejected. Catch and display the error so "Restart & Run All" can continue
# past the demonstration instead of stopping here.
try:
    data_instance = data_extractor_factory(file_path=project_dir_STR/Path("yolo.txt"))
except ValueError as exc:
    print(f"ValueError: {exc}")

# data_instance.parsed_data.getroot()

ValueError: Do not compute 🤖 .'txt'

In [5]:
# Scratch check of the extension logic used by the factories.
#   movies_json_PATH.name     -> 'movies.json' (a str)
#   person_xml_relo_PATH.name -> 'person.xml'

# Alternative input for the same check:
# ext = movies_json_PATH.name.split(".")[-1]
# Take the text after the last dot of the file name and display it.
ext = person_xml_relo_PATH.name.split(".")[-1]
ext


'xml'

In [19]:
def extract_factory(filepath: Path):
    """Return the extractor that matches the extension of ``filepath``.

    Args:
        filepath: Location of the data file, as a ``Path`` or a plain string.

    Returns:
        A ``JsonExtractor`` for ``.json`` files or an ``XmlExtractor`` for
        ``.xml`` files. The extension is matched case-insensitively.

    Raises:
        ValueError: If the extension is missing or not supported.
    """
    # Accept plain strings as well as Path objects.
    filepath = Path(filepath)
    # Path.suffix is empty when the name has no extension, so a file merely
    # named "json" or "xml" is rejected instead of being parsed.
    ext = filepath.suffix[1:].lower()
    if ext == "json":
        # Use the extractor classes this notebook actually defines; the
        # previous names were not defined anywhere in the notebook.
        return JsonExtractor(filepath)
    elif ext == "xml":
        return XmlExtractor(filepath)
    else:
        raise ValueError("Cannot extract data")

In [20]:
# import os
# current_directory = os.getcwd()
# current_directory

In [21]:
def _print_movies(movies):
    """Print every movie title, plus its director and genre when they are set."""
    for movie in movies:
        print(f"- {movie['title']}")
        # Director and genre are optional: skip them when absent or empty.
        director = movie.get("director")
        if director:
            print(f"   Director: {director}")
        genre = movie.get("genre")
        if genre:
            print(f"   Genre: {genre}")


def _print_persons(tree, search_xpath=".//person[lastName='Liar']"):
    """Print the name and phone numbers of each person matched by ``search_xpath``."""
    for item in tree.findall(search_xpath):
        first = item.findtext("firstName", default="")
        last = item.findtext("lastName", default="")
        print(f"- {first} {last}")
        phone_numbers = item.find("phoneNumbers")
        if phone_numbers is None:
            # A person without a <phoneNumbers> element has nothing to list.
            continue
        for pn in phone_numbers:
            pn_type = pn.attrib["type"]
            pn_val = pn.text
            phone = f"{pn_type}: {pn_val}"
            print(f"   {phone}")


def extract(case: str):
    """Load one of the sample data files from the working directory and print it.

    Args:
        case: ``"json"`` prints the movies found in ``movies.json``;
            ``"xml"`` prints the persons with last name ``Liar`` found in
            ``person.xml``.

    Raises:
        ValueError: If ``case`` is neither ``"json"`` nor ``"xml"``.
    """
    # Data files are looked up in the current working directory because
    # __file__ is not available inside a notebook.
    dir_path = Path.cwd()
    if case == "json":
        factory = extract_factory(dir_path / "movies.json")
        _print_movies(factory.parsed_data)
    elif case == "xml":
        factory = extract_factory(dir_path / "person.xml")
        _print_persons(factory.parsed_data)
    else:
        # Previously an unknown case fell through and silently did nothing.
        raise ValueError(f"Unknown case {case!r}; expected 'json' or 'xml'")

In [None]:
if __name__ == "__main__":
    # Demo driver: announce each supported format, then run its extraction.
    for banner, case_name in (("* JSON case *", "json"), ("* XML case *", "xml")):
        print(banner)
        extract(case=case_name)

In [None]:
# * JSON case *
# - After Dark in Central Park
# - Boarding School Girls' Pajama Parade
# - Buffalo Bill's Wild West Parad
# - Caught
# - Clowns Spinning Hats
# - Capture of Boer Battery by British
#    Director: James H. White
#    Genre: Short documentary
# - The Enchanted Drawing
#    Director: J. Stuart Blackton
# - Family Troubles
# - Feeding Sea Lions
# * XML case *
# - Jimy Liar
#    home: 212 555-1234
# - Patty Liar
#    home: 212 555-1234
#    mobile: 001 452-8819