## Import

In [1]:
import os
import regex as re
from kedro.io import DataCatalog
from kedro.config import ConfigLoader
from kedro.runner import ParallelRunner
from kedro.framework.project import settings
from kedro.framework.session import KedroSession
from kedro.extras.datasets.json import JSONDataSet

In [2]:
# Days 20..26 of April 2023, formatted as "<day>_04_2023".
dates = ["{}_04_2023".format(day_of_month) for day_of_month in range(20, 27)]

## Data ingestion

### Set data ingestion parameters

In [57]:
# Absolute locations of the parameter files used by the ingestion step.
# Both live under the same `conf\base` directory, so the prefix is shared.
conf_base_dir = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base"
root_general_params = conf_base_dir + r"\parameters.yml"
root_ingestion_params = conf_base_dir + r"\parameters\ingestion.yml"

In [58]:
# Bring the `ingestion.date<i>` entries of the ingestion parameters file in
# line with `dates`: entry i receives dates[i], missing entries are created and
# surplus entries are kept but left with an empty value.
date_key = "ingestion.date"
date_key_re = re.escape(date_key)  # the "." of the key must match literally

with open(root_ingestion_params, "r+") as f:
    contents = f.read()

    # Every pattern below needs a terminating "\n". Make sure the last line has
    # one, otherwise a trailing entry would be missed and later duplicated.
    if contents and not contents.endswith("\n"):
        contents += "\n"

    # `\d+` (not `\d`) so that indices >= 10 are counted as well; `:.*` (not
    # `: .*`) so that an empty entry whose trailing space was stripped by an
    # editor is still recognised.
    n_dates_in_file = len(re.findall(fr"{date_key_re}\d+:.*\n", contents))
    m_dates_to_write = len(dates)

    if n_dates_in_file < m_dates_to_write:
        if n_dates_in_file == 0:
            # No entry yet: append the placeholders and keep everything else
            # that is already stored in the file.
            contents += "".join(
                f"{date_key}{i}: \n" for i in range(m_dates_to_write)
            )
        else:
            # Grow the list right after the last existing entry by replacing
            # that entry with itself plus the missing ones. This assumes the
            # existing entries are numbered 0..n_dates_in_file-1.
            last = n_dates_in_file - 1
            placeholders = "".join(
                f"{date_key}{i}: \n" for i in range(last, m_dates_to_write)
            )
            contents = re.sub(fr"{date_key_re}{last}:.*\n", placeholders, contents)

    # Fill in the value of every entry that has a matching date.
    for i, date in enumerate(dates):
        contents = re.sub(fr"{date_key_re}{i}:.*\n", f'{date_key}{i}: "{date}"\n', contents)

    # Entries beyond len(dates) stay in the file but lose their value.
    for i in range(m_dates_to_write, n_dates_in_file):
        contents = re.sub(fr"{date_key_re}{i}:.*\n", f"{date_key}{i}: \n", contents)

    # Rewrite the file in place with the updated text.
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [59]:
# Show the text that was just written to the ingestion parameters file.
print(contents)

ingestion.date0: "20_04_2023"
ingestion.date1: "21_04_2023"
ingestion.date2: "22_04_2023"
ingestion.date3: "23_04_2023"
ingestion.date4: "24_04_2023"
ingestion.date5: "25_04_2023"
ingestion.date6: "26_04_2023"




## Data cleaning

### Set data cleaning parameters

In [60]:
# Absolute locations of the parameter files used by the cleaning step.
# Both live under the same `conf\base` directory, so the prefix is shared.
conf_base_dir = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base"
root_general_params = conf_base_dir + r"\parameters.yml"
root_cleaning_params = conf_base_dir + r"\parameters\cleaning.yml"

In [61]:
# Vertices of the polygon stored under `cleaning.polygon_vertex`.
# The ring is closed: the last vertex repeats the first one.
# NOTE(review): pairs look like [longitude, latitude] around Bergen — confirm.
bergen_polygon_vertex = [
    [5.161214, 60.372825],
    [5.211224, 60.398977],
    [5.2558, 60.409478],
    [5.240007, 60.479588],
    [5.259292, 60.528707],
    [5.322314, 60.545026],
    [5.542953, 60.421316],
    [5.486513, 60.348389],
    [5.343004, 60.257903],
    [5.256487, 60.240867],
    [5.227651, 60.242074],
    [5.190497, 60.291077],
    [5.197846, 60.325154],
    [5.183965, 60.337078],
    [5.169675, 60.340815],
    [5.161214, 60.372825],
]

In [62]:
# Bring the cleaning parameters file in line with this notebook:
#   * `cleaning.date<i>` entries are synchronised with `dates` (entry i gets
#     dates[i], missing entries are created, surplus entries are kept but
#     left with an empty value);
#   * `cleaning.polygon_vertex` is set to `bergen_polygon_vertex`.
date_key = "cleaning.date"
date_key_re = re.escape(date_key)  # the "." of the key must match literally

with open(root_cleaning_params, "r+") as f:
    contents = f.read()

    # Every pattern below needs a terminating "\n". Make sure the last line has
    # one, otherwise a trailing entry would be missed (dates) or silently left
    # untouched (polygon).
    if contents and not contents.endswith("\n"):
        contents += "\n"

    # `\d+` (not `\d`) so that indices >= 10 are counted as well; `:.*` (not
    # `: .*`) so that an empty entry whose trailing space was stripped by an
    # editor is still recognised.
    n_dates_in_file = len(re.findall(fr"{date_key_re}\d+:.*\n", contents))
    m_dates_to_write = len(dates)

    if n_dates_in_file < m_dates_to_write:
        if n_dates_in_file == 0:
            # No entry yet: append the placeholders and keep everything else
            # that is already stored in the file.
            contents += "".join(
                f"{date_key}{i}: \n" for i in range(m_dates_to_write)
            )
        else:
            # Grow the list right after the last existing entry by replacing
            # that entry with itself plus the missing ones. This assumes the
            # existing entries are numbered 0..n_dates_in_file-1.
            last = n_dates_in_file - 1
            placeholders = "".join(
                f"{date_key}{i}: \n" for i in range(last, m_dates_to_write)
            )
            contents = re.sub(fr"{date_key_re}{last}:.*\n", placeholders, contents)

    # Fill in the value of every entry that has a matching date.
    for i, date in enumerate(dates):
        contents = re.sub(fr"{date_key_re}{i}:.*\n", f'{date_key}{i}: "{date}"\n', contents)

    # Entries beyond len(dates) stay in the file but lose their value.
    for i in range(m_dates_to_write, n_dates_in_file):
        contents = re.sub(fr"{date_key_re}{i}:.*\n", f"{date_key}{i}: \n", contents)

    # Add the polygon entry when it is absent, otherwise overwrite its line.
    polygon_entry = f"cleaning.polygon_vertex: {bergen_polygon_vertex}\n"
    if "cleaning.polygon_vertex" not in contents:
        contents += "\n" + polygon_entry
    else:
        contents = re.sub(r"cleaning\.polygon_vertex:.*\n", polygon_entry, contents)

    # Rewrite the file in place with the updated text.
    f.seek(0)
    f.truncate()
    f.write(contents)

In [63]:
# Show the text that was just written to the cleaning parameters file.
print(contents)

cleaning.date0: "20_04_2023"
cleaning.date1: "21_04_2023"
cleaning.date2: "22_04_2023"
cleaning.date3: "23_04_2023"
cleaning.date4: "24_04_2023"
cleaning.date5: "25_04_2023"
cleaning.date6: "26_04_2023"

cleaning.polygon_vertex: [[5.161214, 60.372825], [5.211224, 60.398977], [5.2558, 60.409478], [5.240007, 60.479588], [5.259292, 60.528707], [5.322314, 60.545026], [5.542953, 60.421316], [5.486513, 60.348389], [5.343004, 60.257903], [5.256487, 60.240867], [5.227651, 60.242074], [5.190497, 60.291077], [5.197846, 60.325154], [5.183965, 60.337078], [5.169675, 60.340815], [5.161214, 60.372825]]



In [64]:
import os
from kedro.config import ConfigLoader
from kedro.framework.project import settings

In [65]:
def retrieve_catalog_path():
    """Return the path of the project's base catalog file.

    The path is ``<project dir>/conf/base/catalog.yml``. It is assembled with
    ``os.path.join`` so that the separator of the running platform is used
    instead of a hard-coded backslash.

    Returns:
        str: Location of ``catalog.yml``. The file is not checked for
        existence.
    """
    project_dir = get_project_directory()
    return os.path.join(project_dir, "conf", "base", "catalog.yml")

def retrieve_global_parameters():
    """Load the ``"catalog"`` configuration of the project.

    Despite its name, this function returns what the config loader exposes
    under the ``"catalog"`` key, not the project parameters.

    The configuration directory is ``<project dir>/<settings.CONF_SOURCE>``,
    assembled with ``os.path.join`` so that the separator of the running
    platform is used instead of a hard-coded backslash. The loader is created
    for the ``"local"`` environment.

    Returns:
        The value of ``conf_loader["catalog"]``.
    """
    project_dir = get_project_directory()
    conf_path = os.path.join(project_dir, settings.CONF_SOURCE)
    conf_loader = ConfigLoader(conf_source=conf_path, env="local")
    return conf_loader["catalog"]

In [66]:
# Load the catalog configuration: `retrieve_global_parameters` returns the
# "catalog" entry of the config loader.
conf_catalog = retrieve_global_parameters()

In [67]:
# Inspect a single catalog entry; as the last expression of the cell, its
# value is shown as the cell output.
conf_catalog['cleaning.20_04_2023.trigger_20_04_2023']

{'type': 'pickle.PickleDataSet',
 'filepath': 'data/02_interediate/cleaning_20_04_2023_trigger_20_04_2023.pkl'}