## Import

In [7]:
import os
import regex as re
from kedro.io import DataCatalog
from kedro.config import ConfigLoader
from kedro.runner import ParallelRunner
from kedro.framework.project import settings
from kedro.framework.session import KedroSession
from kedro.extras.datasets.json import JSONDataSet

In [16]:
dates = [f"{day}_04_2023" for day in range(20, 31)]

## Data ingestion

### Set data ingestion parameters

In [19]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_ingestion_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\ingestion.yml"

In [20]:
with open(root_ingestion_params, "r+") as f:
    contents = f.read()
    
    elements_in_file = len(re.findall(r"(ingestion.date\d: .*\n)", contents))
    elements_to_write = len(dates)
    
    if elements_in_file < elements_to_write:
        if elements_in_file == 0:
            s = ""
            for i in range(0, elements_to_write):
                s += f'ingestion.date{i}: \n'
            contents = "\n".join([s])
        else:
            start = elements_in_file-1
            s = f'ingestion.date{start}: \n'
            for i in range(elements_in_file, elements_to_write):
                s += f'ingestion.date{i}: \n'
            contents = re.sub(fr"(ingestion.date{elements_in_file-1}: .*\n)", s, contents)
            
    for i, date in enumerate(dates):
        contents = re.sub(fr"(ingestion.date{i}: .*\n)", f'ingestion.date{i}: "{date}"\n', contents)
    
    if elements_in_file > elements_to_write:
        for i in range(elements_to_write, elements_in_file+1):
            contents = re.sub(fr"(ingestion.date{i}: .*\n)", f'ingestion.date{i}: \n', contents)

    
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [21]:
for line in contents.split("\n"):
    print(line)

ingestion.date0: "20_04_2023"
ingestion.date1: "21_04_2023"
ingestion.date2: "22_04_2023"
ingestion.date3: "23_04_2023"
ingestion.date4: "24_04_2023"
ingestion.date5: "25_04_2023"
ingestion.date6: "26_04_2023"
ingestion.date7: "27_04_2023"
ingestion.date8: "28_04_2023"
ingestion.date9: "29_04_2023"
ingestion.date10: "30_04_2023"
ingestion.date10: "30_04_2023"
ingestion.date10: "30_04_2023"




## Data cleaning

### Set data cleaning parameters

In [17]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_cleaning_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\cleaning.yml"

In [18]:
bergen_polygon_vertex = [
     [5.161214, 60.372825],
     [5.211224, 60.398977],
     [5.255800, 60.409478],
     [5.240007, 60.479588],
     [5.259292, 60.528707],
     [5.322314, 60.545026],
     [5.542953, 60.421316],
     [5.486513, 60.348389],
     [5.343004, 60.257903],
     [5.256487, 60.240867],
     [5.227651, 60.242074],
     [5.190497, 60.291077],
     [5.197846, 60.325154],
     [5.183965, 60.337078],
     [5.169675, 60.340815],
     [5.161214, 60.372825]]

In [19]:
with open(root_cleaning_params, "r+") as f:
    contents = f.read()
    
    elements_in_file = len(re.findall(r"(cleaning.date\d: .*\n)", contents))
    elements_to_write = len(dates)
    
    if elements_in_file < elements_to_write:
        s = ""
        if elements_in_file == 0:
            for i in range(0, elements_to_write):
                s += f'cleaning.date{i}: \n'
            contents = "\n".join([s])
        else:
            start = elements_in_file-1
            s = f'cleaning.date{start}: \n'
            for i in range(elements_in_file, elements_to_write):
                s += f'cleaning.date{i}: \n'
            contents = re.sub(fr"(cleaning.date{elements_in_file-1}: .*\n)", s, contents)
            
    for i, date in enumerate(dates):
        contents = re.sub(fr"(cleaning.date{i}: .*\n)", f'cleaning.date{i}: "{date}"\n', contents)
    
    if elements_in_file > elements_to_write:
        for i in range(elements_to_write, elements_in_file+1):
            contents = re.sub(fr"(cleaning.date{i}: .*\n)", f'cleaning.date{i}: \n', contents)
    
    if "cleaning.polygon_vertex" not in contents:
        contents += f"\ncleaning.polygon_vertex: {bergen_polygon_vertex}\n"
    else:
        contents = re.sub(r"(cleaning.polygon_vertex: .*\n)", f"cleaning.polygon_vertex: {bergen_polygon_vertex}\n", contents)
        
    f.seek(0)
    f.truncate()
    f.write(contents)

In [21]:
for line in contents.split("\n"):
    print(line)

cleaning.date0: "20_04_2023"
cleaning.date1: "21_04_2023"
cleaning.date2: "22_04_2023"
cleaning.date3: "23_04_2023"
cleaning.date4: "24_04_2023"
cleaning.date5: "25_04_2023"
cleaning.date6: "26_04_2023"
cleaning.date7: "27_04_2023"
cleaning.date8: "28_04_2023"
cleaning.date9: "29_04_2023"
cleaning.date10: "30_04_2023"
cleaning.date10: "30_04_2023"

cleaning.polygon_vertex: [[5.161214, 60.372825], [5.211224, 60.398977], [5.2558, 60.409478], [5.240007, 60.479588], [5.259292, 60.528707], [5.322314, 60.545026], [5.542953, 60.421316], [5.486513, 60.348389], [5.343004, 60.257903], [5.256487, 60.240867], [5.227651, 60.242074], [5.190497, 60.291077], [5.197846, 60.325154], [5.183965, 60.337078], [5.169675, 60.340815], [5.161214, 60.372825]]



## Convert to GeoPandasDf

### Set data parameters

In [2]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_convert_to_gdf_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\convert_to_gdf.yml"

In [6]:
dates = [f"{day}_04_2023" for day in range(20, 31)]

In [9]:
with open(root_convert_to_gdf_params, "r+") as f:
    contents = f.read()
    
    elements_in_file = len(re.findall(r"(convert_to_gdf.date\d: .*\n)", contents))
    elements_to_write = len(dates)
    
    if elements_in_file < elements_to_write:
        s = ""
        if elements_in_file == 0:
            for i in range(0, elements_to_write):
                s += f'convert_to_gdf.date{i}: \n    day: \n'
            contents = "\n".join([s])
        else:
            start = elements_in_file-1
            s = f'convert_to_gdf.date{start}: \n    day: \n'
            for i in range(elements_in_file, elements_to_write):
                s += f'convert_to_gdf.date{i}: \n    day: \n'
            contents = re.sub(fr"(convert_to_gdf.date{elements_in_file-1}: .*\n    day: .*\n)", s, contents)
            
    for i, date in enumerate(dates):
        contents = re.sub(fr"(convert_to_gdf.date{i}: .*\n    day: .*\n)", 
                          f'convert_to_gdf.date{i}: \n    day: "{date}"\n', contents)
    
    if elements_in_file > elements_to_write:
        for i in range(elements_to_write, elements_in_file):
            contents = re.sub(fr"(convert_to_gdf.date{i}: .*\n    day: .*\n)", 
                              f'convert_to_gdf.date{i}: \n    day: \n', contents)

    f.seek(0)
    f.truncate()
    f.write(contents)

In [10]:
for line in contents.split("\n"):
    print(line)

convert_to_gdf.date0: 
    day: "20_04_2023"
convert_to_gdf.date1: 
    day: "21_04_2023"
convert_to_gdf.date2: 
    day: "22_04_2023"
convert_to_gdf.date3: 
    day: "23_04_2023"
convert_to_gdf.date4: 
    day: "24_04_2023"
convert_to_gdf.date5: 
    day: "25_04_2023"
convert_to_gdf.date6: 
    day: "26_04_2023"
convert_to_gdf.date7: 
    day: "27_04_2023"
convert_to_gdf.date8: 
    day: "28_04_2023"
convert_to_gdf.date9: 
    day: "29_04_2023"
convert_to_gdf.date10: 
    day: "30_04_2023"
convert_to_gdf.date10: 
    day: "30_04_2023"



## Create average GeoDataFrames

### Set average GeoDataFrames parameters

In [11]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_create_average_gdfs_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\create_average_gdfs.yml"

In [12]:
days = [f"{day}_04_2023" for day in range(20, 31)]

In [23]:
with open(root_create_average_gdfs_params, "r+") as f:
    contents = f.read()
    if re.findall(r"(create_average_gdfs.days: .*\n)", contents) == []:
        contents = "\n".join([contents, f"create_average_gdfs.days: {days}\n"])
    else:
        contents = re.sub(r"(create_average_gdfs.days: .*\n)", f"create_average_gdfs.days: {days}\n", contents)
    
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [24]:
for line in contents.split("\n"):
    print(line)


create_average_gdfs.days: ['20_04_2023', '21_04_2023', '22_04_2023', '23_04_2023', '24_04_2023', '25_04_2023', '26_04_2023', '27_04_2023', '28_04_2023', '29_04_2023', '30_04_2023']



## Create worst average GeoDataFrames

### Set worst average GeoDataFrames parameters

In [5]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_create_worst_average_gdfs_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\build_worst_average_gdfs.yml"

In [2]:
days = [f"{day}_04_2023" for day in range(20, 31)]

In [6]:
with open(root_create_worst_average_gdfs_params, "r+") as f:
    contents = f.read()
    if re.findall(r"(build_worst_average_gdfs.days: .*\n)", contents) == []:
        contents = "\n".join([contents, f"build_worst_average_gdfs.days: {days}\n"])
    else:
        contents = re.sub(r"(build_worst_average_gdfs.days: .*\n)", f"build_worst_average_gdfs.days: {days}\n", contents)
    
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [7]:
for line in contents.split("\n"):
    print(line)

# This is a boilerplate parameters config generated for pipeline 'build_worst_average_gdfs'
# using Kedro 0.18.7.
#
# Documentation for this file format can be found in "Parameters"
# Link: https://docs.kedro.org/en/0.18.7/kedro_project_setup/configuration.html#parameters

build_worst_average_gdfs.days: ['20_04_2023', '21_04_2023', '22_04_2023', '23_04_2023', '24_04_2023', '25_04_2023', '26_04_2023', '27_04_2023', '28_04_2023', '29_04_2023', '30_04_2023']



## Build average Graph and Adj Matrix

In [2]:
times = ["all_day", "morning", "midday", "afternoon"]

### Average graph

In [1]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_build_average_graph_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\build_average_graphs.yml"

In [6]:
with open(root_build_average_graph_params, "r+") as f:
    contents = f.read()
    for i in range(0, len(times)):
        if re.findall(fr"(build_average_graphs.{times[i]}: .*\n)", contents) == []:
            contents = "\n".join([contents, f"build_average_graphs.{times[i]}: {times[i]}\n"])
        else:
            contents = re.sub(r"(build_average_graphs.{times[i]}: .*\n)", f"build_average_graphs.{times[i]}: {days}\n", contents)
    
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [7]:
for line in contents.split("\n"):
    print(line)

# This is a boilerplate parameters config generated for pipeline 'build_average_graphs'
# using Kedro 0.18.7.
#
# Documentation for this file format can be found in "Parameters"
# Link: https://docs.kedro.org/en/0.18.7/kedro_project_setup/configuration.html#parameters

build_average_graphs.all_day: all_day

build_average_graphs.morning: morning

build_average_graphs.afternoon: afternoon

build_average_graphs.evening: evening



### Adj matrix

In [9]:
root_build_adjacency_matrix_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\build_adjacency_matrix.yml"

In [10]:
with open(root_build_adjacency_matrix_params, "r+") as f:
    contents = f.read()
    for i in range(0, len(times)):
        if re.findall(fr"(build_adjacency_matrix.{times[i]}: .*\n)", contents) == []:
            contents = "\n".join([contents, f"build_adjacency_matrix.{times[i]}: {times[i]}\n"])
        else:
            contents = re.sub(r"(build_adjacency_matrix.{times[i]}: .*\n)", f"build_adjacency_matrix.{times[i]}: {days}\n", contents)
    
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [11]:
for line in contents.split("\n"):
    print(line)

# This is a boilerplate parameters config generated for pipeline 'build_adjacency_matrix'
# using Kedro 0.18.7.
#
# Documentation for this file format can be found in "Parameters"
# Link: https://docs.kedro.org/en/0.18.7/kedro_project_setup/configuration.html#parameters

build_adjacency_matrix.all_day: all_day

build_adjacency_matrix.morning: morning

build_adjacency_matrix.midday: midday

build_adjacency_matrix.afternoon: afternoon



## Solution comparison

### Set data parameters

In [5]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_solution_comparison_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\solution_comparison.yml"

In [2]:
time_solution = "all_day_free_flow"
data_key = ["time_solution", "time_scenario", "weight", "worst"]

time_scenarios = [("all_day", "weight2","False"), ("all_day", "weight","False"), ("morning", "weight","False"), ("midday", "weight","False"), ("afternoon", "weight","False"),
                  ("all_day", "weight2","True"), ("all_day", "weight","True"), ("morning", "weight","True"), ("midday", "weight","True"), ("afternoon", "weight","True")]
data = [{"time_solution": time_solution, "time_scenario": time_scenario[0], "weight": time_scenario[1], "worst": time_scenario[2]} for time_scenario in time_scenarios]

In [19]:
with open(root_solution_comparison_params, "r+") as f:
    contents = f.read()
    
    elements_in_file = len(re.findall(r"(solution_comparison\d: .*\n)", contents))
    elements_to_write = len(data)
    
    if elements_in_file < elements_to_write:
        s = ""
        if elements_in_file == 0:
            for i in range(0, elements_to_write):
                s += f'solution_comparison{i}: \n    time_solution:\n    time_scenario:\n    weight:\n    worst:\n'
            contents = "\n".join([s])
        else:
            start = elements_in_file-1
            s = f'solution_comparison{start}: \n    time_solution:\n    time_scenario:\n    weight:\n    worst:\n'
            for i in range(elements_in_file, elements_to_write):
                s += f'solution_comparison{i}: \n    time_solution:\n    time_scenario:\n    weight:\n    worst:\n'
            contents = re.sub(fr"(solution_comparison{elements_in_file-1}: .*\n    time_solution: .*\n    time_scenario: .*\n    weight: .*\n    worst: .*\n)", s, contents)
            
    for i, d in enumerate(data):
        contents = re.sub(fr"(solution_comparison{i}: .*\n    time_solution: .*\n    time_scenario: .*\n    weight: .*\n    worst: .*\n)", 
                          f'solution_comparison{i}: \n    time_solution: "{d[data_key[0]]}"\n    time_scenario: "{d[data_key[1]]}"\n    weight: "{d[data_key[2]]}"\n    worst: "{d[data_key[3]]}"', contents)
        print(f'solution_comparison{i}: \n    time_solution: "{d[data_key[0]]}"\n    time_scenario: "{d[data_key[1]]}"\n    weight: "{d[data_key[2]]}"\n    worst: "{d[data_key[3]]}"')
        
    if elements_in_file > elements_to_write:
        for i in range(elements_to_write, elements_in_file):
            contents = re.sub(fr"(solution_comparison{i}: .*\n    time_solution: .*\n    time_scenario: .*\n    weight: .*\n    worst: .*\n)", 
                              f'solution_comparison{i}: \n    time_solution:\n    time_scenario:\n    weight:\n    worst:\n', contents)

    f.seek(0)
    f.truncate()
    f.write(contents)

solution_comparison0: 
    time_solution: "all_day_free_flow"
    time_scenario: "all_day"
    weight: "weight2"
    worst: "False"
solution_comparison1: 
    time_solution: "all_day_free_flow"
    time_scenario: "all_day"
    weight: "weight"
    worst: "False"
solution_comparison2: 
    time_solution: "all_day_free_flow"
    time_scenario: "morning"
    weight: "weight"
    worst: "False"
solution_comparison3: 
    time_solution: "all_day_free_flow"
    time_scenario: "midday"
    weight: "weight"
    worst: "False"
solution_comparison4: 
    time_solution: "all_day_free_flow"
    time_scenario: "afternoon"
    weight: "weight"
    worst: "False"
solution_comparison5: 
    time_solution: "all_day_free_flow"
    time_scenario: "all_day"
    weight: "weight2"
    worst: "True"
solution_comparison6: 
    time_solution: "all_day_free_flow"
    time_scenario: "all_day"
    weight: "weight"
    worst: "True"
solution_comparison7: 
    time_solution: "all_day_free_flow"
    time_scenario: "

In [18]:
for line in contents.split("\n"):
    print(line)

solution_comparison0: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison1: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison2: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison3: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison4: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison5: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison6: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison7: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison8: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison9: 
    time_solution:
    time_scenario:
    weight:
    worst:
solution_comparison0: 
    time_solution: "all_day_free_flow"
    time_scenario: "all_day"
    weight: "weight2"
    worst: "False"solution_comparison1: 
    ti

## Data visualization

### Set data cleaning parameters

In [4]:
root_general_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters.yml"
root_visualization_params = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\parameters\visualization.yml"

In [3]:
dates = [[f"{day}_04_2023"]*3 for day in range(20, 22)]
times = [["morning", "midday", "afternoon"] for day in range(20, 22)]

In [5]:
with open(root_visualization_params, "r+") as f:
    contents = f.read()
    
    elements_in_file = len(re.findall(r"(visualization.date\d: .*\n)", contents))
    m_dates_to_write = len(dates)
    
    if elements_in_file < m_dates_to_write:
        s=""
        if elements_in_file == 0:
            for i in range(0, m_dates_to_write):
                for j in range(0, len(dates[i])):
                    s += f'visualization.date{i}{j}: \n    day: \n    time: \n'
            contents = "\n".join([s])
        else:
            start = elements_in_file-1
            for j in range(0, len(dates[start])):
                s += f'visualization.date{start}{j}: \n    day: \n    time: \n'
            for i in range(elements_in_file, m_dates_to_write):
                for j in range(0, len(dates[i])):
                    s += f'visualization.date{i}{j}: \n    day: \n    time: \n'
            contents = re.sub(fr"(visualization.date{elements_in_file-1}: .*\n)", s, contents)
            
    for i, (date, time) in enumerate(zip(dates, times)):
        for j, (d, t) in enumerate(zip(date, time)):
            contents = re.sub(fr"(visualization.date{i}{j}: .*\n    day: .*\n    time: .*\n)", 
                          f'visualization.date{i}{j}: \n    day: "{d}"\n    time: "{t}"\n', contents)
    
    if elements_in_file > m_dates_to_write:
        for i in range(m_dates_to_write, elements_in_file+1):
            for j in range(0, len(dates[i])):
                contents = re.sub(fr"(visualization.date{i}{j}: .*\n    day: .*\n    time: .*\n)", 
                              f'visualization.date{i}{j}: \n    day: \n    time: \n', contents)

    
    f.seek(0)
    f.truncate()
    f.write(contents)
    

In [6]:
for line in contents.split("\n"):
    print(line)

visualization.date00: 
    day: "20_04_2023"
    time: "morning"
visualization.date01: 
    day: "20_04_2023"
    time: "midday"
visualization.date02: 
    day: "20_04_2023"
    time: "afternoon"
visualization.date10: 
    day: "21_04_2023"
    time: "morning"
visualization.date11: 
    day: "21_04_2023"
    time: "midday"
visualization.date12: 
    day: "21_04_2023"
    time: "afternoon"



In [7]:
rooth_catalog_path = r"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\conf\base\catalog.yml"

In [None]:
with open(root_catalog_path, "r+") as f:
    contents = f.read()
    
    elements_in_file = len(re.findall(r"(visualization.date\d: .*\n)", contents))
    m_dates_to_write = len(dates)
    
    if elements_in_file < m_dates_to_write:
        s=""
        if elements_in_file == 0:
            for i in range(0, m_dates_to_write):
                for j in range(0, len(dates[i])):
                    s += f'visualization.date{i}{j}: \n    day: \n    time: \n'
            contents = "\n".join([s])
        else:
            start = elements_in_file-1
            for j in range(0, len(dates[start])):
                s += f'visualization.date{start}{j}: \n    day: \n    time: \n'
            for i in range(elements_in_file, m_dates_to_write):
                for j in range(0, len(dates[i])):
                    s += f'visualization.date{i}{j}: \n    day: \n    time: \n'
            contents = re.sub(fr"(visualization.date{elements_in_file-1}: .*\n)", s, contents)
            
    for i, (date, time) in enumerate(zip(dates, times)):
        for j, (d, t) in enumerate(zip(date, time)):
            contents = re.sub(fr"(visualization.date{i}{j}: .*\n    day: .*\n    time: .*\n)", 
                          f'visualization.date{i}{j}: \n    day: "{d}"\n    time: "{t}"\n', contents)
    
    if elements_in_file > m_dates_to_write:
        for i in range(m_dates_to_write, elements_in_file+1):
            for j in range(0, len(dates[i])):
                contents = re.sub(fr"(visualization.date{i}{j}: .*\n    day: .*\n    time: .*\n)", 
                              f'visualization.date{i}{j}: \n    day: \n    time: \n', contents)

    
    f.seek(0)
    f.truncate()
    f.write(contents)