## Import libraries

In [None]:
import numpy as np
import geopandas as gpd
import pandas as pd
import os

from shapely.geometry import box
from shapely.ops import transform , unary_union, cascaded_union, linemerge
from shapely import affinity

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.patches import Wedge 
import matplotlib.patches as mpatches

import random
from PIL import Image

## Import dataset

In [None]:
# Create the simulation-results dataframe (each row is counted as one room further below)
# Read the room-level CSV of the Swiss dataset with pandas
path_sim = 'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/SwissDataset_v3.0.0_clean_v3_room.csv'
Swiss_sim = pd.read_csv(path_sim)

# pd.read_csv already returns a DataFrame; the wrapper is kept so both names stay defined
df_sim = pd.DataFrame(Swiss_sim)

In [None]:
# Create the room-info dataframe
# Read the info CSV of the Swiss dataset with pandas
path_info = 'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/SwissDataset_v3.0.0_clean_v3_info.csv'
Swiss_info = pd.read_csv(path_info)

# pd.read_csv already returns a DataFrame; the wrapper is kept so both names stay defined
df_info = pd.DataFrame(Swiss_info)
# df_res is a second name for df_info (no copy): in-place edits through df_res also change df_info
df_res = df_info

In [None]:
# Create the geometry dataframe
# Read the geometries CSV of the Swiss dataset with pandas
path_geom = 'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/SwissDataset_v3.0.0_geometries.csv'
Swiss_geom = pd.read_csv(path_geom)

# pd.read_csv already returns a DataFrame; the wrapper is kept so both names stay defined
df_geom = pd.DataFrame(Swiss_geom)

# Filter out features.
# .copy() makes the result an independent frame: it is modified in place later on
# (entity_subtype is rewritten), which would otherwise trigger SettingWithCopyWarning.
df_geom = df_geom[df_geom["entity_type"] != "feature"].copy()

1st round of cleaning dataframe

In [None]:
# Merged category -> raw 'entity_subtype' values that are renamed to it.
_SUBTYPE_GROUPS = {
    # Separators
    'WALL': ['COLUMN'],
    'DOOR': ['ENTRANCE_DOOR'],
    # Outdoor spaces
    'OUTDOOR_SPACE': ['BALCONY', 'LOGGIA', 'TERRACE', 'WINTERGARTEN', 'PATIO', 'GARDEN', 'RAILING'],
    # Living space functions
    'DINING': ['LIVING_DINING', 'KITCHEN_DINING'],
    # Other functions
    'OTHER': ['SHAFT', 'NOT_DEFINED'],
    # Void functions
    'VOID': ['OUTDOOR_VOID', 'LIGHTWELL'],
    # Circulation functions
    'CIRCULATION': ['ELEVATOR', 'CORRIDORS_AND_HALLS', 'ELEVATOR_FACILITIES', 'STAIRCASE'],
    # Public functions
    'PUBLIC': ['BASEMENT_COMPARTMENT', 'OFFICE', 'PRAM', 'PRAM_AND_BIKE_STORAGE_ROOM',
               'BIKE_STORAGE', 'COUNTER_ROOM', 'BASEMENT', 'TECHNICAL_AREA', 'HEATING', 'WASH_AND_DRY_ROOM',
               'CLOAKROOM', 'SALESROOM', 'GARAGE', 'OIL_TANK', 'HOUSE_TECHNICS_FACILITIES', 'OFFICE_SPACE',
               'OFFICE_TECH_ROOM', 'WAREHOUSE', 'CARPARK', 'SANITARY_ROOMS', 'OPEN_PLAN_OFFICE', 'MEETING_ROOM',
               'BREAK_ROOM', 'ARCHIVE', 'ELECTRICAL_SUPPLY', 'MEDICAL_ROOM', 'WAITING_ROOM', 'COMMON_KITCHEN',
               'VEHICLE_TRAFFIC_AREA', 'AIR', 'FACTORY_ROOM', 'RECEPTION_ROOM', 'COMMUNITY_ROOM', 'WORKSHOP',
               'CANTEEN', 'SHELTER', 'COLD_STORAGE', 'TRANSPORT_SHAFT', 'RADATION_THERAPY', 'PHYSIO_AND_REHABILITATION',
               'WATER_SUPPLY', 'DEDICATED_MEDICAL_ROOM', 'SPORTS_ROOMS', 'SHOWROOM', 'GAS', 'TEACHING_ROOM', 'ARCADE',
               'LOGISTICS', 'OPERATIONS_FACILITIES', 'LOBBY', 'FOYER'],
}

# Flat lookup: raw value -> merged category
_SUBTYPE_LOOKUP = {raw: merged for merged, raws in _SUBTYPE_GROUPS.items() for raw in raws}


def process_dataframe(df):
    """Reduce the number of distinct values in the 'entity_subtype' column.

    Every value listed in ``_SUBTYPE_GROUPS`` is replaced by its merged category
    (WALL, DOOR, OUTDOOR_SPACE, DINING, OTHER, VOID, CIRCULATION or PUBLIC).
    Values that are not listed (including missing values) are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'entity_subtype' column. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # No raw value belongs to two groups and no merged name is remapped again,
    # so a single pass gives the same result as renaming category by category.
    df['entity_subtype'] = df['entity_subtype'].map(lambda subtype: _SUBTYPE_LOOKUP.get(subtype, subtype))

    return df

In [None]:
# Merge the detailed entity subtypes of the geometry table into the coarser categories
df_geom = process_dataframe(df_geom)
# List the subtypes that remain after merging
print(df_geom['entity_subtype'].unique())

In [None]:
# Preview the first rows of the geometry table
df_geom.head()

In [None]:
# Merge the detailed entity subtypes of the room-info table into the coarser categories
# (process_dataframe edits the frame in place)
df_res = process_dataframe(df_res)

# Drop the rows whose merged subtype is OTHER, VOID or OUTDOOR_SPACE
subtypes_to_exclude = ['OTHER', 'VOID', 'OUTDOOR_SPACE']
df_res = df_res[~df_res['entity_subtype'].isin(subtypes_to_exclude)]

# List the subtypes that remain
print(df_res['entity_subtype'].unique())

In [None]:
# Merge the detailed entity subtypes of the simulation table into the coarser categories
# (process_dataframe edits the frame in place)
df_sim = process_dataframe(df_sim)

# Drop the rows whose merged subtype is OTHER, VOID or OUTDOOR_SPACE.
# .copy() makes the result an independent frame, because new columns are assigned to it
# later on; assigning to a filtered slice triggers SettingWithCopyWarning.
subtypes_to_exclude = ['OTHER', 'VOID', 'OUTDOOR_SPACE']
df_sim = df_sim[~df_sim['entity_subtype'].isin(subtypes_to_exclude)].copy()

# List the subtypes that remain
print(df_sim['entity_subtype'].unique())

Create color code & sort dataframe

In [None]:
# Colour per plotted category; the insertion order of this dict is also the category order
category_to_color = {
    'ROOM': '#F7D08A',          # sunset / light yellow
    'WALL': '#F6F5F4',          # smoke white
    'DOOR': '#E5DEDC',          # timberwolf
    'OUTSIDE_DOOR': '#94BDAA',  # cambridge blue
    'WINDOW': '#B5E3F1',        # light blue
}

# Category names and their colours, both in dict order
categories = [*category_to_color]
colors = [*category_to_color.values()]

# Discrete colour map built from those colours
color_map = ListedColormap(colors)

In [None]:
# Grey-scale colour per category; the insertion order of this dict is also the category order
category_to_color1 = {
    'BALCONY': 'whitesmoke',
    'LOGGIA': 'whitesmoke',
    'WALL': 'grey',
    'DOOR': 'lightgrey',
    'WINDOW': 'lightgrey',
}

# Category names and their colours, both in dict order
categories1 = [*category_to_color1]
colors_grey = [*category_to_color1.values()]

# Discrete colour map built from those colours
color_map_daylight = ListedColormap(colors_grey)

2nd round of cleaning dataframe

---



Create two dataframes: one with only the residential types, and one that also
includes the public functions.
Then convert the dataframes to GeoPandas GeoDataFrames with a usable geometry column.

In [None]:
def _as_geometry(column):
  """Return `column` unchanged if it already has the 'geometry' dtype, else parse it as WKT."""
  if str(column.dtype) == 'geometry':
    return column
  return gpd.GeoSeries.from_wkt(column)

# Create geoseries with polygons -> change WKT to polygon.
# Each table is checked on its own, so one of them may already hold geometries
# while the other still holds WKT strings.
gs_res = _as_geometry(df_res['geometry'])
gs_geom = _as_geometry(df_geom['geometry'])

# Create new GeoDataFrames with polygons (no coordinate reference system is set)
gdf_res = gpd.GeoDataFrame(df_res, geometry=gs_res, crs=None)
gdf_geom = gpd.GeoDataFrame(df_geom, geometry=gs_geom, crs=None)

#Create dataframe - simulation results

In [None]:
# Preview the first two rows of the simulation table
df_sim.head(2)

Create dataframe with view results

In [None]:
# Keep the identifying columns plus the mean view values
df_view = df_sim[['area_id', 'apartment_id', 'entity_subtype', 'view_site_mean',
                  'view_ground_mean', 'view_landscape_nature_mean', 'view_landscape_urban_mean', 'view_sky_mean']]

df_view.head()

Get the min and max values out of the simulation data, so the greyscales can be adjusted to this

In [None]:
# Columns holding the mean view values
view_cols = ['view_site_mean', 'view_ground_mean', 'view_landscape_nature_mean', 'view_landscape_urban_mean', 'view_sky_mean']

# One (column name, minimum, maximum) tuple per view column
min_max_list = [(col, df_view[col].min(), df_view[col].max()) for col in view_cols]

# Collect the tuples in a dataframe and round both value columns to one decimal place
min_max_df_view = pd.DataFrame(min_max_list, columns=['column_name', 'min', 'max'])
min_max_df_view[['min', 'max']] = min_max_df_view[['min', 'max']].round(1)

min_max_df_view

In [None]:
# Keep the identifying columns plus the median daylight values at 12:00 on 21 March, June and December
df_daylight = df_sim[['area_id', 'apartment_id', 'entity_subtype',
                      'daylight_21Mar1200_median', 'daylight_21Jun1200_median', 'daylight_21Dec1200_median']]
df_daylight.head()

In [None]:
# Columns holding the median daylight values
daylight_cols = ['daylight_21Mar1200_median', 'daylight_21Jun1200_median', 'daylight_21Dec1200_median']

# One (column name, minimum, maximum) tuple per daylight column
min_max_list = [(col, df_daylight[col].min(), df_daylight[col].max()) for col in daylight_cols]

# Collect the tuples in a dataframe (values are not rounded here)
min_max_df_daylight = pd.DataFrame(data=min_max_list, columns=['column_name', 'min', 'max'])

min_max_df_daylight

#Definitions

In [None]:
def room_info(gdf_res, area_id):
  """Select the rows of one room (area) and build its identifying code.

  Parameters
  ----------
  gdf_res : DataFrame / GeoDataFrame
      Must contain the columns 'area_id', 'apartment_id', 'site_id',
      'building_id', 'floor_id' and 'unit_id'.
  area_id :
      Value matched against the 'area_id' column.

  Returns
  -------
  gdf_room :
      The rows of `gdf_res` whose 'area_id' equals `area_id`.
  room_code : list
      The unique site, building, floor and unit ids of those rows (converted
      to int), followed by their unique apartment id(s).
  room_code_str : str
      The entries of `room_code` joined with '-'.
  """
  # Locate the specific room in the dataframe
  gdf_room = gdf_res.loc[gdf_res["area_id"] == area_id]

  # Find the corresponding site, building, floor and unit id
  room_code_columns = ['site_id', 'building_id', 'floor_id', 'unit_id']
  room_code = [int(val) for col in room_code_columns for val in gdf_room[col].unique()]

  # Add the apartment id as plain value(s). Appending the whole 'apartment_id'
  # column would put a Series into the list and its printed form into the string.
  room_code.extend(gdf_room['apartment_id'].unique().tolist())

  # Create one string for the room code
  room_code_str = '-'.join(str(e) for e in room_code)

  return gdf_room, room_code, room_code_str

In [None]:
def floor_info(gdf_geom, app_id):
    """Collect the floor geometry of an apartment and the overhang of the floors above it.

    Parameters
    ----------
    gdf_geom : GeoDataFrame
        Must contain the columns 'apartment_id', 'floor_id', 'building_id',
        'elevation', 'entity_type', 'entity_subtype' and 'geometry'.
    app_id :
        Value matched against the 'apartment_id' column.

    Returns
    -------
    gdf_floor :
        Copy of all rows on the same floor as the apartment.
    gdf_outdoor_space :
        The rows of `gdf_floor` whose 'entity_subtype' is 'OUTDOOR_SPACE'.
    gdf_overhang :
        The parts of the higher floors of the same building that lie outside the
        (slightly buffered) current floor. An empty pandas DataFrame when the
        building has no higher floor.

    Raises
    ------
    IndexError
        If no row of `gdf_geom` belongs to `app_id`.
    """
    # The first row of the apartment identifies its floor, building and elevation
    app_data = gdf_geom[gdf_geom['apartment_id'] == app_id].iloc[0]
    floor_id = app_data['floor_id']
    building_id = app_data['building_id']
    elevation_id = app_data['elevation']

    # Generate dataframes with the floor geometry
    gdf_floor = gdf_geom[gdf_geom['floor_id'] == floor_id].copy()
    gdf_outdoor_space = gdf_floor[gdf_floor['entity_subtype'] == 'OUTDOOR_SPACE']

    # Floors above the current one: 'area' rows of the same building with a larger
    # elevation. One comparison replaces the max()/loop combination, so a building
    # without 'area' rows simply yields no higher floors instead of raising.
    is_building_area = (gdf_geom['building_id'] == building_id) & (gdf_geom['entity_type'] == 'area')
    is_higher = gdf_geom['elevation'] > elevation_id
    higher_floor_ids = gdf_geom.loc[is_building_area & is_higher, 'floor_id'].unique()

    # All geometry (not only areas) on those higher floors
    gdf_higher_floors = gdf_geom[gdf_geom['floor_id'].isin(higher_floor_ids)].copy()
    gdf_overhang = pd.DataFrame()

    if not gdf_higher_floors.empty:
        # Create a buffer to exclude small imperfections in the geometry
        gdf_floor_buffered = gdf_floor.copy()
        gdf_floor_buffered['geometry'] = gdf_floor['geometry'].buffer(0.01)

        # Find the difference between the floors above and the current floor
        gdf_overhang = gpd.overlay(gdf_higher_floors, gdf_floor_buffered, how='difference')

    return gdf_floor, gdf_outdoor_space, gdf_overhang

In [None]:
def bbox_rect(gdf_room, size=15):
  """Return a square frame centred on the rooms of `gdf_room`.

  Parameters
  ----------
  gdf_room : GeoDataFrame
      Must contain 'entity_subtype' and 'geometry'; only rows whose subtype is
      'ROOM' are used.
  size : float, default 15
      Side length of the square, in the units of the geometry.

  Returns
  -------
  gdf_rect : GeoDataFrame
      One row holding the axis-aligned `size` x `size` square.
  img_centre : tuple of float
      (x, y) of the centre of that square.
  """
  # Keep only the room polygons
  gdf_room_area = gdf_room[gdf_room['entity_subtype'] == 'ROOM']

  # Merge them and take the minimum rotated rectangle around the result
  poly_rooms = gdf_room_area["geometry"].unary_union
  rect_rooms = poly_rooms.minimum_rotated_rectangle

  # Centre of the rectangle's axis-aligned bounding box. Scaling the rectangle
  # about this point to size x size and boxing the result gives exactly the
  # square built below, without dividing by a possibly zero extent.
  minx, miny, maxx, maxy = rect_rooms.bounds
  centre_x = (minx + maxx) / 2
  centre_y = (miny + maxy) / 2

  # Square of the requested size around that centre
  half = size / 2
  rect = box(centre_x - half, centre_y - half, centre_x + half, centre_y + half)
  gdf_rect = gpd.GeoDataFrame(geometry=[rect])

  # Centre used for the view-landscape circle
  img_centre = (centre_x, centre_y)

  return gdf_rect, img_centre

# Visualization Swiss dataset

In [None]:
# All rendered room images and their NumPy exports live below this one directory
_layouts_dir = 'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/10_Images_04Dataset_layouts'

# Directory for the PNG plots
path_png = _layouts_dir + '/05_Room_Feature_v5_png'
# Directory for the NumPy arrays of the prediction set
path_npy_pred = _layouts_dir + '/05_Room_Feature_v5.pred_npy'
# Directories 0-4 for the NumPy arrays of the training set
path_npy0, path_npy1, path_npy2, path_npy3, path_npy4 = (
    f'{_layouts_dir}/05_Room_Feature_v5.{part}_npy' for part in range(5)
)

In [None]:
# Disabled code, kept inside a string literal so that it is not executed:
# it selected the apartments of a fixed list of prediction sites.
'''
sites_pred = [386, 1067, 1105, 11726, 3641, 3958, 11756, 2291, 401, 194, 277, 66, 2161, 789]
gdf_res_pred = gdf_res.loc[gdf_res['site_id'].isin(sites_pred)]
unique_app_ids_pred = gdf_res_pred['apartment_id'].unique()'''

In [None]:
#exclude the already plotted apartments so the loop does not include those again 
# Use list comprehension to get a list of all image file names in the directory
image_file_names0 = [f for f in os.listdir(path_npy0) if f.endswith('.npy')]
image_file_names1 = [f for f in os.listdir(path_npy1) if f.endswith('.npy')]
image_file_names2 = [f for f in os.listdir(path_npy2) if f.endswith('.npy')]
image_file_names3 = [f for f in os.listdir(path_npy3) if f.endswith('.npy')]
image_file_names4 = [f for f in os.listdir(path_npy4) if f.endswith('.npy')]
image_file_names_pred = [f for f in os.listdir(path_npy_pred) if f.endswith('.npy')]

combined_image_file_names_training = image_file_names0 + image_file_names1 + image_file_names2 + image_file_names3 + image_file_names4
combined_image_file_names = combined_image_file_names_training + image_file_names_pred

# Extract apartment IDs
plotted_apartment_ids = [file_name.split('_')[0] for file_name in combined_image_file_names]
plotted_area_ids = [int(file_name.split('_')[1].split('.')[0]) for file_name in combined_image_file_names]
pred_apartment_ids = [file_name.split('_')[0] for file_name in image_file_names_pred]
train_apartment_ids = [apartment_id for apartment_id in plotted_apartment_ids if apartment_id not in pred_apartment_ids]

# Get unique apartment IDs and area IDs
unique_plotted_apartment_ids = list(set(plotted_apartment_ids))
unique_pred_apartment_ids = list(set(pred_apartment_ids))
unique_train_apartment_ids = list(set(train_apartment_ids))

print(f'training: app={len(unique_train_apartment_ids)}, rooms={len(combined_image_file_names_training)}')
print(f'prediction: app={len(unique_pred_apartment_ids)}, rooms={len(image_file_names_pred)}')

In [None]:
# df_total is a second name for df_sim (no copy is made): columns added to df_total also appear on df_sim
df_total = df_sim

In [None]:
# Number of distinct sites, buildings and apartments in the full simulation table
tot_site, tot_building, tot_app = (
    df_sim[id_column].nunique() for id_column in ('site_id', 'building_id', 'apartment_id')
)
# Every row of the table counts as one room
tot_rooms = len(df_sim)

In [None]:
# Rows of the apartments that have a training export.
# .copy() makes the result an independent frame, because new columns are assigned
# to it later on; assigning to a slice of df_sim triggers SettingWithCopyWarning.
df_train = df_sim.loc[df_sim['apartment_id'].isin(unique_train_apartment_ids)].copy()
train_site = df_train['site_id'].nunique()
train_building = df_train['building_id'].nunique()
train_app = df_train['apartment_id'].nunique()
# Rooms are counted from the exported files, not from the table rows
train_rooms = len(combined_image_file_names_training)

In [None]:
# Rows of the apartments that have a prediction export.
# .copy() makes the result an independent frame, because new columns are assigned
# to it later on; assigning to a slice of df_sim triggers SettingWithCopyWarning.
df_pred = df_sim.loc[df_sim['apartment_id'].isin(unique_pred_apartment_ids)].copy()
pred_site = df_pred['site_id'].nunique()
pred_building = df_pred['building_id'].nunique()
pred_app = df_pred['apartment_id'].nunique()
# Rooms are counted from the exported files, not from the table rows
pred_rooms = len(image_file_names_pred)

In [None]:
# Create a dictionary with your data
data_overview = {   'Category': ['Sites', 'Buildings', 'Apartments', 'Rooms'],
                    'Total _overviewset': [tot_site, tot_building, tot_app, tot_rooms],  
                    'Training Dataset': [train_site, train_building, train_app, train_rooms],  
                    'Prediction Dataset': [pred_site, pred_building, pred_app, pred_rooms]
}

# Convert the dictionary into a Pandas DataFrame
df_dataset_overview = pd.DataFrame(data_overview)
print(df_dataset_overview)

In [None]:
# Rows of the apartments that have no exported image yet
df_to_plot = df_sim.loc[~df_sim['apartment_id'].isin(unique_plotted_apartment_ids)]
# Number of distinct sites, buildings and apartments that are still to be plotted
toplot_site = df_to_plot['site_id'].nunique()
toplot_building = df_to_plot['building_id'].nunique()
toplot_app = df_to_plot['apartment_id'].nunique()

print(f'to plot: sites={toplot_site}, buildings={toplot_building}, apps={toplot_app}')

In [None]:
# Value range of the elevation over the full simulation table (used for normalisation below)
max_elevation = df_sim['elevation'].max()
min_elevation = df_sim['elevation'].min()
print(f'elevation: min={min_elevation}, max={max_elevation}')

# Value range of the window-to-floor ratio over the full simulation table
max_windowfloor = df_sim['window_floor_ratio'].max()
min_windowfloor = df_sim['window_floor_ratio'].min()
print(f'window to floor ratio: min={min_windowfloor}, max={max_windowfloor}')

In [None]:
# Maxima of the view columns (p80) over the full simulation table
max_view_sky = df_sim['view_sky_p80'].max()
print(f'max view sky: {max_view_sky}')
max_view_ground = df_sim['view_ground_p80'].max()
print(f'max view ground: {max_view_ground}')

# Maxima of the median daylight columns over the full simulation table
max_daylight_Mar = df_sim['daylight_21Mar1200_median'].max()
print(f'max daylight Mar: {max_daylight_Mar}')
max_daylight_Jun = df_sim['daylight_21Jun1200_median'].max()
print(f'max daylight Jun: {max_daylight_Jun}')
max_daylight_Dec = df_sim['daylight_21Dec1200_median'].max()
print(f'max daylight Dec: {max_daylight_Dec}')

In [None]:
# Calculate the normalized elevation column
df_total['elevation_normalized'] = (df_total['elevation'] - min_elevation) / (max_elevation - min_elevation)

# Calculate the values for the new column and replace NaN with 0
df_total['window_floor_ratio_normalized'] = df_total['window_floor_ratio'] / max_windowfloor
df_total['window_floor_ratio_normalized'].fillna(0, inplace=True)

# Normalise the dalight and sky view values
df_total['view_sky_p80_normalized'] = df_total['view_sky_p80'] / max_view_sky
df_total['view_ground_p80_normalized'] = df_total['view_ground_p80'] / max_view_ground
df_total['daylight_21Mar1200_median_normalized'] = df_total['daylight_21Mar1200_median'] / max_daylight_Mar
df_total['daylight_21Jun1200_median_normalized'] = df_total['daylight_21Jun1200_median'] / max_daylight_Jun
df_total['daylight_21Dec1200_median_normalized'] = df_total['daylight_21Dec1200_median'] / max_daylight_Dec

In [None]:
# Calculate the normalized elevation column
df_train['elevation_normalized'] = (df_train['elevation'] - min_elevation) / (max_elevation - min_elevation)

# Calculate the values for the new column and replace NaN with 0
df_train['window_floor_ratio_normalized'] = df_train['window_floor_ratio'] / max_windowfloor
df_train['window_floor_ratio_normalized'].fillna(0, inplace=True)

# Normalise the dalight and sky view values
df_train['view_sky_p80_normalized'] = df_train['view_sky_p80'] / max_view_sky
df_train['view_ground_p80_normalized'] = df_train['view_ground_p80'] / max_view_ground
df_train['daylight_21Mar1200_median_normalized'] = df_train['daylight_21Mar1200_median'] / max_daylight_Mar
df_train['daylight_21Jun1200_median_normalized'] = df_train['daylight_21Jun1200_median'] / max_daylight_Jun
df_train['daylight_21Dec1200_median_normalized'] = df_train['daylight_21Dec1200_median'] / max_daylight_Dec

In [None]:
# Calculate the normalized elevation column
df_pred['elevation_normalized'] = (df_pred['elevation'] - min_elevation) / (max_elevation - min_elevation)

# Calculate the values for the new column and replace NaN with 0
df_pred['window_floor_ratio_normalized'] = df_pred['window_floor_ratio'] / max_windowfloor
df_pred['window_floor_ratio_normalized'].fillna(0, inplace=True)

# Normalise the dalight and sky view values
df_pred['view_sky_p80_normalized'] = df_pred['view_sky_p80'] / max_view_sky
df_pred['view_ground_p80_normalized'] = df_pred['view_ground_p80'] / max_view_ground
df_pred['daylight_21Mar1200_median_normalized'] = df_pred['daylight_21Mar1200_median'] / max_daylight_Mar
df_pred['daylight_21Jun1200_median_normalized'] = df_pred['daylight_21Jun1200_median'] / max_daylight_Jun
df_pred['daylight_21Dec1200_median_normalized'] = df_pred['daylight_21Dec1200_median'] / max_daylight_Dec

In [None]:
# Create a figure and axis for the boxplots
fig, ax = plt.subplots(figsize=(18, 11))

# Define properties for the fliers (outliers)
flierprops = dict(marker='o', markerfacecolor='#808081', markersize=3, alpha=0.2)

# Set the positions for the boxplots
positions = [0.15, 0.90, 1.80, 2.55, 3.3, 4.2, 4.95]

# Define dataframes and colors
dataframes = [df_pred, df_train, df_total]
colors = ['#F7D08A', '#B0647E', '#084C61']

# Define labels
labels = ['Ground view\np80', 'Sky view\np80', 'Daylight 21 Dec\n12:00 median', 'Daylight 21 Jun\n12:00 median',
          'Daylight 21 Mar\n12:00 median', 'Window to\nFloor Ratio', 'Elevation']

# Loop through the data and create boxplots
for i, feature in enumerate(['view_ground_p80_normalized', 'view_sky_p80_normalized',
                            'daylight_21Dec1200_median_normalized', 'daylight_21Jun1200_median_normalized',
                            'daylight_21Mar1200_median_normalized', 'window_floor_ratio_normalized', 'elevation_normalized']):
    for j, df in enumerate(dataframes):
        ax.boxplot(df[feature], positions=[positions[(i)] + j * 0.2], patch_artist=True, vert=False, widths=0.15,
                   flierprops=flierprops, medianprops={'color': 'black'})
        for boxplot in ax.boxplot(df[feature], positions=[positions[i] + j*0.2], patch_artist=True, vert=False, widths=0.15,
                                  flierprops=flierprops, medianprops={'color': 'black'})['boxes']:
            boxplot.set(facecolor=colors[j])

# Set titles for the boxplots
ax.set_title('Distribution feature & label values over datasets')
positions_labels = [0.35, 1.1, 2.0, 2.75, 3.5, 4.4, 5.15]
ax.set_yticks(positions_labels)
ax.set_yticklabels(labels)
ax.set_xlim(-0.01, 1.01)
ax.set_ylim(0,5.5)
ax.set_xlabel('Normalised values')

# Create custom legend
legend_handles = []
dataset_tags = ['Total', 'Training', 'Prediction']
colors_tags = ['#084C61', '#B0647E', '#F7D08A']

for i, dataset in enumerate(dataset_tags):
    legend_handles.append(mpatches.Patch(color=colors_tags[i], label=dataset))
plt.legend(handles=legend_handles, loc='upper right', bbox_to_anchor=(1, 1.05), fontsize='small', ncol=3)

# Show the plot
boxplot_path = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/10_Images_04Dataset_data/Boxplots_v4/'
plt.savefig(f'{boxplot_path}boxplot_dataset_featurelabel_distribution.png', bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Disabled code, kept inside a string literal so that it is not executed:
# it listed the buildings that are still to be plotted and have no training export yet.
'''
plotted_buildings = df_train['building_id'].unique()
toplot_buildings = df_to_plot['building_id'].unique()

# Find building_ids that are in toplot_buildings but not in plotted_buildings
building_ids_not_plotted = set(toplot_buildings) - set(plotted_buildings)

# If you want the result as a list, you can convert the set back to a list
building_ids_not_plotted_list = list(building_ids_not_plotted)
df_filtered_to_plot = df_to_plot[df_to_plot['building_id'].isin(building_ids_not_plotted_list)]
# Print the result
print(building_ids_not_plotted_list)'''

In [None]:
# Disabled code, kept inside a string literal so that it is not executed:
# it listed the sites that are still to be plotted and have no training export yet.
'''
plotted_sites = df_train['site_id'].unique()
toplot_sites = df_to_plot['site_id'].unique()

# Find site_ids that are in toplot_sites but not in plotted_sites
site_ids_not_plotted = set(toplot_sites) - set(plotted_sites)

# If you want the result as a list, you can convert the set back to a list
site_ids_not_plotted_list = list(site_ids_not_plotted)
df_filtered_to_plot = df_to_plot[df_to_plot['site_id'].isin(site_ids_not_plotted_list)]
# Print the result
print(site_ids_not_plotted_list)'''

In [None]:
# Fixed list of site ids (presumably the sites reserved for the prediction set — confirm)
pred_sites = [1067, 735, 983, 66, 1105, 3958, 1026, 194, 2133, 2291, 401, 1024,
                386, 11726, 277, 11749, 11756, 789, 2161, 3641]

# Disabled: restrict the rows to plot to these sites
#df_filtered_to_plot = df_to_plot[df_to_plot['site_id'].isin(pred_sites)]

In [None]:
# NOTE(review): 'stop' is not defined in the visible code, so running this cell raises NameError.
# Presumably a deliberate barrier that keeps "run all" from reaching the rendering loop — confirm.
stop

In [None]:
# Apartments that have no exported image yet
to_plot_app_ids = df_to_plot['apartment_id'].unique()
to_plot_app_ids = list(to_plot_app_ids)
print(len(to_plot_app_ids))

# Randomly select 17 apartments, or all of them when fewer are left
# (random.sample raises ValueError when asked for more items than available)
random_selected_apartment_ids = random.sample(to_plot_app_ids, min(17, len(to_plot_app_ids)))

# Run loop to generate room images
for app_id in random_selected_apartment_ids:
#for app_id in app_ids:
  # Rows of the apartment, and the doors among them
  gdf_apartment = gdf_res.loc[gdf_res["apartment_id"] == app_id]
  gdf_apartment_doors = gdf_apartment.loc[gdf_apartment["entity_subtype"] == 'DOOR']
  #print(app_id)

  # Geometry of the apartment's floor, its outdoor spaces and the overhang of the floors above
  gdf_floor, gdf_outdoor_space, gdf_overhang = floor_info(gdf_geom, app_id)

  # Merge the (slightly buffered) overhang pieces into one geometry; None when there is no overhang
  union_overhang = None
  if not gdf_overhang.empty:
    union_overhang = gdf_overhang['geometry'].buffer(0.01).unary_union

  for area_id in gdf_apartment['area_id'].unique():
    # Locate the specific room in the apartment
    gdf_room = gdf_apartment.loc[gdf_apartment["area_id"] == area_id]
    if gdf_room.empty:
      print('Empty room')
      continue

    # Simulation results of the room; rooms without results are not rendered
    df_sim_room = df_sim.loc[(df_sim["area_id"] == area_id) & (df_sim["apartment_id"] == app_id)]
    if df_sim_room.empty:
      print('No simulation data')
      continue

    # Generate the full room geometry: add the doors connected to this room
    mask = (gdf_apartment_doors['door_connection1'] == area_id) | (gdf_apartment_doors['door_connection2'] == area_id)
    gdf_room_doors = gdf_apartment_doors[mask]
    gdf_room = pd.concat([gdf_room, gdf_room_doors], ignore_index=False)

    # Everything that is not a window, door or outside door is drawn as 'ROOM'
    gdf_room.loc[~gdf_room['entity_subtype'].isin(['WINDOW', 'DOOR', 'OUTSIDE_DOOR']), 'entity_subtype'] = 'ROOM'

    # Working canvas; the saved image is resized to 224x224 pixels further below
    fig, ax = plt.subplots(figsize=(10, 10), dpi=100)

    # Plot the floors above
    if union_overhang is not None:
      gpd.GeoSeries([union_overhang]).plot(ax=ax, color='#E0DFDE')

    # Plot the floor of the apartment
    gdf_floor.plot(ax=ax, color='#F6F5F4')

    # Plot the outdoor spaces
    if not gdf_outdoor_space.empty:
      gdf_outdoor_space.plot(ax=ax, color='#ECEAE9')

    # Plot the room geometry
    gdf_room.plot(column="entity_subtype", cmap=color_map, legend=False, ax=ax, legend_kwds={'loc': 'lower right'}, categories=categories)

    # Find boundary rectangle of 15x15 meters
    gdf_rect, img_centre = bbox_rect(gdf_room)
    #gdf_rect.boundary.plot(ax=ax, color='grey', linestyle=(5, (10, 3)), linewidth=1)

    # Get information about view landscape layer
    view_nature = df_sim_room['view_landscape_nature_mean'].iloc[0]
    view_urban = df_sim_room['view_landscape_urban_mean'].iloc[0]
    view_landscape = view_nature + view_urban

    # Only plot the diagram if the landscape layer is visible according to research
    if view_landscape > 0.477:
      # Radius grows with the total landscape view; the circle is split by the nature share
      radius = view_landscape/2.5
      split_angle = ((view_nature/view_landscape) * 360) + 90

      #print(f'total: {view_landscape}, nature: {view_nature} - {view_nature/view_landscape}%, urban: {view_urban}')

      # Plot the view_landscape circle: white base, then the nature and urban sectors
      circle_white = Wedge(center=img_centre, r=radius, theta1=0, theta2=360, facecolor='white', alpha=0.3)
      ax.add_patch(circle_white)
      circle_nature = Wedge(center=img_centre, r=radius, theta1=90, theta2=split_angle, facecolor='#AACFB5', alpha=0.4)
      ax.add_patch(circle_nature)
      circle_urban = Wedge(center=img_centre, r=radius, theta1=split_angle, theta2=450, facecolor='#CF8BA3', alpha=0.4)
      ax.add_patch(circle_urban)

    # Crop the view to the boundary rectangle and hide the axes
    bounds = gdf_rect.total_bounds
    ax.set_xlim(bounds[0], bounds[2])
    ax.set_ylim(bounds[1], bounds[3])
    ax.axis('off')

    # Remove the borders of the plot
    for spine_name in ('top', 'right', 'bottom', 'left'):
      ax.spines[spine_name].set_visible(False)

    # Set the filename based on the app_id and area_id
    # (int() also accepts plain Python numbers, unlike .astype)
    area_id1 = int(area_id)
    filename_png = f"{app_id}_{area_id1}.png"
    filename_npy = f"{app_id}_{area_id1}.npy"

    # Save the plot to a PNG file with the specified filename and path
    image_path = os.path.join(path_png, filename_png)
    fig.savefig(image_path, bbox_inches='tight', pad_inches=0, dpi=200)
    plt.close(fig)

    # Reopen the PNG (the with statement closes the file again), convert it to RGB
    # and resize it to the needed size
    with Image.open(image_path) as image:
      new_size = (224, 224)
      resized_image = image.convert('RGB').resize(new_size)

    # Create and save the numpy array
    image_array = np.array(resized_image)
    numpy_save_path = os.path.join(path_npy_pred, filename_npy)
    np.save(numpy_save_path, image_array)

In [None]:
# One exported room array, used as a visual check of the export
path_npy_app = 'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/10_Images_04Dataset_layouts/05_Room_Feature_v4_npy/3c3b1d6ca8b4b9092480b8c75f9eaa81_619322.npy'

# Read the array back from disk and report its shape
loaded_array = np.load(path_npy_app)
print(loaded_array.shape)

# Draw the array as an image on a fresh figure
fig, ax = plt.subplots()
im = ax.imshow(loaded_array)

# Tighten the layout and display the result
fig.tight_layout()
plt.show()
