##Import libraries

In [None]:
import numpy as np
import geopandas as gpd
import pandas as pd
import os
import math

import shapely.geometry as sg
import shapely.wkt
from shapely.geometry import Polygon, MultiPolygon, box, LineString, Point
from shapely.ops import transform , unary_union, cascaded_union, linemerge
from shapely import affinity

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.colors import ListedColormap, Normalize, BoundaryNorm, TwoSlopeNorm
from matplotlib.collections import PatchCollection
from matplotlib.cm import ScalarMappable, RdYlGn

from skimage import measure

import random
from PIL import Image

from ipywidgets import widgets
import time
import itertools

## Import dataset

Create dataframes for geometry and simulation results

In [None]:
# Geometry table of the Swiss dataset: build the file location and read it with pandas
path_geom = (
    'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/'
    'Swiss Dataset/SwissDataset_v3.0.0/SwissDataset_v3.0.0_geometries.csv'
)
Swiss_geom = pd.read_csv(path_geom)

# Wrap the parsed table in the DataFrame used by the rest of the notebook
df_geom_tot = pd.DataFrame(data=Swiss_geom)

In [None]:
# Simulation table of the Swiss dataset: build the file location and read it with pandas
path_sim = (
    'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/'
    'Swiss Dataset/SwissDataset_v3.0.0/SwissDataset_v3.0.0_simulations.csv'
)
Swiss_sim = pd.read_csv(path_sim)

# Wrap the parsed table in the DataFrame used by the rest of the notebook
df_sim = pd.DataFrame(data=Swiss_sim)

In [None]:
# Remove exact duplicate rows from both tables
df_sim = df_sim.drop_duplicates()
df_geom_tot = df_geom_tot.drop_duplicates()

# Discard the rows whose entity_type is "feature"
df_geom_tot = df_geom_tot.loc[df_geom_tot["entity_type"].ne("feature")]

## Apartments spread over several floors are outside the scope of the thesis
# Number of distinct unit_ids found for every apartment_id
apartment_unit_counts = df_geom_tot.groupby('apartment_id')['unit_id'].nunique()

# apartment_ids that own more than one unit_id
apartments_with_multiple_floors = apartment_unit_counts[apartment_unit_counts.gt(1)].index

# Keep only the rows of the remaining (single-unit) apartments
df_geom_tot = df_geom_tot.loc[~df_geom_tot['apartment_id'].isin(apartments_with_multiple_floors)]

In [None]:
# Reduce the number of distinct values in entity_subtype by folding related
# subtypes into one shared label. The source lists keep their original names.

# Outdoor spaces
categories_to_outdoor = ['BALCONY', 'LOGGIA', 'TERRACE', 'WINTERGARTEN', 'PATIO', 'GARDEN', 'RAILING']

# Living / dining functions
categories_to_living = ['LIVING_DINING', 'DINING', 'KITCHEN_DINING']

# Remaining undefined functions
categories_to_other = ['SHAFT', 'NOT_DEFINED']

# Voids
categories_to_void = ['OUTDOOR_VOID', 'LIGHTWELL', 'VOID']

# Circulation functions
categories_to_circulation = ['ELEVATOR', 'CORRIDORS_AND_HALLS', 'ELEVATOR_FACILITIES', 'STAIRCASE']

# Public functions
# NOTE(review): 'RADATION_THERAPY' is kept exactly as written; confirm the spelling against the dataset
categories_to_public = [
    'BASEMENT_COMPARTMENT', 'OFFICE', 'PRAM', 'PRAM_AND_BIKE_STORAGE_ROOM',
    'BIKE_STORAGE', 'COUNTER_ROOM', 'BASEMENT', 'TECHNICAL_AREA', 'HEATING', 'WASH_AND_DRY_ROOM',
    'CLOAKROOM', 'SALESROOM', 'GARAGE', 'OIL_TANK', 'HOUSE_TECHNICS_FACILITIES', 'OFFICE_SPACE',
    'OFFICE_TECH_ROOM', 'WAREHOUSE', 'CARPARK', 'SANITARY_ROOMS', 'OPEN_PLAN_OFFICE', 'MEETING_ROOM',
    'BREAK_ROOM', 'ARCHIVE', 'ELECTRICAL_SUPPLY', 'MEDICAL_ROOM', 'WAITING_ROOM', 'COMMON_KITCHEN',
    'VEHICLE_TRAFFIC_AREA', 'AIR', 'FACTORY_ROOM', 'RECEPTION_ROOM', 'COMMUNITY_ROOM', 'WORKSHOP',
    'CANTEEN', 'SHELTER', 'COLD_STORAGE', 'TRANSPORT_SHAFT', 'RADATION_THERAPY', 'PHYSIO_AND_REHABILITATION',
    'WATER_SUPPLY', 'DEDICATED_MEDICAL_ROOM', 'SPORTS_ROOMS', 'SHOWROOM', 'GAS', 'TEACHING_ROOM', 'ARCADE',
    'LOGISTICS', 'OPERATIONS_FACILITIES', 'LOBBY', 'FOYER',
]

# (label to write, subtypes that receive it), applied from top to bottom.
# No label is itself a source of a later entry, so every row is relabelled at most once.
subtype_merges = [
    ('WALL', ['COLUMN']),                     # separators
    ('DOOR', ['ENTRANCE_DOOR']),              # separators
    ('OUTDOOR_SPACE', categories_to_outdoor),
    ('DINING', categories_to_living),
    ('OTHER', categories_to_other),
    ('VOID', categories_to_void),
    ('CIRCULATION', categories_to_circulation),
    ('PUBLIC', categories_to_public),
]

for merged_subtype, source_subtypes in subtype_merges:
    df_geom_tot.loc[df_geom_tot['entity_subtype'].isin(source_subtypes), 'entity_subtype'] = merged_subtype

print(df_geom_tot['entity_subtype'].unique())

In [None]:
# entity_subtype values that are treated as residential rooms further down
res_room_type = [
    'BATHROOM',
    'LIVING_ROOM',
    'ROOM',
    'KITCHEN',
    'CORRIDOR',
    'DINING',
    'STOREROOM',
    'BEDROOM',
    'STUDIO',
]

#Create dataframe - geometry

In [None]:
# Split the site_ids into fixed-size batches and publish each batch as a
# module-level variable, so that a later cell can pick one batch by name.

#Get all the unique site_ids in the dataframe
unique_site_ids = df_geom_tot['site_id'].unique().tolist()

# Number of unique site IDs per batch -> 100 sites, because that is what can be looped through in one day
num_per_sublist = 100

# Create a separate list for each batch
for i in range(0, len(unique_site_ids), num_per_sublist):
    sublist = unique_site_ids[i:i+num_per_sublist]
    # The name carries the upper bound of the slice (site_ids_100 holds items 0-99,
    # site_ids_200 holds items 100-199, ...); the last batch may be shorter.
    sublist_name = f"site_ids_{i+num_per_sublist}"
    # Register the batch under that name in the module namespace
    globals()[sublist_name] = sublist

    # Print the current batch
    print(f"Subset: {sublist_name} -> {sublist}")

In [None]:
# Choose the batch of sites handled in this run.
# `number` is the upper bound that appears in the name of the batch list, so it
# must be kept in sync with the site_ids_<number> variable selected below.
number = 700
sublist = site_ids_700

# Keep only the rows that belong to the selected sites. The explicit copy makes
# df_geom an independent frame, so the in-place column assignments performed on
# it later do not act on a filtered selection of df_geom_tot
# (which triggers pandas' SettingWithCopyWarning).
df_geom = df_geom_tot[df_geom_tot['site_id'].isin(sublist)].copy()

# Show the number of distinct values per column of the filtered DataFrame
df_geom.nunique()

In [None]:
# Build the "<first>-<last>" label of the current batch; it is used in the
# names of the CSV files written further down.
begin_num = number - num_per_sublist
name = f'{begin_num}-{number}'
print(name)

1st round of cleaning dataframe

In [None]:
# List the entity_subtype values present in the selected batch
print(df_geom['entity_subtype'].unique())

In [None]:
# Make the "entity_subtype" column a string column.
# assign() returns a new frame, so the conversion is not written into a
# filtered selection of df_geom_tot (which triggers pandas' SettingWithCopyWarning).
# Note: astype(str) turns missing values into the literal string 'nan'.
df_geom = df_geom.assign(entity_subtype=df_geom['entity_subtype'].astype(str))

In [None]:
# Display df_geom. With a negative n, tail() returns every row except the first |n| rows.
# NOTE(review): confirm that tail(-2) is meant here and not tail(2).
df_geom.tail(-2)

Create color code & sort dataframe

In [None]:
# Plot colour per category. The insertion order of this dictionary is also the
# order of `categories` and of the colours inside `color_map`.
category_to_color = {
    'OTHER': '#377080',          # midnight green
    'BATHROOM': '#65949F',       # blue (munsell)
    'ROOM': '#C2DBDC',           # light blue
    'BEDROOM': '#C2DBDC',        # light blue
    'VOID': '#F1FFFA',           # mint cream

    'LIVING_ROOM': '#F6A96F',    # sandy brown
    'KITCHEN': '#F7D08A',        # sunset / light yellow
    'DINING': '#f7bd7d',         # fawn / light orange
    'STOREROOM': '#F3DFAA',      # vanilla

    'CIRCULATION': '#B0647E',    # china rose
    'CORRIDOR': '#D7A6B3',       # orchid pink
    'PUBLIC': '#F3E2E7',         # lavender blush

    'OUTDOOR_SPACE': '#A9D8B0',  # tea green

    'WALL': '#808081',           # grey
    'DOOR': '#d3d3d3',           # light grey
    'OUTSIDE_DOOR': '#94BDAA',   # cambridge blue
    'WINDOW': '#B5E3F1',         # light blue
}

# Parallel lists of the category names and their colours
categories = [*category_to_color]
colors = [*category_to_color.values()]

# Colour map built from the colours above
color_map = ListedColormap(colors)

2nd round of cleaning dataframe

---



Create two dataframes: one with only the residential units, and one that also
contains the public functions.
Then convert both to GeoPandas dataframes with a usable geometry column.

In [None]:
# Residential dataframe: the rows of df_geom whose unit_usage is RESIDENTIAL.
# The explicit copy makes df_res an independent frame, so adding the columns
# below does not write into a filtered selection of df_geom
# (which triggers pandas' SettingWithCopyWarning).
df_res = df_geom[df_geom["unit_usage"] == "RESIDENTIAL"].copy()

# Empty placeholder columns; they are filled when doors are connected to rooms
for connection_column in ('outside_connection', 'door_connection1', 'door_connection2'):
    df_res[connection_column] = ""

print(df_res['entity_subtype'].unique())

In [None]:
# Show the number of distinct values per column of the residential dataframe
df_res.nunique()

In [None]:
# Build GeoSeries with polygons from the 'geometry' columns of df_res and df_geom.
# The dtype of df_res['geometry'] decides the path for BOTH frames:
# reuse the columns when that dtype compares equal to 'geometry',
# otherwise parse the columns as WKT text.
if df_res['geometry'].dtype == 'geometry':
  gs_res = df_res['geometry']
  gs_geom = df_geom['geometry']
else:
  gs_res = gpd.GeoSeries.from_wkt(df_res['geometry'])
  gs_geom = gpd.GeoSeries.from_wkt(df_geom['geometry'])

# Create the GeoDataFrames with the polygon geometry; no coordinate reference system is set (crs=None)
gdf_res = gpd.GeoDataFrame(df_res, geometry=gs_res, crs=None)
gdf_geom = gpd.GeoDataFrame(df_geom, geometry=gs_geom, crs=None)

In [None]:
# Display the first two rows of the residential GeoDataFrame
gdf_res.head(2)

#Create dataframe - simulation results

In [None]:
# Take the columns that are needed from the simulation CSV
df_sim_info = df_sim.loc[
    :,
    [
        'area_id',
        'apartment_id',
        'layout_area',
        'layout_perimeter',
        'layout_biggest_rectangle_length',
        'layout_biggest_rectangle_width',
    ],
]

# Room depth ratio = length / width of the biggest rectangle, rounded to 3 decimals
df_sim_info['room_depth_ratio'] = (
    df_sim_info['layout_biggest_rectangle_length']
    .div(df_sim_info['layout_biggest_rectangle_width'])
    .round(3)
)

df_sim_info.head(2)

##Create dataframe to start storing all needed information

In [None]:
# Working table for the features derived below: every room ('area' entities)
# plus the windows and doors of the residential units.
# Rows and columns are selected in one step and copied afterwards, so df_info
# is an independent frame when the new columns are added (copying before the
# column selection would leave df_info a derived selection again).
df_info = gdf_res.loc[
    (gdf_res['entity_type'] == 'area') | gdf_res['entity_subtype'].isin(['WINDOW', 'DOOR']),
    ['apartment_id', 'site_id', 'area_id', 'entity_subtype', 'geometry', 'elevation', 'height',
     'outside_connection', 'door_connection1', 'door_connection2'],
].copy()

# Add empty columns for info about window orientation and areas
for empty_column in ('nr_window_sides', 'orientation', 'orientation_percentage', 'window_height',
                     'window_length', 'window_area', 'wall_area', 'window_wall_ratio'):
    df_info[empty_column] = ""

df_info.head(2)

In [None]:
# Select the door rows of df_info and display the first ones
df_info_doors = df_info[(df_info['entity_subtype'] == 'DOOR')]
df_info_doors.head()

#Definitions window orientation

In [None]:
def point_movement(x1, y1, x2_other, y2_other):
  """Describe where (x2_other, y2_other) lies relative to (x1, y1).

  Returns a list with one label for the x comparison followed by one label
  for the y comparison:
    x: 'right' when x2_other < x1, 'left' when x2_other > x1, 'no' when equal
    y: 'top'   when y2_other < y1, 'down' when y2_other > y1, 'no' when equal
  A comparison that matches none of the three cases (e.g. NaN) adds no label.
  """
  movement = []

  axes = (
      (x1, x2_other, 'right', 'left'),
      (y1, y2_other, 'top', 'down'),
  )
  for reference, other, label_lower, label_higher in axes:
    if other < reference:
      movement.append(label_lower)
    elif other > reference:
      movement.append(label_higher)
    elif other == reference:
      movement.append('no')

  return movement

In [None]:
def find_orientation(angle, movement):
  """Translate a window line angle and a movement pair into a compass label.

  angle    -- angle of the window line in degrees
  movement -- [x label, y label] as produced by point_movement

  Near-horizontal lines give 'South'/'North' (decided by movement[1]),
  near-vertical lines give 'East'/'West' (decided by movement[0]) and the
  diagonal sectors give a combined label such as 'South-East'. Anything that
  fits no rule is returned as the string 'other_<angle>_<movement>'.
  """
  def between(low, high):
    return low <= angle <= high

  # Angle North and South
  if between(-22.5, 22.5) or 157.5 <= angle or angle <= -157.5:
    return 'South' if movement[1] == 'top' else 'North'

  # Angle East and West
  if between(67.5, 112.5) or between(-112.5, -67.5):
    return 'East' if movement[0] == 'left' else 'West'

  # Angle N/S-East and N/S-West: all four diagonal sectors follow the same rule
  diagonal_sectors = ((22.5, 67.5), (-157.5, -112.5), (112.5, 157.5), (-67.5, -22.5))
  is_diagonal = any(between(low, high) for low, high in diagonal_sectors)
  if is_diagonal and movement[0] in ('left', 'right'):
    east_west = 'East' if movement[0] == 'left' else 'West'
    north_south = 'South' if movement[1] == 'top' else 'North'
    return f'{north_south}-{east_west}'

  return f'other_{angle}_{movement}'


In [None]:
def window_line_length(window):
    """Return the longest side length of a window polygon and the matching sides.

    Only the first four exterior vertices are used, i.e. the window is treated
    as a quadrilateral. Side lengths are rounded to 5 decimals before they are
    compared.

    Returns (window_length, longest_lines): the largest rounded side length and
    the coordinate sequences of every side that has that length.
    """
    ring = window.exterior.coords
    corners = [ring[0], ring[1], ring[2], ring[3]]

    # The four sides, each running from one corner to the next (the last one closes the ring)
    sides = [LineString([corners[k], corners[(k + 1) % 4]]) for k in range(4)]

    # Rounded side lengths; the largest one is the window length
    side_lengths = [round(side.length, 5) for side in sides]
    window_length = max(side_lengths)

    # Keep the coordinates of every side that is as long as the window length
    longest_lines = [
        side.coords
        for side, side_length in zip(sides, side_lengths)
        if side_length == window_length
    ]

    return window_length, longest_lines

In [None]:
def small_window_line_length(window):
    """Return the shortest side length of a window polygon and the matching sides.

    Counterpart of window_line_length that selects the SHORTEST sides. Only the
    first four exterior vertices are used, and side lengths are rounded to
    5 decimals before they are compared.

    Returns (window_length, longest_lines): the smallest rounded side length
    and the coordinate sequences of every side that has that length.
    """
    ring = window.exterior.coords
    corners = [ring[0], ring[1], ring[2], ring[3]]

    # The four sides, each running from one corner to the next (the last one closes the ring)
    sides = [LineString([corners[k], corners[(k + 1) % 4]]) for k in range(4)]

    # Rounded side lengths; the smallest one is used as the window length
    side_lengths = [round(side.length, 5) for side in sides]
    window_length = min(side_lengths)

    # Keep the coordinates of every side that is as short as that length
    longest_lines = [
        side.coords
        for side, side_length in zip(sides, side_lengths)
        if side_length == window_length
    ]

    return window_length, longest_lines

#Feature creation

Connect area_ids

---
1. Find all data per apartment_id
2. Loop over rooms in apartment
3. Buffer room to find geometry that belongs to room
4. Append area_id of room to the windows and doors belonging to the room

In [None]:
#Stop here when half file

In [None]:
# Connect windows and doors to the rooms they belong to.
# For every apartment that still has windows without an area_id, each room is
# buffered and everything lying within that buffer is linked to the room's
# area_id. The results are written back into gdf_apartment, gdf_res and df_info.

#To try for one apartment
#app_ids = ['0a31e9e1152f226e5104ec79b726f052', '0ec0791b6c93b251a33f6289947f1f5d', '2aa789e7bf1114b4832b65c36e67b161', '2bbae2c8871ae1b111db2974347b8d28', '05d19084d6b9d308a4cfda040ee77c2b', '26dec908c4bd5d336c67ee41f5dade93', '062b1890c079f88c174b203fe83a92f2', 'd8e7ec1637e4822c3080085ac030bed6']
#app_ids = ['d8e7ec1637e4822c3080085ac030bed6']
#for app_id in app_ids:

# Locate rows where entity_subtype is 'WINDOW' and area_id is NaN
filtered_df = gdf_res[(gdf_res['entity_subtype'] == 'WINDOW') & pd.isnull(gdf_res['area_id'])]
print(len(filtered_df['apartment_id'].unique()))

for app_id in filtered_df['apartment_id'].unique():

  # All rows of this apartment, used to find its unique area_ids
  gdf_apartment = gdf_res.loc[gdf_res["apartment_id"]== app_id]

  # Buffer every area geometry by 0.8 and merge the buffers into one shape
  # (presumably metres; the unit is not visible in this file)
  area_geometries = gdf_apartment[gdf_apartment['entity_type'] == 'area']['geometry']
  buffered_geometries = area_geometries.buffer(0.8)
  combined_geometry = buffered_geometries.unary_union

  # Windows without an area_id that do not lie within the merged shape get area_id 'OUTSIDE'
  # NOTE(review): this is written to gdf_apartment only; it reaches gdf_res / df_info
  # only for rows that are part of a gdf_room_complete below. Confirm that is intended.
  mask = (gdf_apartment['entity_subtype'] == 'WINDOW') & (gdf_apartment['area_id'].isna())
  outside_windows = gdf_apartment[mask].loc[~gdf_apartment[mask].geometry.within(combined_geometry)]
  gdf_apartment.loc[outside_windows.index, 'area_id'] = 'OUTSIDE'

  #To check if it is correct on image
  #gdf_apartment.plot(column='entity_subtype', cmap=color_map, legend=False, figsize=(10,10), categories=categories)
  unique_area_ids = gdf_apartment.loc[gdf_apartment['entity_type'] == 'area', 'area_id'].unique()

  for area_id in unique_area_ids:
    # Rows of the apartment that already carry this area_id
    gdf_room = gdf_apartment.loc[gdf_apartment["area_id"]== area_id]

    # check if the dataframe is not empty
    if not gdf_room.empty:

      # Find all the geometries that belong to the room: everything in the apartment
      # that lies within the 0.8 buffer of the FIRST row of gdf_room
      # (presumably the room polygon itself; verify for rooms with several rows)
      gdf_room["geometry_buffered"] = gdf_room["geometry"].buffer(0.8)
      gdf_room_complete = gdf_apartment[gdf_apartment.geometry.within(gdf_room["geometry_buffered"].geometry.iloc[0])]

      # Residential room types: every window inside the buffer gets this area_id
      if gdf_apartment.loc[gdf_apartment["area_id"]== area_id, 'entity_subtype'].isin(res_room_type).any():
        gdf_room_complete.loc[gdf_room_complete['entity_subtype'] == 'WINDOW', 'area_id'] = area_id

      # Outdoor spaces: only windows that are still without an area_id get this area_id
      if gdf_apartment.loc[gdf_apartment["area_id"]== area_id, 'entity_subtype'].isin(['OUTDOOR_SPACE']).any():
          mask = (gdf_room_complete['entity_subtype'] == 'WINDOW') & (gdf_room_complete['area_id'].isna())
          gdf_room_complete.loc[mask, 'area_id'] = area_id


      # Connect the doors to the room with the area_id
      door_rows = gdf_room_complete.loc[gdf_room_complete['entity_subtype'] == 'DOOR']

      for index, row in door_rows.iterrows():
        # Fill the first free connection slot of the door with this room
        if row['door_connection1'] == '':
          gdf_room_complete.at[index, 'door_connection1'] = area_id 

        elif row['door_connection2'] == '':
          gdf_room_complete.at[index, 'door_connection2'] = area_id 

        # A door inside the buffer of an outdoor space also records that space as its outside connection
        if gdf_apartment.loc[gdf_apartment["area_id"]== area_id, 'entity_subtype'].isin(['OUTDOOR_SPACE']).any():
          gdf_room_complete.at[index, 'outside_connection'] = area_id

      columns_to_update = ['area_id', 'outside_connection', 'door_connection1', 'door_connection2']

      # Copy the new values from gdf_room_complete into gdf_apartment, gdf_res and df_info.
      # DataFrame.update aligns on the index and only overwrites with non-NA values.
      for column in columns_to_update:
        gdf_apartment.update(gdf_room_complete[[column]])
        gdf_res.update(gdf_room_complete[[column]])
        df_info.update(gdf_room_complete[[column]])
        
        
    else:
      print('SKIP, empty')

    #To check values per apartment
    #print(gdf_apartment)

In [None]:
# In df_info, doors that have an outside connection become 'OUTSIDE_DOOR'
df_info.loc[((df_info['entity_subtype'] == 'DOOR') & (df_info['outside_connection'] != '')), 'entity_subtype'] = 'OUTSIDE_DOOR'
df_info_doors = df_info[(df_info['entity_subtype'] == 'OUTSIDE_DOOR')]

# Give every outside door the area_id of the first connection (door_connection1,
# then door_connection2) that differs from its outside connection
for index, row in df_info_doors.iterrows():
    for connection in ('door_connection1', 'door_connection2'):
        if row[connection] != row['outside_connection']:
            area_id_door = row[connection]
            df_info.at[index, 'area_id'] = area_id_door
            break

In [None]:
# In gdf_res, doors that have an outside connection become 'OUTSIDE_DOOR'
gdf_res.loc[((gdf_res['entity_subtype'] == 'DOOR') & (gdf_res['outside_connection'] != '')), 'entity_subtype'] = 'OUTSIDE_DOOR'
gdf_res_doors = gdf_res[(gdf_res['entity_subtype'] == 'OUTSIDE_DOOR')]

# Give every outside door the area_id of the first connection (door_connection1,
# then door_connection2) that differs from its outside connection
for index, row in gdf_res_doors.iterrows():
    for connection in ('door_connection1', 'door_connection2'):
        if row[connection] != row['outside_connection']:
            area_id_door = row[connection]
            gdf_res.at[index, 'area_id'] = area_id_door
            break

In [None]:
# Store the rows that are still labelled 'DOOR' in their own CSV file
path_half_doors = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/InfoDataCSV_v2/SwissDataset_v3.0.0_info_{name}sites_halfdoors.csv'

df_info_doors = df_info.loc[df_info['entity_subtype'].eq('DOOR')]
df_info_doors.to_csv(path_half_doors, index=True)

In [None]:
# Remove the rows still labelled 'DOOR' from df_info and list what remains
df_info = df_info.loc[df_info['entity_subtype'].ne('DOOR')]
print(df_info['entity_subtype'].unique())

In [None]:
# Save df_info half-way as a CSV file; the index is written too so it can be restored on reload
path_half = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/InfoDataCSV_v2/SwissDataset_v3.0.0_info_{name}sites_half.csv'
df_info.to_csv(path_half, index=True)

In [None]:
HARD STOP -> manual check

In [None]:
#1. First check: rows of df_info that are still without an area_id
# Codes chosen through the buttons are collected here, in click order
wrong_ids_list = []

# Size of the figure drawn for every apartment
fig_size = (8, 8)

df_info_uncomplete = df_info[df_info['area_id'].isna()]
unsucessful_ids = df_info_uncomplete['apartment_id'].unique()
print(len(unsucessful_ids))

# Only the first 15 apartments are reviewed in one go
unsucessful_ids_15 = unsucessful_ids[:15]

# Factory for the button callbacks; each callback remembers its own code
def create_handler(wrong_code):
    """Return a click callback that appends `wrong_code` to wrong_ids_list."""
    def on_button_clicked(_button):
        # The clicked button is passed in by the widget and is not used
        wrong_ids_list.append(wrong_code)

    return on_button_clicked

# Plot every apartment under review together with its classification buttons
def process_apartments():
    """Draw each apartment in unsucessful_ids_15 and offer three buttons for it.

    Per apartment the floor plan is plotted by entity_subtype, the rows that
    still lack an area_id are drawn in red, and the title shows how many rows
    have no area_id and how many carry area_id 'OUTSIDE'. Clicking a button
    appends its code ('wrong1', 'wrong2' or 'wrong3') to wrong_ids_list.
    """
    button_specs = (
        ('wrong1', "shared window/door"),
        ('wrong2', "outside apartment"),
        ('wrong3', "other problem"),
    )

    for app_id in unsucessful_ids_15:
        # Rows of this apartment, and the subset that still has no area_id
        gdf_apartment = gdf_res.loc[gdf_res["apartment_id"] == app_id]
        room_wrong_geom = df_info_uncomplete[df_info_uncomplete['apartment_id'] == app_id]

        # Counts shown in the plot title
        nan_count = len(room_wrong_geom)
        outside_rows = df_info[(df_info['apartment_id'] == app_id) & (df_info['area_id'] == 'OUTSIDE')]
        outside_count = len(outside_rows)

        # Floor plan coloured by subtype, unresolved rows on top in red
        fig, ax = plt.subplots(figsize=fig_size)
        gdf_apartment.plot(column='entity_subtype', cmap=color_map, legend=False, ax=ax, categories=categories)
        room_wrong_geom.plot(ax=ax, color='red')
        plt.title(f"{app_id} ({nan_count} NaN, {outside_count} outside)")

        # One button per classification, each wired to its own handler
        buttons = []
        for wrong_code, label in button_specs:
            button = widgets.Button(description=label)
            button.on_click(create_handler(wrong_code))
            buttons.append(button)

        # Output widget shown below the buttons
        output = widgets.Output()

        display(widgets.HBox(buttons))
        display(output)

        # Display the plot
        plt.show()

# Run the manual check: draws the plots and buttons for the apartments under review
process_apartments()

In [None]:
print(wrong_ids_list)
#print(unsucessful_ids_15)

# One list of apartment_ids per clicked code
wrong1_ids, wrong2_ids, wrong3_ids = [], [], []  # shared windows, outside windows, other reason
ids_by_code = {
    'wrong1': wrong1_ids,
    'wrong2': wrong2_ids,
    'wrong3': wrong3_ids,
}

# The n-th clicked code is paired with the n-th reviewed apartment.
# NOTE(review): this only holds when exactly one button was clicked per
# apartment, in display order. Confirm before relying on the result.
for wrong_code, app_id in zip(wrong_ids_list, unsucessful_ids_15):
    target_ids = ids_by_code.get(wrong_code)
    if target_ids is not None:
        target_ids.append(app_id)

print('wrong1', wrong1_ids)
print('wrong2', wrong2_ids)
print('wrong3', wrong3_ids)

In [None]:
# Give the windows / outside doors that still have no area_id a marker that
# reflects the manual classification of their apartment.

# Shared window/door
mask1 = (
    df_info['apartment_id'].isin(wrong1_ids)
    & df_info['entity_subtype'].isin(['WINDOW', 'OUTSIDE_DOOR'])
    & df_info['area_id'].isna()
)
df_info.loc[mask1, 'area_id'] = 'SHARED'

# Outside the apartment
mask2 = (
    df_info['apartment_id'].isin(wrong2_ids)
    & df_info['entity_subtype'].isin(['WINDOW', 'OUTSIDE_DOOR'])
    & df_info['area_id'].isna()
)
df_info.loc[mask2, 'area_id'] = 'OUTSIDE'

# Any other problem
mask3 = (
    df_info['apartment_id'].isin(wrong3_ids)
    & df_info['entity_subtype'].isin(['WINDOW', 'OUTSIDE_DOOR'])
    & df_info['area_id'].isna()
)
df_info.loc[mask3, 'area_id'] = 'WRONG'

In [None]:
# Save half-way before beginning the next part (same file name as the earlier half-way save, so it is overwritten)
path_half = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/InfoDataCSV_v2/SwissDataset_v3.0.0_info_{name}sites_half.csv'
df_info.to_csv(path_half, index=True)

Window length & orientation

---
1. Find all data per apartment_id
2. Loop over rooms in apartment based on area_id
3. Loop over all windows in room
4. Find longest line of window = window_length
5. Calculate window angle
6. Find movement between 2 longest window lines
7. Find correct orientation based on angle and movement
8. Append window_length & window_orientation to df_info

In [None]:
'''
#If code does not work any more, continue from half dataframe
path_half = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/InfoDataCSV_v2/SwissDataset_v3.0.0_info_{name}sites_half.csv'
df_info_half = pd.read_csv(path_half, index_col=0)

#Create (pandas) dataframe
df_info_half = pd.DataFrame(df_info_half)

# Add empty columns for info about window orientation and areas
df_info = df_info_half

df_info['nr_window_sides'] = ""
df_info['orientation'] = ""
df_info['orientation_percentage'] = ""
df_info['window_height'] = ""
df_info['window_length'] = ""
df_info['window_area'] = ""
df_info['wall_area'] = ""
df_info['window_wall_ratio'] = ""


columns_to_update = ['area_id', 'outside_connection', 'door_connection1', 'door_connection2']

# Update gdf_apartment, df_info, and df_res with values from gdf_room_complete
for column in columns_to_update:
    gdf_res.update(df_info[[column]]) 
    
gdf_res.drop_duplicates() '''

In [None]:
# Window length & orientation.
# For every apartment that still has windows / outside doors without an
# orientation: walk over its rooms, take the window side(s) that touch the
# room, derive the orientation from the side's angle and from the position of
# the opposite side, and store orientation and window_length in df_info.

#To try for one apartment
#app_ids = ['0a31e9e1152f226e5104ec79b726f052', '0ec0791b6c93b251a33f6289947f1f5d', '2aa789e7bf1114b4832b65c36e67b161', '2bbae2c8871ae1b111db2974347b8d28', '05d19084d6b9d308a4cfda040ee77c2b', '26dec908c4bd5d336c67ee41f5dade93', '062b1890c079f88c174b203fe83a92f2', 'd8e7ec1637e4822c3080085ac030bed6']
#app_ids = ['f7f2c0f9721bca63a675d423050df29a']

window_subtypes = ['WINDOW', 'OUTSIDE_DOOR']

# Rows of df_info that are a window or an outside door. entity_subtype is not
# modified inside the loops below, so this mask is computed once.
is_window_row = df_info['entity_subtype'].isin(window_subtypes)

# Locate the windows / outside doors that do not have an orientation yet
filtered_df = df_info[is_window_row & (df_info['orientation'] == '')]
print(len(filtered_df['apartment_id'].unique()))

#for app_id in app_ids:
for app_id in filtered_df['apartment_id'].unique():

  # All rows of the apartment and the unique area_ids of its rooms
  gdf_apartment = gdf_res.loc[gdf_res["apartment_id"]== app_id]
  gdf_apartment_area = gdf_apartment.loc[gdf_apartment["entity_type"]== 'area']
  unique_areas = gdf_apartment_area['area_id'].unique()

  #To check on image
  #gdf_apartment.plot(column='entity_subtype', cmap=color_map, legend=False, figsize=(10,10), categories=categories)

  for area_id in unique_areas:

    # Rows that belong to this room
    gdf_room = gdf_apartment.loc[gdf_apartment["area_id"]== area_id]

    # Continue only when the room has rows AND is one of the residential room
    # types ((bed)rooms, kitchens, bathrooms, living rooms, corridors, storage rooms).
    # The previous test `~gdf_room.empty or ...` was always true: `~` on a Python
    # bool is a bitwise NOT (-1 or -2), so neither the emptiness check nor the
    # room-type filter ever took effect.
    if not gdf_room.empty and gdf_room['entity_subtype'].isin(res_room_type).any():

      # Geometry of the windows and of the room.
      # The window mask comes from df_info and is aligned to gdf_room by index.
      window_polygons = gdf_room.loc[is_window_row, 'geometry']
      room_polygons = gdf_room.loc[gdf_room['entity_type'] == 'area', 'geometry']

      if not window_polygons.empty:

        #To check on image
        #fig, ax = plt.subplots()

        # Buffer room polygon to ensure capturing intersecting window line
        buffered_room = room_polygons.buffer(0.05)

        # Iterate over the windows in the room
        # (Series.items() replaces Series.iteritems(), which was removed in pandas 2.0)
        for index, window in window_polygons.items():
          window_length, longest_lines = window_line_length(window)

          # Short windows: when more than one of the longest sides touches the
          # room, the short sides are used as the window lines instead
          if window_length < 0.6:
            intersection_count = 0

            for line in longest_lines:
              line_segment = LineString(line)
              if buffered_room.intersects(line_segment).any():
                intersection_count += 1

            if intersection_count > 1:
              window_length, longest_lines = small_window_line_length(window)

          # Check which line intersects with the room polygons
          for line in longest_lines:
            # Geometry of the line
            line_segment = LineString(line)

            # Continue only with the window line on the room side
            if buffered_room.intersects(line_segment).any():

              # Window line coordinates
              x1, y1 = line_segment.coords[0]
              x2, y2 = line_segment.coords[1]

              # Angle of the window line in degrees
              angle = round(math.degrees(math.atan2((y2-y1), (x2-x1))),2)

              # End point of the first other window line
              # NOTE(review): raises IndexError when the window has only one
              # selected side (no "other" line). Confirm this cannot occur.
              other_line_coords = [c for c in longest_lines if c != line]
              x2_other, y2_other = other_line_coords[0][1]

              # Movement of the other point relative to the window line point
              movement = point_movement(x1, y1, x2_other, y2_other)

              # Window orientation based on angle and movement
              orientation = find_orientation(angle, movement)

              mask = ((df_info.index == index) & (df_info['area_id'] == area_id) & is_window_row)

              df_info.loc[mask, 'orientation'] = orientation
              df_info.loc[mask, 'window_length'] = window_length

              #To check on image
              #print(angle, "|", movement, "|", orientation)
              #plt.plot(*line_segment.xy, color='r', alpha=0.5)
              #room_polygons.plot(ax=ax, color='gray')
              #plt.plot(x1, y1, marker='o', color='green', markersize=2)  # plot the first point in green
              #plt.plot(x2_other, y2_other, 'bo', markersize=1)  # plot the other point in blue

        #To check on image
        #plt.show()
      else:
        print('no windows')
    else:
      print('skip')

Window area

---
1. Make window_length numeric
2. Calculate actual window height
3. Calculate window area



In [None]:
#1. Make 'window_length' numeric; values that cannot be parsed become NaN
df_info['window_length'] = pd.to_numeric(df_info['window_length'], errors='coerce')


#2. Calculate actual window height
  # -> elevation of room - elevation of window = how much higher the window starts from floor
  # -> height in window row - start  window = window_height

# Take the rows that are not a window / outside door (the rooms) and keep their
# elevation under the name floor_height
filtered_df = (
    df_info.loc[~df_info['entity_subtype'].isin(window_subtypes), ['apartment_id', 'area_id', 'elevation']]
    .rename(columns={'elevation': 'floor_height'})
)

In [None]:
# Merge filtered_df with df_info on apartment_id and area_id -> every row receives the floor_height of its room
# NOTE(review): merge() returns a frame with a new default index, so after this line
# df_info's index no longer matches the index of gdf_res. Confirm nothing later relies on it.
df_info = df_info.merge(filtered_df, on=['apartment_id', 'area_id'], how='left')

# window_height = height - (elevation - floor_height); only the difference in brackets is rounded to 3 decimals
df_info['window_height'] = df_info['height'] - (df_info['elevation'] - df_info['floor_height']).round(3)

# Drop the helper column floor_height again
df_info.drop('floor_height', axis=1, inplace=True)

# Remove the window_height values from the rows that are not a window / outside door (the rooms)
df_info.loc[~df_info['entity_subtype'].isin(window_subtypes), ['window_height']] = np.nan

#3. Calculate the window area
# The product uses the 'height' column and window_length, rounded to 5 decimals
# NOTE(review): the step description speaks of the window height, but 'window_height'
# computed above is not used here. Confirm which column is intended.
df_info['window_area'] = (df_info['height'] * df_info['window_length']).round(5)

df_info.head(2)

Room total window area

---
1. Find sum of window_area per area_id
2. Append window_area values to room rows

In [None]:
# Step 1: total window area per room
# Rows whose entity_subtype is listed in window_subtypes are the window rows
is_window_row = df_info['entity_subtype'].isin(window_subtypes)
window_rows = df_info.loc[is_window_row]

# Sum of window_area per (apartment_id, area_id), rounded to 3 decimals
window_area_sum = (
    window_rows
    .groupby(['apartment_id', 'area_id'])['window_area']
    .sum()
    .round(3)
)


def _room_window_area(room):
    """Return the summed window area for this room row, or NaN if it has no windows."""
    room_key = (room['apartment_id'], room['area_id'])
    return window_area_sum.get(room_key, np.nan)


# Step 2: write the totals into 'window_area' of the non-window (room) rows
df_info.loc[~is_window_row, 'window_area'] = df_info.loc[~is_window_row].apply(_room_window_area, axis=1)

df_info.head(2)

Nr. of window sides in room

---
1. Find the number of unique window orientations per area_id
2. Append to rows of rooms

In [None]:
# Step 1: count the distinct window orientations per room
is_window_row = df_info['entity_subtype'].isin(window_subtypes)
num_unique_orientations = (
    df_info.loc[is_window_row]
    .groupby(['apartment_id', 'area_id'])['orientation']
    .nunique()
)


def _room_window_sides(room):
    """Return the number of distinct window orientations of this room row, or NaN if none."""
    room_key = (room['apartment_id'], room['area_id'])
    return num_unique_orientations.get(room_key, np.nan)


# Step 2: store the count in 'nr_window_sides' on the non-window (room) rows
df_info.loc[~is_window_row, 'nr_window_sides'] = df_info.loc[~is_window_row].apply(_room_window_sides, axis=1)

# Replace every remaining NaN in these three columns with 0
# (this covers rooms without windows, but is applied to all rows)
zero_fill_columns = ['nr_window_sides', 'window_length', 'window_area']
df_info[zero_fill_columns] = df_info[zero_fill_columns].fillna(0)

# Ensure 'nr_window_sides' is stored as a numeric column
df_info['nr_window_sides'] = pd.to_numeric(df_info['nr_window_sides'], errors='coerce')

df_info.head(2)

Room orientation division

---
1. Find per room the total window_area sum per direction
2. Calculate percentage of orientation
3. For room combine orientation with correct percentage

In [None]:
# Step 1: window area per room and orientation
# Sum 'window_area' of the window rows for every (area_id, orientation) pair.
# NOTE(review): grouped by area_id only (no apartment_id), unlike the other
# per-room aggregations - assumes area_id is unique across apartments, TODO confirm.
is_window_row = df_info['entity_subtype'].isin(window_subtypes)
direction_sum = (
    df_info.loc[is_window_row]
    .groupby(['area_id', 'orientation'])['window_area']
    .sum()
)


def _direction_total(row):
    """Return the summed window area for the row's (area_id, orientation), or NaN if absent."""
    pair_key = (row['area_id'], row['orientation'])
    return direction_sum.get(pair_key, np.nan)


# New column 'direction_sum': the per-direction total looked up for every row
df_info['direction_sum'] = df_info.apply(_direction_total, axis=1)

In [None]:
# Step 2: share of each orientation in the room's total window area
# Total window area per area_id, taken from the non-window (room) rows
total_window_area = (
    df_info.loc[~df_info['entity_subtype'].isin(window_subtypes)]
    .groupby('area_id')['window_area']
    .sum()
)


def _direction_percentage(row):
    """Return direction_sum as a percentage of the room total (1 decimal), or NaN.

    NaN is returned when area_id or orientation is falsy, and also when the
    row's area_id has no entry in total_window_area (e.g. a window whose
    area_id does not belong to any room row). The lookup uses .get() so such
    rows yield NaN instead of raising KeyError, matching the other lookups
    in this notebook.
    """
    if not (row['area_id'] and row['orientation']):
        return np.nan
    room_total = total_window_area.get(row['area_id'], np.nan)
    return round(row['direction_sum'] / room_total * 100, 1)


# New column 'direction_percentage' for every row
df_info['direction_percentage'] = df_info.apply(_direction_percentage, axis=1)

In [None]:
# Step 3: per room, collect the (orientation, percentage) pairs of its windows
room_orientations = {}

# Walk over the window rows and gather the unique pairs per area_id
for _, window in df_info[df_info['entity_subtype'].isin(window_subtypes)].iterrows():
    if pd.isnull(window['area_id']):
        # windows without an area_id cannot be assigned to a room
        continue
    pair = (window['orientation'], window['direction_percentage'])
    room_orientations.setdefault(window['area_id'], set()).add(pair)


def get_orientation_percentage_comb(row):
    """Return the list of (orientation, percentage) pairs for a room row.

    Rows that are a 'WINDOW' or 'OUTSIDE_DOOR', or that have no area_id, get
    NaN. A room without any collected pair gets an empty list.
    """
    is_room = row['entity_subtype'] not in ['WINDOW', 'OUTSIDE_DOOR']
    if pd.isnull(row['area_id']) or not is_room:
        return np.nan
    return list(room_orientations.get(row['area_id'], []))


def _pairs_to_array(value):
    """Turn a list of pairs into a NumPy array; anything else becomes an empty array."""
    if isinstance(value, list):
        return np.array(value)
    return np.array([])


# New column 'orientation_percentage' holding the pairs per room
df_info['orientation_percentage'] = df_info.apply(get_orientation_percentage_comb, axis=1)

# Store every entry as a NumPy array (empty array where there was no list)
df_info['orientation_percentage'] = df_info['orientation_percentage'].apply(_pairs_to_array)

# The helper columns are no longer needed
df_info = df_info.drop(columns=['direction_sum', 'direction_percentage'])

df_info.head(2)

In [None]:
# Disabled sanity check: the whole snippet sits inside a string literal, so none of it executes.
# It shows how the orientations (column 0) and the percentages (column 1) can be read
# from the 'orientation_percentage' array of a single row.
'''
#To check if values are accesable for later analysis
row_5 = df_info.loc[1015]
orientation_percentage = row_5['orientation_percentage']
print(orientation_percentage)  # Print the entire value
print('')

# Access the orientations and percentages separately
orientations = orientation_percentage[:, 0]
percentages = orientation_percentage[:, 1]

print(orientations)  # Print the orientations
print(percentages)  # Print the percentages
'''

Room orientation

---
1. Find the unique window orientations per area_id
2. Append to rows of rooms

In [None]:
# Step 1: the distinct window orientations per room
# One array of unique 'orientation' values for every (apartment_id, area_id)
unique_orientations = (
    df_info[df_info['entity_subtype'].isin(window_subtypes)]
    .groupby(['apartment_id', 'area_id'])['orientation']
    .unique()
)


def _row_orientation(row):
    """Keep the orientation of a window / outside door; give a room its windows' orientations."""
    if row['entity_subtype'] in ['WINDOW', 'OUTSIDE_DOOR']:
        return row['orientation']
    room_key = (row['apartment_id'], row['area_id'])
    return unique_orientations.get(room_key, np.nan)


# Step 2: overwrite 'orientation' so room rows carry the orientations of their windows
# (NaN when the room has none); window and outside-door rows stay as they are
df_info['orientation'] = df_info.apply(_row_orientation, axis=1)

df_info.head(2)

In [None]:
# Disabled sanity check: the whole snippet sits inside a string literal, so none of it executes.
# It prints the 'orientation' value of a single row and then each of its entries.
'''
#To check if values are accesable for later analysis
row_X = df_info.loc[1015]
orientations = row_X['orientation']
print(orientations)  # Print the entire value
print('')

# Access the orientations and percentages separately
for i in range(len(orientations)):
  print(orientations[i])  # Print the percentages
'''

Window to wall & room depth ratio

---
1. Convert the area_id labels for outside, shared and wrong windows to numbers
2. Get total wall length & room depth ratio from df_sim_info
3. Calculate wall area
4. Calculate window to wall ratio

In [None]:
# Turn empty-string area_id values into NaN (missing)
df_info['area_id'] = df_info['area_id'].replace(to_replace='', value=np.nan)

In [None]:
# 1. Convert the area_id labels for shared, outside and wrong windows to numbers
# 'SHARED' -> 1, 'OUTSIDE' -> 2, 'WRONG' -> 3.
# The result is assigned back to the column: calling replace(..., inplace=True) on
# df_info['area_id'] is chained assignment, which pandas warns about and which does
# not update df_info when Copy-on-Write is active.
# NOTE(review): assumes no real area_id equals 1, 2 or 3 - TODO confirm.
df_info['area_id'] = df_info['area_id'].replace(['SHARED', 'OUTSIDE', 'WRONG'], [1, 2, 3])

# 2. Get total wall length, room depth ratio and floor area from df_sim_info
# Left-merge room_depth_ratio, layout_perimeter and layout_area on apartment_id and area_id
df_info = df_info.merge(
    df_sim_info[['apartment_id', 'area_id', 'room_depth_ratio', 'layout_perimeter', 'layout_area']],
    on=['apartment_id', 'area_id'],
    how='left',
)

# 3. Calculate the wall area: room perimeter * height, rounded to 3 decimals
# (layout_perimeter itself is dropped later, once the results dataframe is assembled)
df_info['wall_area'] = (df_info['layout_perimeter'] * df_info['height']).round(3)

# Room-level values do not apply to windows and outside doors: blank them on those rows
df_info.loc[
    df_info['entity_subtype'].isin(window_subtypes),
    ['wall_area', 'room_depth_ratio', 'orientation_percentage', 'layout_area'],
] = np.nan

# 4. Calculate the window-to-wall and window-to-floor ratios, rounded to 3 decimals
df_info['window_wall_ratio'] = (df_info['window_area'] / df_info['wall_area']).round(3)
df_info['window_floor_ratio'] = (df_info['window_area'] / df_info['layout_area']).round(3)

# Set the display format for floats to 3 decimal places
pd.set_option('display.float_format', '{:.3f}'.format)

df_info.tail(2)

In [None]:
# Disabled fallback: the whole snippet sits inside a string literal, so none of it executes.
# When enabled it reads a previously saved "halfdoors" CSV and rebuilds df_info_doors from it.
'''
# use if you started half way!!!
#If code does not work any more, continue from half dataframe
path_half_doors = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/InfoDataCSV_v2/SwissDataset_v3.0.0_info_{name}sites_halfdoors.csv'
df_info_half_doors = pd.read_csv(path_half_doors, index_col=0)

#Create (pandas) dataframe
df_info_doors = pd.DataFrame(df_info_half_doors)
df_info_doors.head() '''

In [None]:
# Add the door rows back: stack df_info_doors underneath df_info.
# Each frame keeps its own index labels (ignore_index=False).
frames_to_stack = [df_info, df_info_doors]
df_info_results = pd.concat(frames_to_stack, axis=0, ignore_index=False)
df_info_results.tail()


In [None]:
## The dataframe is complete: remove the columns that are not needed in the output
df_info_results = df_info_results.drop(columns=['layout_perimeter', 'height', 'outside_connection'])

df_info_results.head()

In [None]:
# Write the final dataframe to a CSV file; the index is not written
path_final = f'C:/Users/Name/OneDrive - Delft University of Technology/Building Technology-Thesis/Swiss Dataset/SwissDataset_v3.0.0/InfoDataCSV_v2/SwissDataset_v3.0.0_info_{name}sites.csv'
df_info_results.to_csv(path_or_buf=path_final, index=False)