In [1]:
import base64
import os

import requests

# OpenAI API key: taken from the OPENAI_API_KEY environment variable so the
# real secret never has to be saved inside the notebook. "xxxxx" is the
# original placeholder and is kept as the fallback when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY", "xxxxx")

def encode_image(image_path):
  """Read the file at ``image_path`` and return its bytes as base64 text.

  The file is opened in binary mode; the base64 bytes are decoded to a
  ``str`` using UTF-8.
  """
  with open(image_path, "rb") as image_file:
    raw_bytes = image_file.read()
  encoded_bytes = base64.b64encode(raw_bytes)
  return encoded_bytes.decode('utf-8')

In [7]:
import pandas as pd

# Root folder holding one sub-folder of images per building id.
base_path = "clear/apartment_images/"
# CSV file read by extract_window_features.
gt_path = 'ground_truth.csv'


def image_list(folder_id, image_string):
    """Expand a ';'-separated string of image numbers into image file paths.

    Each item ``x`` of ``image_string`` becomes
    ``<base_path><folder_id>/image_<x>.jpg``; the order of the items is kept.
    """
    path_prefix = base_path + str(folder_id) + '/image_'
    return [path_prefix + item + '.jpg' for item in image_string.split(';')]


In [29]:
# get training ids
# Draws 1 of the 2 single-glazed ids as that class's training sample.
# NOTE(review): no random.seed(...) call is visible in this notebook, so a
# re-run may pick a different id than the one shown in the saved output.
import random
single_glazed_ids = [12, 20]
single_glazed_training_sample = random.sample(single_glazed_ids, 1)
print(single_glazed_training_sample)

[12]


In [30]:
# Draws 11 of the 18 double-glazed ids (without replacement) as that class's
# training sample.
# NOTE(review): 11/18 is roughly 60% - presumably a fixed training fraction;
# confirm. The draw is unseeded, so the saved output reflects one particular run.
double_glazed_ids = [1, 2, 4, 8, 11, 13, 15, 17, 19, 25, 26, 27, 28, 30, 31, 34, 44, 45]
double_glazed_training_sample = random.sample(double_glazed_ids, 11) 
print(double_glazed_training_sample)

[30, 13, 4, 31, 25, 28, 34, 11, 19, 17, 1]


In [52]:
# randomly pick one from this list to use for feature extraction
# The chosen id is only displayed as the cell result, not stored in a variable;
# a later cell passes the displayed id to extract_window_features as a literal.
random.choice(double_glazed_training_sample)

25

In [44]:
# Draws 16 of the 27 high-efficiency ids (without replacement) as that class's
# training sample.
# NOTE(review): 16/27 is roughly 60% - presumably a fixed training fraction;
# confirm. The draw is unseeded, so the saved output reflects one particular run.
high_efficiency_ids = [0, 3, 5, 6, 7, 9, 10, 14, 16, 18, 21, 22, 23, 24, 29, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 46]
high_efficiency_training_sample = random.sample(high_efficiency_ids, 16)
print(high_efficiency_training_sample)

[43, 9, 29, 14, 37, 33, 24, 42, 10, 21, 39, 0, 22, 40, 7, 35]


In [51]:
# randomly pick one from this list to use for feature extraction
# The chosen id is only displayed as the cell result, not stored in a variable;
# a later cell passes the displayed id to extract_window_features as a literal.
random.choice(high_efficiency_training_sample)

0

In [102]:
# Combine all lists and check for duplicates, there shouldn't be!
from collections import Counter

window_samples_combined = (
    single_glazed_training_sample
    + double_glazed_training_sample
    + high_efficiency_training_sample
)
# Count every id in a single pass instead of calling list.count() once per id,
# and sort the result so the report has a stable order.
duplicates = sorted(
    sample_id
    for sample_id, occurrences in Counter(window_samples_combined).items()
    if occurrences > 1
)
print("Duplicates:", duplicates)

Duplicates: []


In [46]:
# Ascending-id copy of the combined sample; the combined list itself is left
# in its original order.
window_type_training_sample = list(window_samples_combined)
window_type_training_sample.sort()
# Show the sorted ids, then how many there are, on separate lines.
print(window_type_training_sample, len(window_type_training_sample), sep="\n")

[0, 1, 4, 7, 9, 10, 11, 12, 13, 14, 17, 19, 21, 22, 24, 25, 28, 29, 30, 31, 33, 34, 35, 37, 39, 40, 42, 43]
28


In [42]:
def llm_get_window_features(images, timeout=300):
    """Ask GPT-4o to list window-glazing features visible in a set of images.

    Every image file is base64-encoded with ``encode_image`` and sent as an
    ``image/jpeg`` data URL, together with a fixed surveyor prompt, in a single
    user message to the OpenAI chat-completions HTTP endpoint. The reply text
    is printed and also returned so callers can reuse it.

    Args:
        images: Iterable of image file paths belonging to one building.
        timeout: Seconds ``requests`` waits on the HTTP call before raising
            a timeout error.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status; the
            response body is included in the message.
    """
    window_prompt = """

    You are a surveyor. You are given a set of images that belong to the same building.
    
    Your task is to provide a detailed label of every architectural feature for the building that will help determine whether the glazing in the windows is single, double, or high efficiency. List 50 detailed visible features that are significant for window types.
    
    The building is located in UK. Return the features as a list.
    
    """

    # One text part followed by one image part per file, in the given order.
    content = [{"type": "text", "text": window_prompt}]
    content.extend(
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{encode_image(image)}"
            },
        }
        for image in images
    )

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": content,
            }
        ],
        "max_tokens": 5000,
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    # Fail with the API's own error body instead of an opaque KeyError on
    # 'choices' when the request is rejected (bad key, rate limit, ...).
    if not response.ok:
        raise requests.HTTPError(
            f"OpenAI API request failed with status {response.status_code}: {response.text}",
            response=response,
        )

    answer = response.json()['choices'][0]['message']['content']
    print(answer)
    return answer


def extract_window_features(row_id):
    """Print the LLM's window-feature list for one row of the ground-truth CSV.

    Reads the CSV at ``gt_path``, selects the row labelled ``row_id``, prints a
    header line with that row's address and listing URL, then passes the row's
    window images to ``llm_get_window_features``.

    Args:
        row_id: Index label of the row in the CSV; it is also handed to
            ``image_list`` as the image folder id.
    """
    ground_truth = pd.read_csv(gt_path)
    # Look the row up once and reuse it for every column access.
    record = ground_truth.loc[row_id]
    raw_image_list = record['window image']
    print('\n\n\n---row', row_id, record['Address'], record['Thesquare Building URL'])
    window_image_paths = image_list(row_id, raw_image_list)
    llm_get_window_features(window_image_paths)

# 0 is the id displayed by random.choice(high_efficiency_training_sample) in an
# earlier cell; it is hard-coded here, so it reflects that particular run.
extract_window_features(0)




---row 0 Altitude Point , 71 Alie St , E1 8NF https://www.thesqua.re/london/serviced-apartments/altitude-point-serviced-apartment-1094397694
Identifying window glazing type involves examining several architectural and physical features. Here’s a list of 50 features to consider:

1. **Window Frame Material**
2. **Window Thickness**
3. **Number of Glass Layers**
4. **Presence of Gas Fillings (e.g., Argon)**
5. **Edge Seal Quality**
6. **Frame Insulation**
7. **Window Style (e.g., sash, casement)**
8. **Low-emissivity (Low-E) Coating**
9. **Visible Spacer Bars**
10. **Presence of Heat-reflective Tints**
11. **Integrated Blinds/Shades**
12. **Frame Color**
13. **Sealant Type**
14. **Locking Mechanisms**
15. **Window Gaskets**
16. **Weatherstripping**
17. **Frame Profile Design**
18. **Presence of Mullions**
19. **Frame to Wall Connection**
20. **Window U-value**
21. **Thermal Breaks in Frames**
22. **Condensation Pattern**
23. **Soundproofing Features**
24. **Window Ventilation Features

In [43]:
# 12 is the single id printed for single_glazed_training_sample in an earlier
# cell; it is hard-coded here, so it reflects that particular run.
extract_window_features(12)




---row 12 Chelsea Cloisters , Sloane Avenue , SW3 3DT https://www.thesqua.re/london/serviced-apartments/chelsea-serviced-apartments-by-mysquare-1429547768
To determine the type of glazing for windows in a building, here are 50 architectural features and indicators to observe:

1. Window frame material (e.g., wood, aluminum, uPVC)
2. Thickness of window frame
3. Type of sealant around windows
4. Presence of gaps or drafts
5. Window style (e.g., sash, casement, bay)
6. Insulation strips in the frame
7. Window coating (e.g., reflective, tinted)
8. Spacer bar presence in the glazing unit
9. Color of spacer bar (indicating metal or warm edge)
10. Clarity or distortion of view through the window
11. Presence of condensation between panes
12. Edge effects in the glass
13. Visible labels or markings on glass
14. Frame condition and maintenance
15. Internal reflection noticed inside glass
16. Exterior cladding around window
17. Weatherproofing details
18. Type of glass (e.g., standard, low-E

In [53]:
# 25 is the id displayed by random.choice(double_glazed_training_sample) in an
# earlier cell; it is hard-coded here, so it reflects that particular run.
extract_window_features(25)




---row 25 Flat 2 Pindoria House , 52 Mintern Street , N1 5EG https://www.thesqua.re/london/serviced-apartments/hoxton-apartments-by-mysquare-1294267743
Here are 50 detailed architectural features that might help determine the type of glazing in the windows:

1. Frame material (e.g., wood, aluminium, uPVC)
2. Frame color
3. Frame style (e.g., traditional, modern)
4. Depth of window frame
5. Thickness of glass
6. Number of panes
7. Presence of gas filling between panes
8. Type of gas used between panes (e.g., argon, krypton)
9. Spacer bar type and material
10. Warm edge technology in spacers
11. Sealant type used
12. Visibility of low-E coating
13. Type of low-E coating (hard coat, soft coat)
14. Window handle type
15. Presence of trickle vents
16. Thickness of window sections
17. Type of beading (internal or external)
18. Glazing bar design
19. Mulling system in multiple window setups
20. Window opening style (e.g., casement, sash, tilt and turn)
21. Type of locking system
22. Presen

In [54]:
# Feature names transcribed from the model reply printed by
# extract_window_features(0): entries 1-24 match the numbered items visible in
# that saved output (markdown bold markers removed). The saved output is cut
# off after item 24, so the remaining entries cannot be checked against it here.
window_features_1 = [
    "Window Frame Material",
    "Window Thickness",
    "Number of Glass Layers",
    "Presence of Gas Fillings (e.g., Argon)",
    "Edge Seal Quality",
    "Frame Insulation",
    "Window Style (e.g., sash, casement)",
    "Low-emissivity (Low-E) Coating",
    "Visible Spacer Bars",
    "Presence of Heat-reflective Tints",
    "Integrated Blinds/Shades",
    "Frame Color",
    "Sealant Type",
    "Locking Mechanisms",
    "Window Gaskets",
    "Weatherstripping",
    "Frame Profile Design",
    "Presence of Mullions",
    "Frame to Wall Connection",
    "Window U-value",
    "Thermal Breaks in Frames",
    "Condensation Pattern",
    "Soundproofing Features",
    "Window Ventilation Features",
    "External Capping",
    "Window Glazing Certification (e.g., CE Mark)",
    "Anti-glare Features",
    "Visible Glass Distortion",
    "Glazing Rebate Depth",
    "Reinforcement in Frames",
    "Integrated Security Features",
    "Mounting Depth",
    "Window Weight",
    "Use of Safety Glass",
    "Presence of Micro-ventilation",
    "Dual Seal System",
    "Weather-resistant Coatings",
    "Recessed Glazing Bead",
    "Sash Operation Mechanism",
    "Integrated Window Sensors",
    "Window Drainage Systems",
    "Cleaning Features (e.g., tilt and turn)",
    "Shade Affixation",
    "Inward/Outward Opening",
    "Sliding Mechanism Preparation",
    "Alignment and Leveling",
    "Recyclable Materials",
    "Window Beading Style",
    "Visible Energy Labels",
    "Color Consistency in Frames"
]

In [55]:
# Feature names transcribed from the model reply printed by
# extract_window_features(12): entries 1-18 match the numbered items visible in
# that saved output. The saved output is cut off inside item 18, so the
# remaining entries cannot be checked against it here.
window_features_2 = [
    "Window frame material (e.g., wood, aluminum, uPVC)",
    "Thickness of window frame",
    "Type of sealant around windows",
    "Presence of gaps or drafts",
    "Window style (e.g., sash, casement, bay)",
    "Insulation strips in the frame",
    "Window coating (e.g., reflective, tinted)",
    "Spacer bar presence in the glazing unit",
    "Color of spacer bar (indicating metal or warm edge)",
    "Clarity or distortion of view through the window",
    "Presence of condensation between panes",
    "Edge effects in the glass",
    "Visible labels or markings on glass",
    "Frame condition and maintenance",
    "Internal reflection noticed inside glass",
    "Exterior cladding around window",
    "Weatherproofing details",
    "Type of glass (e.g., standard, low-E)",
    "Visual consistency of the glass surface",
    "Presence of vents in the frame",
    "Evidence of recent installation",
    "Historical building characteristics",
    "Glass thickness variations",
    "Misted glass indicating leakage",
    "Size of air gap between panes",
    "Presence of gas fills (e.g., argon, krypton)",
    "Type of blinds or coverings used",
    "External shading or awnings",
    "Orientation of the windows",
    "Neighboring architectural styles",
    "Condition of glazing beads",
    "Type of locking mechanisms",
    "Security features specific to glazing",
    "Window edge or seal discoloration",
    "Compliance marking for standards",
    "Frame design complexity",
    "Presence of thermal breaks",
    "Window sill detailing",
    "Evidence of energy rating stickers",
    "Acoustic properties indicated by design",
    "Double or triple-pane indications",
    "Architectural period and typical materials",
    "Roof overhangs providing shade",
    "Size and shape of window openings",
    "Glazing pattern and divisions",
    "Recessed window placement",
    "Presence of storm windows or additional layers",
    "Compatibility with surrounding architecture",
    "Maintenance accessibility",
    "Overall aesthetic integration"
]

In [56]:
# Feature names transcribed from the model reply printed by
# extract_window_features(25): entries 1-21 match the numbered items visible in
# that saved output. The saved output is cut off inside item 22, so the
# remaining entries cannot be checked against it here.
window_features_3 = [
    "Frame material (e.g., wood, aluminium, uPVC)",
    "Frame color",
    "Frame style (e.g., traditional, modern)",
    "Depth of window frame",
    "Thickness of glass",
    "Number of panes",
    "Presence of gas filling between panes",
    "Type of gas used between panes (e.g., argon, krypton)",
    "Spacer bar type and material",
    "Warm edge technology in spacers",
    "Sealant type used",
    "Visibility of low-E coating",
    "Type of low-E coating (hard coat, soft coat)",
    "Window handle type",
    "Presence of trickle vents",
    "Thickness of window sections",
    "Type of beading (internal or external)",
    "Glazing bar design",
    "Mulling system in multiple window setups",
    "Window opening style (e.g., casement, sash, tilt and turn)",
    "Type of locking system",
    "Presence of weatherstripping",
    "Sash depth",
    "Presence of storm windows",
    "Type of glazing film",
    "Reflectivity of glass",
    "Tint or coloration of glass",
    "UV protection rating, if visible",
    "Hinges type and material",
    "Window sill design",
    "Edge finish of glass",
    "Presence of thermal breaks",
    "Visible brand or certification labels",
    "Presence of acoustic glazing features",
    "Visibility of desiccant in spacers",
    "Style of internal blinds, if any",
    "Shading devices (e.g., awnings, overhangs)",
    "Evidence of condensation or fogging",
    "Sound insulation indicators",
    "Energy efficiency labels, if any",
    "Certification or compliance marks",
    "External capping or cladding presence",
    "Corner joint design",
    "Dual seal or triple seal system",
    "Window integrated hardware",
    "Load-bearing capacity indicators",
    "Compliance with fire-rated standards",
    "Style of internal muntins",
    "UV fading on nearby furnishings",
    "Any visible documentation or manufacturer’s tags"
]

In [63]:
from openai import OpenAI
# Shared OpenAI SDK client used by get_completion; built from the module-level api_key.
client = OpenAI(api_key=api_key)
def get_completion(prompt, model="gpt-4o-2024-08-06", max_tokens=4000, api_client=None):
    """Send a text-only prompt as one user message and return the reply text.

    Args:
        prompt: Text placed in the single user message.
        model: Chat model name; defaults to the snapshot this notebook used.
        max_tokens: Upper bound on generated tokens passed to the API.
        api_client: Optional object exposing ``chat.completions.create``;
            when omitted, the module-level ``client`` is used.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    active_client = client if api_client is None else api_client
    user_message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": prompt,
            },
        ],
    }
    response = active_client.chat.completions.create(
        model=model,
        messages=[user_message],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content

In [67]:
# Concatenate the three per-building feature lists; order is kept and
# duplicates are still present at this point.
raw_feature_list = [*window_features_1, *window_features_2, *window_features_3]

# The list is interpolated into the prompt as its Python repr.
prompt =  f"""
I have a list of features: {raw_feature_list}. Your task is to remove duplicated items, including features semantically similar.
"""

result = get_completion(prompt)
print(result)

To clean up the list, we'll consolidate any repeated features and merge those that are semantically similar. Here's a refined version of your list:

1. **Window Frame Material** (e.g., wood, aluminum, uPVC)
2. **Window Thickness**
3. **Number of Glass Layers**
4. **Presence of Gas Filling** (e.g., argon, krypton)
5. **Edge Seal Quality**
6. **Frame Insulation**
7. **Window Style** (e.g., sash, casement, bay, tilt and turn)
8. **Low-emissivity (Low-E) Coating** (visibility and type)
9. **Spacer Bar** (presence, type, and material with warm edge technology)
10. **Presence of Heat-reflective Tints**
11. **Integrated Blinds/Shades**
12. **Frame Color** (and consistency)
13. **Sealant Type and Quality**
14. **Locking Mechanisms**
15. **Window Gaskets**
16. **Weatherstripping**
17. **Frame Profile Design**
18. **Presence of Mullions or Muntins**
19. **Frame to Wall Connection**
20. **Window U-value**
21. **Thermal Breaks in Frames**
22. **Condensation Pattern** (including evidence of fogging

In [68]:
# Numbered, de-duplicated feature list (88 entries). Items 1-22 match the
# visible part of the model reply printed by the previous cell, with the bold
# markers removed; that saved output is cut off after item 22.
# The f-prefix is inert here: the literal contains no {} placeholders.
unique_feature_list = f"""
1. Window Frame Material (e.g., wood, aluminum, uPVC)
2. Window Thickness
3. Number of Glass Layers
4. Presence of Gas Filling (e.g., argon, krypton)
5. Edge Seal Quality
6. Frame Insulation
7. Window Style (e.g., sash, casement, bay, tilt and turn)
8. Low-emissivity (Low-E) Coating (visibility and type)
9. Spacer Bar (presence, type, and material with warm edge technology)
10. Presence of Heat-reflective Tints
11. Integrated Blinds/Shades
12. Frame Color (and consistency)
13. Sealant Type and Quality
14. Locking Mechanisms
15. Window Gaskets
16. Weatherstripping
17. Frame Profile Design
18. Presence of Mullions or Muntins
19. Frame to Wall Connection
20. Window U-value
21. Thermal Breaks in Frames
22. Condensation Pattern (including evidence of fogging)
23. Soundproofing Features
24. Window Ventilation Features (e.g., trickle vents)
25. External Capping or Cladding
26. Window Glazing Certification (e.g., CE Mark, compliance marks)
27. Anti-glare Features
28. Visible Glass Distortion or Clarity
29. Glazing Rebate Depth
30. Reinforcement in Frames
31. Integrated Security Features
32. Mounting Depth
33. Window Weight
34. Use of Safety Glass
35. Presence of Micro-ventilation
36. Dual Seal or Triple Seal System
37. Weather-resistant Coatings
38. Recessed Glazing Bead
39. Sash Operation Mechanism
40. Integrated Window Sensors
41. Window Drainage Systems
42. Cleaning Features
43. Shade Affixation (e.g., external shading devices like awnings)
44. Inward/Outward Opening Features
45. Sliding Mechanism Preparation
46. Alignment and Leveling
47. Recyclable Materials
48. Window Beading Style (internal or external)
49. Visible Energy Labels (includes energy efficiency and UV protection rating)
50. Frame Condition and Maintenance
51. Interior and Exterior Reflection Characteristics
52. Weatherproofing Details
53. Visual Consistency of the Glass Surface
54. Evidence of Recent Installation
55. Historical Building Integration
56. Size of Air Gap between Panes
57. Type of Blinds or Coverings Used
58. Window Orientation
59. Neighboring Architectural Styles
60. Condition of Glazing Beads
61. Security Features Specific to Glazing
62. Window Edge or Seal Discoloration
63. Frame Design Complexity
64. Window Sill Design and Detailing
65. Acoustic Properties
66. Glazing Pattern and Divisions
67. Recessed Window Placement
68. Presence of Storm Windows or Additional Layers
69. Compatibility with Surrounding Architecture
70. Maintenance Accessibility
71. Overall Aesthetic Integration
72. Depth of Window Frame
73. Thickness of Glass
74. Type of Glazing Film
75. Reflectivity and Tint of Glass
76. Hinges Type and Material
77. Edge Finish of Glass
78. Visible Brand or Certification Labels
79. Presence of Acoustic Glazing Features
80. Visibility of Desiccant in Spacers
81. Style of Internal Blinds, if Any
82. Sound Insulation Indicators
83. Window Integrated Hardware
84. Load-bearing Capacity Indicators
85. Compliance with Fire-rated Standards
86. Style of Internal Muntins
87. UV Fading on Nearby Furnishings
88. Visible Documentation or Manufacturer’s Tags
"""
# Ask the model to group the numbered features by type; the reply is printed.
# This rebinds the module-level names `prompt` and `result` used by the
# previous prompt cell.
prompt =  f"""
I have a list of features: {unique_feature_list}. Your task is to cluster these features based on the type of feature.
"""

result = get_completion(prompt)
print(result)


Clustering these features based on their type can help organize them into categories, which could related to the material properties, design and aesthetics, functionality, performance metrics, or compliance and certification aspects. Here’s an attempt to cluster them:

### Material and Construction 

- **Window Frame Material (1)**
- **Window Thickness (2)**
- **Type of Glazing Film (74)**
- **Thickness of Glass (73)**
- **Frame Insulation (6)**
- **Frame Color (12)**
- **Sealant Type and Quality (13)**
- **Frame Profile Design (17)**
- **Reinforcement in Frames (30)**
- **Frame Condition and Maintenance (50)**
- **Frame to Wall Connection (19)**
- **Thermal Breaks in Frames (21)**
- **Frame Design Complexity (63)**
- **Depth of Window Frame (72)**

### Design and Aesthetics

- **Window Style (7)**
- **Frame Color (12)**
- **Window Beading Style (48)**
- **Overall Aesthetic Integration (71)**
- **Window Sill Design and Detailing (64)**
- **Historical Building Integration (55)**
- **Nei

In [71]:
# Clustered feature list in markdown; the part up to "Historical Building
# Integration (55)" matches the model reply printed by the previous cell, whose
# saved output is cut off there. Numbers in parentheses are the item numbers
# from unique_feature_list.
# NOTE(review): this clustering is not a clean partition of the 88 features:
#   - ids 12, 13 and 60 each appear under two categories;
#   - ids 28, 29, 36, 38, 39, 46, 47, 51, 56, 62, 69 and 76 appear under none;
#   - a few names differ from unique_feature_list (e.g. ids 10, 57, 83).
# The f-prefix is inert here: the literal contains no {} placeholders.
categories = f"""
### Material and Construction 

- **Window Frame Material (1)**
- **Window Thickness (2)**
- **Type of Glazing Film (74)**
- **Thickness of Glass (73)**
- **Frame Insulation (6)**
- **Frame Color (12)**
- **Sealant Type and Quality (13)**
- **Frame Profile Design (17)**
- **Reinforcement in Frames (30)**
- **Frame Condition and Maintenance (50)**
- **Frame to Wall Connection (19)**
- **Thermal Breaks in Frames (21)**
- **Frame Design Complexity (63)**
- **Depth of Window Frame (72)**

### Design and Aesthetics

- **Window Style (7)**
- **Frame Color (12)**
- **Window Beading Style (48)**
- **Overall Aesthetic Integration (71)**
- **Window Sill Design and Detailing (64)**
- **Historical Building Integration (55)**
- **Neighboring Architectural Styles (59)**
- **Recessed Window Placement (67)**
- **Visual Consistency of the Glass Surface (53)**
- **Window Orientation (58)**
- **Shade Affixation (43)**
- **Condition of Glazing Beads (60)**

### Functionality and Usability

- **Presence of Gas Filling (4)**
- **Number of Glass Layers (3)**
- **Spacer Bar (9)**
- **Integrated Blinds/Shades (11)**
- **Locking Mechanisms (14)**
- **Weatherstripping (16)**
- **Window Ventilation Features (24)**
- **Integrated Window Sensors (40)**
- **Window Drainage Systems (41)**
- **Inward/Outward Opening Features (44)**
- **Sliding Mechanism Preparation (45)**
- **Mounting Depth (32)**
- **Presence of Micro-ventilation (35)**
- **Presence of Mullions or Muntins (18)**
- **Glazing Pattern and Divisions (66)**
- **Style of Internal Muntins (86)**
- **Internal/External Blinds or Coverings (57)**
- **Style of Internal Blinds (81)**

### Performance and Metrics

- **Window U-value (20)**
- **Condensation Pattern (22)**
- **Soundproofing Features (23)**
- **Weather-resistant Coatings (37)**
- **Edge Seal Quality (5)**
- **Weatherproofing Details (52)**
- **Acoustic Properties (65)**
- **Sound Insulation Indicators (82)**
- **Load-bearing Capacity Indicators (84)**
- **Compliance with Fire-rated Standards (85)**
- **UV Fading on Nearby Furnishings (87)**
- **Window Weight (33)**

### Security and Safety

- **Window Gaskets (15)**
- **Sealant Type and Quality (13)**
- **Use of Safety Glass (34)**
- **Integrated Security Features (31)**
- **Security Features Specific to Glazing (61)**
- **Visible Documentation or Manufacturer’s Tags (88)**

### Compliance and Certification

- **Window Glazing Certification (26)**
- **Visible Brand or Certification Labels (78)**

### External Features and Maintenance

- **Edge Finish of Glass (77)**
- **External Capping or Cladding (25)**
- **Cleaning Features (42)**
- **Maintenance Accessibility (70)**
- **Evidence of Recent Installation (54)**

### Specialized Features

- **Heat-reflective Tints (10)**
- **Low-emissivity (Low-E) Coating (8)**
- **Anti-glare Features (27)**
- **Visible Energy Labels (49)**
- **Condition of Glazing Beads (60)**
- **Presence of Storm Windows or Additional Layers (68)**
- **Integrated Window Hardware (83)**
- **Reflectivity and Tint of Glass (75)**
- **Presence of Acoustic Glazing Features (79)**
- **Visibility of Desiccant in Spacers (80)**
"""

# Ask the model to strip the markdown and emit the categories as a nested
# Python list; the reply is printed. This rebinds the module-level names
# `prompt` and `result` used by the earlier prompt cells.
prompt =  f"""
Given this list {categories}, first clean the list to contain text only, then produce a python array, each subarray for each category.
"""

result = get_completion(prompt)
print(result)

To create a Python array with each subarray containing items from each category, you'll first need to clean the list to remove any formatting symbols and then structure it accordingly. Here's how the list can be translated into a Python array:

```python
window_features = [
    # Material and Construction
    [
        "Window Frame Material",
        "Window Thickness",
        "Type of Glazing Film",
        "Thickness of Glass",
        "Frame Insulation",
        "Frame Color",
        "Sealant Type and Quality",
        "Frame Profile Design",
        "Reinforcement in Frames",
        "Frame Condition and Maintenance",
        "Frame to Wall Connection",
        "Thermal Breaks in Frames",
        "Frame Design Complexity",
        "Depth of Window Frame"
    ],
    # Design and Aesthetics
    [
        "Window Style",
        "Frame Color",
        "Window Beading Style",
        "Overall Aesthetic Integration",
        "Window Sill Design and Detailing",
        "Historical Bui