In [3]:
import base64
import requests

# OpenAI API Key
api_key = "xxxxx"

# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

In [4]:
import pandas as pd

base_path = "clear/apartment_images/"
gt_path = 'ground_truth.csv'

# convert list of images to path
def image_list(folder_id, image_string):
    images = []
    array = image_string.split(';')
    for item in array:
        image_url = base_path + str(folder_id) + '/image_' + item + '.jpg'
        images.append(image_url)
    return images


In [15]:
from openai import OpenAI
client = OpenAI(api_key=api_key)
def get_completion(prompt):
    response = client.chat.completions.create(
      model="gpt-4o-2024-08-06", #"gpt-4o",
      messages=[
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": prompt,
            },
          ],
        }
      ],
      max_tokens=4000,
    )
    # print(response)
    return response.choices[0].message.content

In [71]:
# choose 3 buildings to extract feature from

low_numbers = [0, 1, 3, 6, 9, 16, 18, 21, 26, 29, 32, 33, 35, 36, 38, 40, 42, 43]

# random pick one from low numbers
item1 = random.sample(low_numbers, 1)[0]
print('item1 id:', item1)

item1 id: 9


In [78]:
# randomly pick one from medium numbers
medium_numbers = [2, 4, 7, 8, 13, 14, 17, 19, 25, 28, 34, 39]

item2 = random.sample(medium_numbers, 1)[0]
print('item2 id:', item2)

item2 id: 17


In [198]:
# randomly pick one from high numbers
high_numbers = [11, 12, 15, 20, 22, 30, 31, 44]

item3 = random.sample(high_numbers, 1)[0]
print('item3 id:', item3)

item3 id: 15


In [229]:
# get training ids
# note make sure training ids contain 3 items selected above for feature extraction

import random

kwh_rows_with_data = [0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 42, 43, 44]
kwh_training_sample = random.sample(kwh_rows_with_data, 23) 
print(kwh_training_sample)

[35, 7, 15, 21, 13, 36, 0, 14, 11, 42, 20, 39, 1, 43, 9, 12, 38, 33, 8, 29, 44, 18, 17]


In [230]:
# Combine all lists and check for duplicates, there shouldn't be!
training_samples_combined = kwh_training_sample
duplicates = [item for item in set(training_samples_combined) if training_samples_combined.count(item) > 1]
print("Duplicates:", duplicates)

Duplicates: []


In [231]:
# Sort them by increasing id
training_sample = sorted(training_samples_combined)
print(training_sample)
if len(training_sample) != 28:
    print("insufficient training sample", len(training_sample))
else:
    print("training sample length", len(training_sample))

[0, 1, 7, 8, 9, 11, 12, 13, 14, 15, 17, 18, 20, 21, 29, 33, 35, 36, 38, 39, 42, 43, 44]
insufficient training sample 23


In [188]:
custom_prompt = "Your task is to provide a detailed label of every visible architectural feature, appliance and energy consuming device in the images that will help determine the energy consumption in kwh per metre squared. Do not list furnishings or belongings, focus on visible items relevant to energy consumption or saving. List 50, with no explanations."
image_type = 'age image' # kwh uses age. change this for age, window etc. this is used to determine which images to use

def llm_get_features(images):

    prompt = f"""

    You are a surveyor. You are given a set of images that belong to the same building.
    
    {custom_prompt}
    
    The building is located in UK. Return the features as a list.
    
    """

    content = [
            {
              "type": "text",
              "text": prompt
            }
          ]

    for image in images:
        base64_image = encode_image(image)
        content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"
            }
        })

    headers = {
      "Content-Type": "application/json",
      "Authorization": f"Bearer {api_key}"
    }

    payload = {
      "model": "gpt-4o",
      "messages": [
        {
          "role": "user",
          "content": content
        }
      ],
      "max_tokens": 5000
    }
    
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

    # print(response.text)
    print(response.json()['choices'][0]['message']['content'])


def extract_features(row_id):
    df = pd.read_csv(gt_path)
    raw_image_list = df.loc[row_id][image_type]
    print('\n\n\n---row', row_id, df.loc[row_id]['Address'], df.loc[row_id]['Thesquare Building URL'])
    llm_get_features(image_list(row_id, raw_image_list))

In [190]:
extract_features(item1)




---row 9 6 Slate House , 11 Keymer Place , E14 7QZ https://www.thesqua.re/london/serviced-apartments/canary-gateway-apartments-226097697
1. Double-glazed windows
2. Sliding glass doors
3. LED ceiling spotlights
4. Electric wall sockets
5. Underfloor heating controls
6. Wall insulation
7. Ceiling insulation
8. Radiators
9. Balcony with solar exposure
10. Brick exterior walls
11. Energy-efficient appliances
12. Smart thermostat
13. Central heating system
14. Energy-efficient lighting
15. External wall insulation
16. Low-E glass
17. Balcony overhangs
18. Composite decking
19. High-efficiency boiler
20. Motion sensor lighting
21. Programmable lighting system
22. Reflective wall paint
23. Solar panels (not visible, but possible installation)
24. Thermally insulated doors
25. Window blinds
26. Balcony railings (glass)
27. Energy-saving modes on devices
28. Energy-efficient fans
29. Ventilation system
30. Building energy management system
31. LED outdoor lighting
32. Sun shading devices
33

In [189]:
extract_features(item2)




---row 17 Discovery Dock East South Quay Square , South Quay Square , E14 9RZ https://www.thesqua.re/london/serviced-apartments/discovery-dock-east-at-canary-wharf-by-mysquare-1337777727
1. Double-glazed windows
2. LED ceiling lights
3. Wall-mounted television
4. Radiators
5. Insulated floors
6. Patio doors
7. Ceiling spotlights
8. Curtains
9. Energy-efficient bulbs
10. Electric sockets
11. Modern shower head
12. Bathroom extractor fan
13. Tap fittings
14. Thermostatic valve
15. Energy-efficient bath
16. Shower rail
17. Insulation in walls
18. Energy-efficient toilet
19. Towel rail heater
20. Wall insulation
21. Flat roofing
22. Thermal blinds
23. Draught-proof doors
24. Thick carpets
25. Wall thermostats
26. Weather stripping on windows
27. Modern faucets
28. Ventilation system
29. Insulation in ceiling
30. Smart lighting system
31. Insulated doors
32. Energy-efficient appliances
33. Door thresholds
34. Energy-efficient door seals
35. Programmable thermostat
36. Zoned heating syste

In [200]:
extract_features(item3)




---row 15 41 Creek road , 41 Creek road , SE8 3BU https://www.thesqua.re/london/serviced-apartments/greenwich-serviced-apartments-by-mysquare-1335277694
1. Double-glazed windows
2. Overhead lighting
3. Pendant lighting
4. Radiator heating
5. Curtains
6. Microwave
7. Electric oven
8. Stovetop
9. Refrigerator
10. Kitchen backsplash
11. Exterior wall insulation
12. Roof insulation
13. Wall insulation
14. Energy-efficient light bulbs
15. Brick facade
16. Smoke detector
17. External door
18. Window blinds
19. Hardwood floors
20. Ceiling insulation
21. Electric kettle
22. Energy-efficient windows
23. Wall-mounted thermostat
24. Skylight
25. Ceiling fan
26. Ventilation fan
27. Extractor hood
28. LED strip lighting
29. Water heater
30. Electric outlets
31. Under-cabinet lighting
32. Energy-efficient appliances
33. Solar panel (if present)
34. Loft insulation
35. Smart meter
36. Boiler
37. Insulated pipes
38. Double wall sockets
39. Smart lighting control
40. Energy-efficient shower
41. Soli

In [206]:
features_1 = [
    "Double-glazed windows",
    "Sliding glass doors",
    "LED ceiling spotlights",
    "Electric wall sockets",
    "Underfloor heating controls",
    "Wall insulation",
    "Ceiling insulation",
    "Radiators",
    "Balcony with solar exposure",
    "Brick exterior walls",
    "Energy-efficient appliances",
    "Smart thermostat",
    "Central heating system",
    "Energy-efficient lighting",
    "External wall insulation",
    "Low-E glass",
    "Balcony overhangs",
    "Composite decking",
    "High-efficiency boiler",
    "Motion sensor lighting",
    "Programmable lighting system",
    "Reflective wall paint",
    "Solar panels (not visible, but possible installation)",
    "Thermally insulated doors",
    "Window blinds",
    "Balcony railings (glass)",
    "Energy-saving modes on devices",
    "Energy-efficient fans",
    "Ventilation system",
    "Building energy management system",
    "LED outdoor lighting",
    "Sun shading devices",
    "Double-skin façade",
    "Airtightness of windows and doors",
    "Building orientation",
    "Smart meters",
    "Energy-efficient building materials",
    "Passive solar heating",
    "Heat recovery ventilators",
    "Light shelves to enhance natural light",
    "Thermal mass",
    "Green roof (potential installation site)",
    "External cladding",
    "Concealed lighting",
    "Condensing boiler",
    "Infrared heating panels",
    "Energy-efficient glazing",
    "Roof insulation",
    "Efficient water heating systems",
    "Reflective aluminum windows"
]

In [207]:
features_2 = [
    "Double-glazed windows",
    "LED ceiling lights",
    "Wall-mounted television",
    "Radiators",
    "Insulated floors",
    "Patio doors",
    "Ceiling spotlights",
    "Curtains",
    "Energy-efficient bulbs",
    "Electric sockets",
    "Modern shower head",
    "Bathroom extractor fan",
    "Tap fittings",
    "Thermostatic valve",
    "Energy-efficient bath",
    "Shower rail",
    "Insulation in walls",
    "Energy-efficient toilet",
    "Towel rail heater",
    "Wall insulation",
    "Flat roofing",
    "Thermal blinds",
    "Draught-proof doors",
    "Thick carpets",
    "Wall thermostats",
    "Weather stripping on windows",
    "Modern faucets",
    "Ventilation system",
    "Insulation in ceiling",
    "Smart lighting system",
    "Insulated doors",
    "Energy-efficient appliances",
    "Door thresholds",
    "Energy-efficient door seals",
    "Programmable thermostat",
    "Zoned heating system",
    "High-efficiency shower",
    "Energy Star certified TV",
    "LED table lamps",
    "Smart radiator valves",
    "Ventilated spaces",
    "Light switches",
    "Shower curtain",
    "Thermoplastic flooring",
    "Energy monitors",
    "Door draught excluder",
    "Wall-mounted mirrors",
    "Water-saving taps",
    "Digital thermostat",
    "Efficient water heater"
]

In [208]:
features_3 = [
    "Double-glazed windows",
    "Overhead lighting",
    "Pendant lighting",
    "Radiator heating",
    "Curtains",
    "Microwave",
    "Electric oven",
    "Stovetop",
    "Refrigerator",
    "Kitchen backsplash",
    "Exterior wall insulation",
    "Roof insulation",
    "Wall insulation",
    "Energy-efficient light bulbs",
    "Brick facade",
    "Smoke detector",
    "External door",
    "Window blinds",
    "Hardwood floors",
    "Ceiling insulation",
    "Electric kettle",
    "Energy-efficient windows",
    "Wall-mounted thermostat",
    "Skylight",
    "Ceiling fan",
    "Ventilation fan",
    "Extractor hood",
    "LED strip lighting",
    "Water heater",
    "Electric outlets",
    "Under-cabinet lighting",
    "Energy-efficient appliances",
    "Solar panel",
    "Loft insulation",
    "Smart meter",
    "Boiler",
    "Insulated pipes",
    "Double wall sockets",
    "Smart lighting control",
    "Energy-efficient shower",
    "Solid wood doors",
    "Insulated attic",
    "Weather stripping on doors",
    "Radiant floor heating",
    "Roof tiles",
    "Chimney",
    "Satellite dish",
    "Garden lighting",
    "Security alarm system",
    "Gable roof"
]

In [210]:
raw_feature_list = features_1 + features_2 + features_3

prompt =  f"""
I have a list of features: {raw_feature_list}. Your task is to remove duplicated items, including features semantically similar.
"""

result = get_completion(prompt)
print(result)

Here's your list with duplicates and semantically similar features removed:

1. Airtightness of windows and doors
2. Balcony with solar exposure
3. Balcony overhangs
4. Balcony railings (glass)
5. Bathroom extractor fan
6. Brick exterior walls
7. Building energy management system
8. Building orientation
9. Ceiling fan
10. Ceiling insulation
11. Ceiling spotlights
12. Central heating system
13. Chimney
14. Composite decking
15. Condensing boiler
16. Curtains
17. Digital thermostat
18. Double-glazed windows
19. Double-skin façade
20. Draught-proof doors
21. Efficient water heater
22. Efficient water heating systems
23. Electric outlets
24. Electric wall sockets
25. Electric kettle
26. Electric oven
27. Energy-efficient appliances
28. Energy-efficient bath
29. Energy-efficient building materials
30. Energy-efficient bulbs (light bulbs)
31. Energy-efficient door seals
32. Energy-efficient fans
33. Energy-efficient glazing
34. Energy-efficient lighting
35. Energy-efficient modes on devices


In [212]:
unique_feature_list = f"""
1. Airtightness of windows and doors
2. Balcony with solar exposure
3. Balcony overhangs
4. Balcony railings (glass)
5. Bathroom extractor fan
6. Brick exterior walls
7. Building energy management system
8. Building orientation
9. Ceiling fan
10. Ceiling insulation
11. Ceiling spotlights
12. Central heating system
13. Chimney
14. Composite decking
15. Condensing boiler
16. Curtains
17. Digital thermostat
18. Double-glazed windows
19. Double-skin façade
20. Draught-proof doors
21. Efficient water heater
22. Efficient water heating systems
23. Electric outlets
24. Electric wall sockets
25. Electric kettle
26. Electric oven
27. Energy-efficient appliances
28. Energy-efficient bath
29. Energy-efficient building materials
30. Energy-efficient bulbs (light bulbs)
31. Energy-efficient door seals
32. Energy-efficient fans
33. Energy-efficient glazing
34. Energy-efficient lighting
35. Energy-efficient modes on devices
36. Energy-efficient shower
37. Energy-efficient toilet
38. Energy-efficient windows
39. Energy monitors
40. External cladding
41. External door
42. External wall insulation
43. Extractor hood
44. Flat roofing
45. Garden lighting
46. Gable roof
47. Green roof (potential installation site)
48. Hardwood floors
49. Heat recovery ventilators
50. High-efficiency boiler
51. High-efficiency shower
52. Infrared heating panels
53. Insulated attic
54. Insulated doors
55. Insulated floors
56. Insulated pipes
57. Insulation in ceiling
58. Insulation in walls
59. LED ceiling spotlights
60. LED strip lighting
61. LED table lamps
62. LED outdoor lighting
63. Light shelves to enhance natural light
64. Light switches
65. Loft insulation
66. Low-E glass
67. Microwave
68. Modern faucets
69. Modern shower head
70. Motion sensor lighting
71. Patio doors
72. Pendant lighting
73. Passive solar heating
74. Programmable lighting system
75. Programmable thermostat
76. Radiant floor heating
77. Radiator heating
78. Radiators
79. Reflective wall paint
80. Reflective aluminum windows
81. Refrigerator
82. Roof insulation
83. Roof tiles
84. Satellite dish
85. Security alarm system
86. Shower curtain
87. Shower rail
88. Skylight
89. Sliding glass doors
90. Smart lighting system
91. Smart meters
92. Smart radiator valves
93. Smart thermostat
94. Solid wood doors
95. Solar panels (not visible, but possible installation)
96. Stovetop
97. Sun shading devices
98. Tap fittings
99. Thermal blinds
100. Thermal mass
101. Thermally insulated doors
102. Thermoplastic flooring
103. Thermostatic valve
104. Thick carpets
105. Towel rail heater
106. Under-cabinet lighting
107. Underfloor heating controls
108. Ventilated spaces
109. Ventilation fan
110. Ventilation system
111. Wall insulation
112. Wall-mounted mirrors
113. Wall-mounted television
114. Wall-mounted thermostat
115. Wall thermostats
116. Water heater
117. Water-saving taps
118. Weather stripping on doors
119. Weather stripping on windows
120. Window blinds
121. Zoned heating system
"""
prompt =  f"""
I have a list of features: {unique_feature_list}. Your task is to cluster these features based on the type of feature. Aim to produce 8 clusters. Every feature must be used. 
"""

result = get_completion(prompt)
print(result)


Clustering the features into 8 distinct groups can be achieved by categorizing them based on their functionality or purpose. Here's one way to cluster them:

### 1. Insulation and Thermal Efficiency
- Airtightness of windows and doors
- Ceiling insulation
- Insulation in ceiling
- Insulation in walls
- Insulated attic
- Insulated doors
- Insulated floors
- Insulated pipes
- Loft insulation
- Roof insulation
- Wall insulation
- Wall thermostats
- Weather stripping on doors
- Weather stripping on windows
- Thermal blinds
- Thermally insulated doors

### 2. Heating and Ventilation
- Central heating system
- Condensing boiler
- Heat recovery ventilators
- High-efficiency boiler
- Infrared heating panels
- Radiant floor heating
- Radiator heating
- Radiators
- Smart radiator valves
- Thermostatic valve
- Underfloor heating controls
- Ventilated spaces
- Ventilation fan
- Ventilation system
- Zoned heating system

### 3. Windows and Doors
- Double-glazed windows
- Draught-proof doors
- Energ

In [214]:
categories = f"""
### 1. Insulation and Thermal Efficiency
- Airtightness of windows and doors
- Ceiling insulation
- Insulation in ceiling
- Insulation in walls
- Insulated attic
- Insulated doors
- Insulated floors
- Insulated pipes
- Loft insulation
- Roof insulation
- Wall insulation
- Wall thermostats
- Weather stripping on doors
- Weather stripping on windows
- Thermal blinds
- Thermally insulated doors

### 2. Heating and Ventilation
- Central heating system
- Condensing boiler
- Heat recovery ventilators
- High-efficiency boiler
- Infrared heating panels
- Radiant floor heating
- Radiator heating
- Radiators
- Smart radiator valves
- Thermostatic valve
- Underfloor heating controls
- Ventilated spaces
- Ventilation fan
- Ventilation system
- Zoned heating system

### 3. Windows and Doors
- Double-glazed windows
- Draught-proof doors
- Energy-efficient door seals
- Energy-efficient glazing
- Energy-efficient windows
- External door
- Reflective aluminum windows
- Sliding glass doors
- Thermal mass
- Window blinds

### 4. Renewable Energy and Solar
- Balcony with solar exposure
- Green roof
- Passive solar heating
- Solar panels

### 5. Smart and Programmable Systems
- Building energy management system
- Digital thermostat
- Energy monitors
- Motion sensor lighting
- Programmable thermostat
- Smart lighting system
- Smart meters
- Smart thermostat
- Wall-mounted thermostat

### 6. Lighting Solutions
- Ceiling spotlights
- Energy-efficient bulbs (light bulbs)
- Energy-efficient lighting
- LED ceiling spotlights
- LED strip lighting
- LED table lamps
- LED outdoor lighting
- Pendant lighting
- Skylight
- Under-cabinet lighting

### 7. Appliances and Kitchen/Bathroom Fixtures
- Bathroom extractor fan
- Efficient water heater
- Efficient water heating systems
- Electric kettle
- Electric oven
- Energy-efficient appliances
- Energy-efficient bath
- Energy-efficient shower
- Energy-efficient toilet
- Extractor hood
- Microwave
- Modern faucets
- Modern shower head
- Refrigerator
- Shower curtain
- Shower rail
- Stovetop
- Tap fittings
- Towel rail heater
- Water heater
- Water-saving taps

### 8. Building Materials and Structure
- Balcony overhangs
- Balcony railings (glass)
- Brick exterior walls
- Composite decking
- External cladding
- Flat roofing
- Gable roof
- Hardwood floors
- Light shelves to enhance natural light
- Roof tiles
- Solid wood doors
- Thermoplastic flooring
- Wall-mounted mirrors
- Wall-mounted television
"""

prompt =  f"""
Given this list {categories}, first clean the list to contain text only, then produce a python array, each subarray for each category.
"""

result = get_completion(prompt)
print(result)

Here's the cleaned list formatted as a Python array, with each category represented as a subarray:

```python
categories = [
    # 1. Insulation and Thermal Efficiency
    [
        "Airtightness of windows and doors",
        "Ceiling insulation",
        "Insulation in ceiling",
        "Insulation in walls",
        "Insulated attic",
        "Insulated doors",
        "Insulated floors",
        "Insulated pipes",
        "Loft insulation",
        "Roof insulation",
        "Wall insulation",
        "Wall thermostats",
        "Weather stripping on doors",
        "Weather stripping on windows",
        "Thermal blinds",
        "Thermally insulated doors"
    ],
    # 2. Heating and Ventilation
    [
        "Central heating system",
        "Condensing boiler",
        "Heat recovery ventilators",
        "High-efficiency boiler",
        "Infrared heating panels",
        "Radiant floor heating",
        "Radiator heating",
        "Radiators",
        "Smart radiator valves",