In [1]:
!git clone https://github.com/emanhamed/Houses-dataset

Cloning into 'Houses-dataset'...
remote: Enumerating objects: 2166, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 2166 (delta 0), reused 0 (delta 0), pack-reused 2165[K
Receiving objects: 100% (2166/2166), 176.26 MiB | 39.50 MiB/s, done.
Resolving deltas: 100% (20/20), done.
Updating files: 100% (2144/2144), done.


In [35]:
import os
import locale
import glob
import pandas as pd
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from keras.models import Sequential, load_model
from keras import layers
from keras.optimizers import Adam
from keras.activations import relu, linear
from keras.losses import mean_absolute_percentage_error
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [33]:
# Directory that holds HousesInfo.txt and the per-house image files.
DATASET_PATH: str = '/content/Houses-dataset/Houses Dataset'
# File the training checkpoint is written to and later reloaded from.
MODEL_PATH_TO_SAVE: str = '/content/drive/MyDrive/weights/house_pricing_best.keras'
# Directory whose images are passed to predict() in the inference section.
TEST_IMAGES_DIRECTORY: str = '/content/test_images'

## Utils

In [38]:
def load_house_images(df: pd.DataFrame, input_path: str) -> np.ndarray:
    """Build one 64x64 RGB montage per row of ``df`` from that house's photos.

    For every index label ``i`` in ``df`` the files matching
    ``<input_path>/<i + 1>_*`` are taken in sorted path order. The first four
    are resized to 32x32, converted from BGR to RGB and tiled clockwise,
    starting at the top-left corner of the montage.

    Args:
        df: Frame whose index labels select the houses to load.
        input_path: Directory that holds the per-house image files.

    Returns:
        The ``uint8`` montages of shape ``(64, 64, 3)``, one per row of ``df``,
        stacked along the first axis.

    Raises:
        ValueError: If fewer than four image files are found for a house.
        OSError: If one of the image files cannot be decoded.
    """
    tile = 32
    # (row, column) origin of each tile, clockwise from the top-left corner.
    origins = ((0, 0), (0, tile), (tile, tile), (tile, 0))
    images = []

    for i in tqdm(df.index.values):
        # File names are looked up under ``i + 1``.
        # NOTE(review): presumably files are numbered from 1 while the index
        # labels start at 0 - confirm against the dataset.
        pattern = os.path.join(input_path, f'{i + 1}_*')
        house_paths = sorted(glob.glob(pattern))
        if len(house_paths) < len(origins):
            raise ValueError(
                f'expected at least {len(origins)} images matching {pattern!r}, '
                f'found {len(house_paths)}'
            )

        montage = np.zeros((2 * tile, 2 * tile, 3), dtype='uint8')
        for (row, col), house_path in zip(origins, house_paths):
            image = cv.imread(house_path)
            # cv.imread signals an unreadable file by returning None, not by raising.
            if image is None:
                raise OSError(f'could not read image: {house_path}')
            image = cv.resize(image, (tile, tile))
            montage[row:row + tile, col:col + tile] = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        images.append(montage)

    return np.array(images)

In [39]:
def load_house_attributes(input_path: str, min_zipcode_count: int = 25) -> pd.DataFrame:
    """Read the space-separated house attributes and drop rare zipcodes.

    Args:
        input_path: Path of the header-less, space-separated attributes file.
        min_zipcode_count: Rows whose zipcode occurs fewer than this many times
            in the file are removed.

    Returns:
        Frame with the columns ``bedrooms``, ``bathrooms``, ``area``,
        ``zipcode`` and ``price``. The surviving rows keep the index labels
        they had in the file, so the labels are no longer contiguous after
        filtering.
    """
    columns = ['bedrooms', 'bathrooms', 'area', 'zipcode', 'price']
    df = pd.read_csv(input_path, sep=" ", header=None, names=columns)

    # Count each zipcode once and filter in a single vectorised pass instead
    # of dropping rows in place, one zipcode at a time.
    zipcode_counts = df['zipcode'].value_counts()
    rare_zipcodes = zipcode_counts[zipcode_counts < min_zipcode_count].index

    # .copy() hands callers an independent frame rather than a view-like slice.
    return df[~df['zipcode'].isin(rare_zipcodes)].copy()

In [40]:
def create_cnn(
    width: int,
    height: int,
    depth: int,
    filters: tuple[int, int, int] = (16, 32, 64),
    regress: bool = False,
) -> Sequential:
    """Assemble the convolutional network as a ``Sequential`` model.

    Each entry of ``filters`` contributes a 3x3 ``Conv2D`` (ReLU, ``same``
    padding), a ``BatchNormalization`` and a 2x2 ``MaxPooling2D``. The
    flattened features then pass through ``Dense(16)``, batch normalisation,
    ``Dropout(0.5)`` and ``Dense(4)``.

    Args:
        width: Input image width.
        height: Input image height.
        depth: Number of input channels.
        filters: Filter count of each convolutional stage, in order.
        regress: When true, a final ``Dense(1)`` layer with linear activation
            is appended.

    Returns:
        The assembled model; it is not compiled here.
    """
    channel_axis = -1
    net: Sequential = Sequential()

    for stage, n_filters in tqdm(enumerate(filters)):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': (height, width, depth)} if stage == 0 else {}
        net.add(layers.Conv2D(n_filters, (3, 3), padding='same',
                              activation=relu, **first_layer_kwargs))
        net.add(layers.BatchNormalization(axis=channel_axis))
        net.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Fully connected head on top of the flattened feature maps.
    net.add(layers.Flatten())
    net.add(layers.Dense(16, activation=relu))
    net.add(layers.BatchNormalization(axis=channel_axis))
    net.add(layers.Dropout(0.5))
    net.add(layers.Dense(4, activation=relu))

    if not regress:
        return net

    net.add(layers.Dense(1, activation=linear))
    return net


## Training

In [41]:
# Read the tabular house attributes from HousesInfo.txt inside the dataset
# directory. load_house_attributes also removes the rows of infrequent zipcodes.
print('[INFO] loading house attributes...')
input_path = os.path.sep.join([DATASET_PATH, 'HousesInfo.txt'])
df = load_house_attributes(input_path)
print('\n[INFO] loading house attributes done.')

[INFO] loading house attributes...


49it [00:00, 1228.00it/s]


[INFO] loading house attributes done.





In [42]:
# Build one image montage per remaining row of df, then rescale the uint8
# pixel values by 1/255.
print('[INFO] loading house images...')
images = load_house_images(df, DATASET_PATH)
images = images / 255.
# df and images are split in the same call so attribute rows and montages stay
# aligned; 20% is held out and random_state fixes the shuffle.
train_attr_x, test_attr_x, train_images_x, test_images_x = train_test_split(
    df, images, test_size=.2, random_state=42
)
print('\n[INFO] loading house images done')

[INFO] loading house images...


100%|██████████| 362/362 [00:10<00:00, 35.46it/s]


[INFO] loading house images done





In [43]:
# Express prices as a fraction of the largest price in the training split.
# The same training maximum is reused for the test targets.
max_price = train_attr_x['price'].max()
y_train = train_attr_x['price'] / max_price
y_test = test_attr_x['price'] / max_price

In [44]:
# Regression network for 64x64 inputs with 3 channels, compiled with Adam and
# mean absolute percentage error as the loss.
print('[INFO] creating the model...')
model = create_cnn(64, 64, 3, regress=True)
model.compile(optimizer=Adam(learning_rate=1e-3), #, decay=1e-3 / 200),
              loss=mean_absolute_percentage_error)
# Checkpoint callback targeting MODEL_PATH_TO_SAVE; with save_best_only=True
# the file is only overwritten when the monitored quantity improves.
checkpoint = ModelCheckpoint(MODEL_PATH_TO_SAVE, save_best_only=True)
print('\n[INFO] creating the model done')

[INFO] creating the model...


3it [00:00, 33.03it/s]


[INFO] creating the model done





In [45]:
# Train on the image montages only (the tabular attributes are not fed to the
# network). The held-out split is passed as validation data and the checkpoint
# callback is the only callback in use.
print('[INFO] training model...')
model.fit(
    x=train_images_x,
    y=y_train,
    validation_data=(test_images_x, y_test),
    epochs=200,
    batch_size=8,
    callbacks=[checkpoint]
)
print('[INFO] training model done')

[INFO] training model...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/

In [47]:
# Reload the checkpoint written during training (replacing the in-memory
# model) and score it on the held-out split.
# NOTE(review): evaluate presumably reports the compiled loss, i.e. mean
# absolute percentage error - confirm.
model: Sequential = load_model(MODEL_PATH_TO_SAVE)
model.evaluate(test_images_x, y_test)



50.5028076171875

## Inference

In [51]:
def predict(model_save_path: str, input_images_path: str) -> np.ndarray:
    """Run the saved model on a montage built from the images in a directory.

    The first four files of ``input_images_path`` (in sorted name order) are
    resized to 32x32 and tiled clockwise from the top-left corner into a 64x64
    montage, which is converted from BGR to RGB, displayed with matplotlib,
    rescaled by 1/255 and passed to the model as a batch of one.

    Args:
        model_save_path: Path of the saved Keras model to load.
        input_images_path: Directory holding the images of one house.

    Returns:
        The raw array returned by ``model.predict`` for the single montage.
        NOTE(review): the training cells fit the model on ``price / max_price``,
        so this is presumably a fraction of the training maximum and not a
        price - confirm.

    Raises:
        ValueError: If the directory holds fewer than four files.
        OSError: If one of the image files cannot be decoded.
    """
    model: Sequential = load_model(model_save_path)

    tile = 32
    # (row, column) origin of each tile, clockwise from the top-left corner.
    origins = ((0, 0), (0, tile), (tile, tile), (tile, 0))

    # os.listdir yields names in arbitrary order, whereas load_house_images
    # tiles a sorted glob; sort here so every tile lands where it did during
    # training. Sub-directories are skipped because they cannot be decoded.
    candidates = (os.path.join(input_images_path, name)
                  for name in os.listdir(input_images_path))
    image_paths = sorted(path for path in candidates if os.path.isfile(path))
    if len(image_paths) < len(origins):
        raise ValueError(
            f'expected at least {len(origins)} images in {input_images_path!r}, '
            f'found {len(image_paths)}'
        )

    output_image = np.zeros((2 * tile, 2 * tile, 3), dtype='uint8')
    for (row, col), image_path in zip(origins, image_paths):
        image = cv.imread(image_path)
        # cv.imread signals an unreadable file by returning None, not by raising.
        if image is None:
            raise OSError(f'could not read image: {image_path}')
        output_image[row:row + tile, col:col + tile] = cv.resize(image, (tile, tile))

    output_image = cv.cvtColor(output_image, cv.COLOR_BGR2RGB)
    plt.imshow(output_image)

    # Same 1/255 scaling as the training images, plus a leading batch axis.
    batch = (output_image / 255.).reshape(1, 2 * tile, 2 * tile, 3)
    return model.predict(batch)

In [None]:
# predict() returns the raw network output as an array. The network was fitted
# on price / max_price, so the output is scaled back by the training maximum
# before it is reported as a price.
prediction: np.ndarray = predict(MODEL_PATH_TO_SAVE, TEST_IMAGES_DIRECTORY)
estimated_price = float(prediction[0][0]) * max_price
print(f'House price estimated: {estimated_price}')