In [3]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import cv2

# ================= CONFIGURATION =================
# Saved Keras model file that main() loads for inference.
MODEL_PATH = "house_price_model_v2.h5"
# CSV read by main(); it uses the 'id', 'price' and house-feature columns.
DATA_FILE = "cleaned_dataset.csv"
# Folder searched for each house photo, named image_<id>.jpg.
IMG_FOLDER = "data/house_images"
# =================================================

def load_image(img_path):
    """Read one house photo and turn it into a single-image model input.

    The file is decoded with OpenCV, converted from BGR to RGB channel order,
    resized to 128x128, scaled from 0-255 down to 0-1, and given a leading
    batch axis.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The preprocessed image array with a leading axis of length 1, or
        None when OpenCV could not read the file.
    """
    bgr_pixels = cv2.imread(img_path)
    if bgr_pixels is None:
        # imread signals a missing/unreadable file by returning None.
        return None

    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb_pixels, (128, 128))
    normalized = resized / 255.0
    # Prepend the batch dimension expected by model.predict.
    return normalized[np.newaxis, ...]

def main():
    """Spot-check the saved model on up to 10 randomly sampled houses.

    Loads the Keras model at MODEL_PATH and the table at DATA_FILE, draws a
    random sample of rows and, for every sampled house whose photo can be read
    from IMG_FOLDER, prints the real price, the predicted price, the absolute
    difference and the percentage error. A mean absolute error line follows.

    Houses without a readable image are skipped. The average is taken over the
    houses that were actually evaluated (not a fixed 10), and the number of
    skipped houses is reported.

    Returns:
        None. All results are printed. If the model cannot be loaded, the
        error is printed and the function returns early.
    """
    # NOTE(review): the leading characters of this banner look like a
    # mis-encoded emoji; the text is kept unchanged.
    print("--- ðŸŽ² RANDOM BATCH TEST (10 HOUSES) ---")

    # 1. Load Model. compile=False skips restoring the training configuration;
    #    the original author notes this avoids a loading error.
    print("Loading Brain...")
    try:
        model = tf.keras.models.load_model(MODEL_PATH, compile=False)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # 2. Load Data
    df = pd.read_csv(DATA_FILE)

    # 3. Pick up to 10 random houses. Capping at len(df) avoids the ValueError
    #    that sample() raises when asked for more rows than the frame holds.
    sample = df.sample(min(10, len(df)))

    total_error = 0
    evaluated = 0  # houses that produced a prediction
    skipped = 0    # houses dropped because their image could not be read
    print(f"\n{'ID':<12} | {'REAL PRICE':<12} | {'PREDICTED':<12} | {'DIFF':<10} | {'ERROR %'}")
    print("-" * 75)

    for _, row in sample.iterrows():
        house_id = row['id']
        price = row['price']

        # --- PREPARE INPUT 1: IMAGE ---
        img_name = f"image_{house_id}.jpg"
        img_path = os.path.join(IMG_FOLDER, img_name)
        img_array = load_image(img_path)

        if img_array is None:
            skipped += 1
            continue

        # --- PREPARE INPUT 2: STATS ---
        # Seven raw features in this order:
        # Beds, Baths, Sqft Living, Sqft Lot, Floors, Waterfront, View
        # NOTE(review): these values are passed unscaled, and a later cell in
        # this notebook feeds lat/long through a MinMaxScaler instead of
        # waterfront/view. Confirm against the training setup; a mismatch
        # would explain wildly off predictions.
        stats = np.array([[
            row['bedrooms'],
            row['bathrooms'],
            row['sqft_living'],
            row['sqft_lot'],
            row['floors'],
            row['waterfront'],
            row['view']
        ]])

        # --- PREDICT ---
        # The model takes a list of two inputs: [image_data, stats_data].
        prediction = model.predict([img_array, stats], verbose=0)[0][0]

        # Absolute and relative error for this house.
        diff = abs(price - prediction)
        error_percent = (diff / price) * 100
        total_error += diff
        evaluated += 1

        # Formatting
        p_real = f"${int(price):,}"
        p_pred = f"${int(prediction):,}"
        p_diff = f"${int(diff):,}"

        print(f"{str(house_id):<12} | {p_real:<12} | {p_pred:<12} | {p_diff:<10} | {error_percent:.1f}%")

    print("-" * 75)
    if evaluated:
        # Average only over houses that were actually predicted.
        avg_error = total_error / evaluated
        print(f"ðŸ’° AVERAGE ERROR: ${int(avg_error):,}")
    else:
        print("No houses could be evaluated (no readable images found).")
    if skipped:
        print(f"Skipped {skipped} house(s) with missing or unreadable images.")
    print("=" * 75)

# Run the batch test when this cell (or an exported script) is executed directly.
if __name__ == "__main__":
    main()

--- ðŸŽ² RANDOM BATCH TEST (10 HOUSES) ---
Loading Brain...

ID           | REAL PRICE   | PREDICTED    | DIFF       | ERROR %
---------------------------------------------------------------------------
5350201180   | $1,665,000   | $7,564,986,368 | $7,563,321,344 | 454253.5%
7550800915   | $417,200     | $2,912,173,824 | $2,911,756,544 | 697928.2%
2316800020   | $560,000     | $5,937,432,064 | $5,936,871,936 | 1060155.6%
2125400010   | $490,000     | $5,228,766,208 | $5,228,276,224 | 1066995.1%
2459970020   | $360,000     | $4,600,311,808 | $4,599,951,872 | 1277764.5%
3395040920   | $300,000     | $3,471,056,384 | $3,470,756,352 | 1156918.8%
7787920250   | $550,000     | $7,745,223,168 | $7,744,673,280 | 1408122.5%
9477000060   | $434,500     | $5,429,872,640 | $5,429,437,952 | 1249582.9%
104530240    | $225,000     | $4,020,153,344 | $4,019,928,320 | 1786634.8%
1313500070   | $249,000     | $5,003,636,224 | $5,003,387,392 | 2009392.6%
-------------------------------------------------

In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import cv2
from sklearn.preprocessing import MinMaxScaler

# ================= CONFIGURATION =================
# Saved Keras model file that main() loads for inference.
MODEL_PATH = "house_price_model_v2.h5"
# CSV read by main(); it uses the 'id', 'price' and FEATURE_COLS columns.
DATA_FILE = "cleaned_dataset.csv"
# Folder searched for each house photo, named image_<id>.jpg.
IMG_FOLDER = "data/house_images"
# Columns must match EXACTLY what was used in training
# (the order here is the order of the values fed to the scaler and the model).
FEATURE_COLS = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'lat', 'long']
# Side length, in pixels, that load_image() resizes every photo to.
IMG_SIZE = 128
# =================================================

def load_image(img_path):
    """Read one house photo and turn it into a single-image model input.

    The file is decoded with OpenCV, converted from BGR to RGB channel order,
    resized to IMG_SIZE x IMG_SIZE, scaled from 0-255 down to 0-1, and given a
    leading batch axis.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The preprocessed image array with a leading axis of length 1, or
        None when OpenCV could not read the file.
    """
    bgr_pixels = cv2.imread(img_path)
    if bgr_pixels is None:
        # imread signals a missing/unreadable file by returning None.
        return None

    target_size = (IMG_SIZE, IMG_SIZE)
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    normalized = cv2.resize(rgb_pixels, target_size) / 255.0
    # Prepend the batch dimension expected by model.predict.
    return normalized[np.newaxis, ...]

def main():
    """Spot-check the saved model on up to 10 randomly sampled houses.

    Loads the Keras model at MODEL_PATH and the table at DATA_FILE, fits a
    MinMaxScaler on the FEATURE_COLS of the whole table, draws a random sample
    of rows and, for every sampled house whose photo can be read from
    IMG_FOLDER, prints the real price, the predicted price, the absolute
    difference and the percentage error. A mean absolute error line follows.

    Houses without a readable image are skipped. The average is taken over the
    houses that were actually evaluated (not a fixed 10), and the number of
    skipped houses is reported.

    Returns:
        None. All results are printed. If the model cannot be loaded, the
        error is printed and the function returns early.
    """
    # NOTE(review): the leading characters of this banner look like a
    # mis-encoded emoji; the text is kept unchanged.
    print("--- ðŸŽ² RANDOM BATCH TEST (10 HOUSES) ---")

    # 1. Load Model (compile=False: skip restoring the training configuration)
    print("Loading Brain...")
    try:
        model = tf.keras.models.load_model(MODEL_PATH, compile=False)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # 2. Load Data & Prepare Scaler
    # The scaler is fitted on the whole dataset so each feature's min/max
    # matches what the author says was done in training.
    # NOTE(review): confirm training really fitted on the full table.
    df = pd.read_csv(DATA_FILE)
    scaler = MinMaxScaler()
    scaler.fit(df[FEATURE_COLS].values)

    # 3. Pick up to 10 random houses. Capping at len(df) avoids the ValueError
    #    that sample() raises when asked for more rows than the frame holds.
    sample = df.sample(min(10, len(df)))

    total_error = 0
    evaluated = 0  # houses that produced a prediction
    skipped = 0    # houses dropped because their image could not be read
    print(f"\n{'ID':<12} | {'REAL PRICE':<12} | {'PREDICTED':<12} | {'DIFF':<10} | {'ERROR %'}")
    print("-" * 75)

    for _, row in sample.iterrows():
        house_id = row['id']
        price = row['price']

        # --- PREPARE INPUT 1: IMAGE ---
        img_name = f"image_{house_id}.jpg"
        img_path = os.path.join(IMG_FOLDER, img_name)
        img_array = load_image(img_path)

        if img_array is None:
            skipped += 1
            continue

        # --- PREPARE INPUT 2: STATS ---
        # One row of raw feature values, shaped (1, n_features) for the scaler.
        raw_stats = row[FEATURE_COLS].values.reshape(1, -1)
        # Map the raw values onto the 0-1 range learned by the scaler.
        stats_scaled = scaler.transform(raw_stats)

        # --- PREDICT ---
        # The model takes a list of two inputs: [image_data, stats_data].
        prediction = model.predict([img_array, stats_scaled], verbose=0)[0][0]

        # Absolute and relative error for this house.
        diff = abs(price - prediction)
        error_percent = (diff / price) * 100
        total_error += diff
        evaluated += 1

        # Formatting
        p_real = f"${int(price):,}"
        p_pred = f"${int(prediction):,}"
        p_diff = f"${int(diff):,}"

        print(f"{str(house_id):<12} | {p_real:<12} | {p_pred:<12} | {p_diff:<10} | {error_percent:.1f}%")

    print("-" * 75)
    if evaluated:
        # Average only over houses that were actually predicted.
        avg_error = total_error / evaluated
        print(f"ðŸ’° AVERAGE ERROR: ${int(avg_error):,}")
    else:
        print("No houses could be evaluated (no readable images found).")
    if skipped:
        print(f"Skipped {skipped} house(s) with missing or unreadable images.")
    print("=" * 75)

# Run the batch test when this cell (or an exported script) is executed directly.
if __name__ == "__main__":
    main()

--- ðŸŽ² RANDOM BATCH TEST (10 HOUSES) ---
Loading Brain...

ID           | REAL PRICE   | PREDICTED    | DIFF       | ERROR %
---------------------------------------------------------------------------
3512100110   | $275,436     | $721,783     | $446,347   | 162.1%
623039026    | $645,000     | $746,153     | $101,153   | 15.7%
722059070    | $235,000     | $542,268     | $307,268   | 130.8%
2023049218   | $105,500     | $430,834     | $325,334   | 308.4%
4449800345   | $584,000     | $1,244,548   | $660,548   | 113.1%
9465910070   | $480,000     | $764,975     | $284,975   | 59.4%
6362900007   | $395,000     | $711,812     | $316,812   | 80.2%
6743700090   | $490,000     | $690,231     | $200,231   | 40.9%
7763400035   | $253,500     | $546,437     | $292,937   | 115.6%
7207900080   | $424,950     | $847,731     | $422,781   | 99.5%
---------------------------------------------------------------------------
ðŸ’° AVERAGE ERROR: $335,839


In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import cv2
from sklearn.preprocessing import MinMaxScaler

# ================= CONFIGURATION =================
# Saved Keras model file that main() loads for inference.
MODEL_PATH = "house_price_model_v3.h5"
# CSV read by main(); it uses the 'id', 'price' and FEATURE_COLS columns.
DATA_FILE = "cleaned_dataset.csv"
# Folder searched for each house photo, named image_<id>.jpg.
IMG_FOLDER = "data/house_images"
# Columns must match EXACTLY what was used in training
# (the order here is the order of the values fed to the scaler and the model).
FEATURE_COLS = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'lat', 'long']
# Side length, in pixels, that load_image() resizes every photo to.
IMG_SIZE = 128
# =================================================

def load_image(img_path):
    """Load a house photo as a normalized, batched RGB array.

    Steps: decode with OpenCV, swap BGR to RGB, resize to a square of
    IMG_SIZE pixels per side, divide by 255.0, then add a leading batch axis.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The preprocessed image array with a leading axis of length 1, or
        None when OpenCV could not read the file.
    """
    decoded = cv2.imread(img_path)
    if decoded is None:
        # Nothing could be read from img_path; let the caller decide what to do.
        return None

    as_rgb = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)
    square = cv2.resize(as_rgb, (IMG_SIZE, IMG_SIZE))
    unit_range = square / 255.0
    # A leading axis of length 1 makes this a batch of one image.
    return unit_range[np.newaxis, ...]

def main():
    """Spot-check the saved model on up to 10 randomly sampled houses.

    Loads the Keras model at MODEL_PATH and the table at DATA_FILE, fits a
    MinMaxScaler on the FEATURE_COLS of the whole table, draws a random sample
    of rows and, for every sampled house whose photo can be read from
    IMG_FOLDER, prints the real price, the predicted price, the absolute
    difference and the percentage error. A mean absolute error line follows.

    Houses without a readable image are skipped. The average is taken over the
    houses that were actually evaluated (not a fixed 10), and the number of
    skipped houses is reported.

    Returns:
        None. All results are printed. If the model cannot be loaded, the
        error is printed and the function returns early.
    """
    # NOTE(review): the leading characters of this banner look like a
    # mis-encoded emoji; the text is kept unchanged.
    print("--- ðŸŽ² RANDOM BATCH TEST (10 HOUSES) ---")

    # 1. Load Model (compile=False: skip restoring the training configuration)
    print("Loading Brain...")
    try:
        model = tf.keras.models.load_model(MODEL_PATH, compile=False)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # 2. Load Data & Prepare Scaler
    # The scaler is fitted on the whole dataset so each feature's min/max
    # matches what the author says was done in training.
    # NOTE(review): confirm training really fitted on the full table.
    df = pd.read_csv(DATA_FILE)
    scaler = MinMaxScaler()
    scaler.fit(df[FEATURE_COLS].values)

    # 3. Pick up to 10 random houses. Capping at len(df) avoids the ValueError
    #    that sample() raises when asked for more rows than the frame holds.
    sample = df.sample(min(10, len(df)))

    total_error = 0
    evaluated = 0  # houses that produced a prediction
    skipped = 0    # houses dropped because their image could not be read
    print(f"\n{'ID':<12} | {'REAL PRICE':<12} | {'PREDICTED':<12} | {'DIFF':<10} | {'ERROR %'}")
    print("-" * 75)

    for _, row in sample.iterrows():
        house_id = row['id']
        price = row['price']

        # --- PREPARE INPUT 1: IMAGE ---
        img_name = f"image_{house_id}.jpg"
        img_path = os.path.join(IMG_FOLDER, img_name)
        img_array = load_image(img_path)

        if img_array is None:
            skipped += 1
            continue

        # --- PREPARE INPUT 2: STATS ---
        # One row of raw feature values, shaped (1, n_features) for the scaler.
        raw_stats = row[FEATURE_COLS].values.reshape(1, -1)
        # Map the raw values onto the 0-1 range learned by the scaler.
        stats_scaled = scaler.transform(raw_stats)

        # --- PREDICT ---
        # The model takes a list of two inputs: [image_data, stats_data].
        prediction = model.predict([img_array, stats_scaled], verbose=0)[0][0]

        # Absolute and relative error for this house.
        diff = abs(price - prediction)
        error_percent = (diff / price) * 100
        total_error += diff
        evaluated += 1

        # Formatting
        p_real = f"${int(price):,}"
        p_pred = f"${int(prediction):,}"
        p_diff = f"${int(diff):,}"

        print(f"{str(house_id):<12} | {p_real:<12} | {p_pred:<12} | {p_diff:<10} | {error_percent:.1f}%")

    print("-" * 75)
    if evaluated:
        # Average only over houses that were actually predicted.
        avg_error = total_error / evaluated
        print(f"ðŸ’° AVERAGE ERROR: ${int(avg_error):,}")
    else:
        print("No houses could be evaluated (no readable images found).")
    if skipped:
        print(f"Skipped {skipped} house(s) with missing or unreadable images.")
    print("=" * 75)

# Run the batch test when this cell (or an exported script) is executed directly.
if __name__ == "__main__":
    main()

--- ðŸŽ² RANDOM BATCH TEST (10 HOUSES) ---
Loading Brain...

ID           | REAL PRICE   | PREDICTED    | DIFF       | ERROR %
---------------------------------------------------------------------------
7942600006   | $345,000     | $364,534     | $19,534    | 5.7%
1665400025   | $259,000     | $258,649     | $350       | 0.1%
8952900204   | $810,000     | $1,103,316   | $293,316   | 36.2%
1545801410   | $276,900     | $327,137     | $50,237    | 18.1%
4459800075   | $710,000     | $817,479     | $107,479   | 15.1%
2490200320   | $545,000     | $511,005     | $33,994    | 6.2%
3024079063   | $850,000     | $1,014,651   | $164,651   | 19.4%
3523069060   | $415,000     | $210,919     | $204,080   | 49.2%
7234601142   | $665,000     | $524,886     | $140,113   | 21.1%
2460700430   | $342,000     | $286,765     | $55,235    | 16.2%
---------------------------------------------------------------------------
ðŸ’° AVERAGE ERROR: $106,899
