In [5]:
# Test House Price Prediction Model with Your Own Inputs

import pandas as pd
import numpy as np
import joblib

print("="*60)
print("HOUSE PRICE PREDICTION - TESTING")
print("="*60)

# ============================================================================
# 1. LOAD SAVED MODELS AND SCALER
# ============================================================================
print("\n[1/3] Loading saved models and scaler...")
try:
    lr_model = joblib.load('linear_regression_model.pkl')
    dt_model = joblib.load('decision_tree_model.pkl')
    rf_model = joblib.load('random_forest_model.pkl')
    scaler = joblib.load('scaler.pkl')
    feature_names = joblib.load('feature_names.pkl')
    print("✓ All models and scaler loaded successfully!")
    print(f"✓ Total features expected: {len(feature_names)}")
except FileNotFoundError as e:
    print(f"  Error: {e}")
    print("Please make sure you've run the training script first!")
    exit()

# ============================================================================
# 2. DEFINE YOUR CUSTOM INPUT
# ============================================================================
print("\n[2/3] Preparing your custom input...")

# YOUR CUSTOM HOUSE DETAILS HERE
# Modify these values to test different houses
custom_house = {
    # Basic features
    'bedrooms': 4,
    'bathrooms': 2,
    'living_area': 2500,
    'lot_area': 5000,
    'number of floors': 2.0,
    'number of views': 3,
    'area_no_basement': 2000,
    'basement_area': 500,
    'built_year': 2010,
    'renovation_year': 0,
    'living_area_renov': 2500,
    'lot_area_renov': 5000,
    'schools_nearby': 3,
    'distance_airport': 25,
    
    # Categorical features (these will be one-hot encoded)
    'waterfront present': 0,  # 0 or 1
    'condition of the house': 3,  # typically 1-5
    'grade of the house': 7,  # typically 1-13
}

print("\n  House Details You're Testing:")
print("-" * 60)
print(f"  Bedrooms: {custom_house['bedrooms']}")
print(f"  Bathrooms: {custom_house['bathrooms']}")
print(f"  Living Area: {custom_house['living_area']} sq ft")
print(f"  Lot Area: {custom_house['lot_area']} sq ft")
print(f"  Number of Floors: {custom_house['number of floors']}")
print(f"  Built Year: {custom_house['built_year']}")
print(f"  Renovated: {'Yes' if custom_house['renovation_year'] > 0 else 'No'}")
print(f"  Schools Nearby: {custom_house['schools_nearby']}")
print(f"  Distance from Airport: {custom_house['distance_airport']} km")
print(f"  Waterfront: {'Yes' if custom_house['waterfront present'] else 'No'}")
print(f"  Condition: {custom_house['condition of the house']}/5")
print(f"  Grade: {custom_house['grade of the house']}/13")
print("-" * 60)

# ============================================================================
# 3. ENGINEER FEATURES (same as training)
# ============================================================================
# Calculate engineered features
custom_house['house_age'] = 2024 - custom_house['built_year']
custom_house['is_renovated'] = 1 if custom_house['renovation_year'] > 0 else 0
custom_house['years_since_renovation'] = (2024 - custom_house['renovation_year'] 
                                          if custom_house['renovation_year'] > 0 
                                          else custom_house['house_age'])
custom_house['total_rooms'] = custom_house['bedrooms'] + custom_house['bathrooms']

# Create initial dataframe
input_df = pd.DataFrame([custom_house])

# Apply one-hot encoding (same as training)
categorical_cols = ['waterfront present', 'condition of the house', 'grade of the house']

for col in categorical_cols:
    if col in input_df.columns:
        input_df = pd.get_dummies(input_df, columns=[col], prefix=col, drop_first=True)

# ============================================================================
# 4. ALIGN FEATURES WITH TRAINING DATA
# ============================================================================
# Create a dataframe with all expected features initialized to 0
aligned_input = pd.DataFrame(columns=feature_names)
aligned_input.loc[0] = 0  # Initialize all values to 0

# Copy over the values that exist in our input
for col in input_df.columns:
    if col in aligned_input.columns:
        aligned_input[col] = input_df[col].values[0]

# Convert to numeric (important!)
aligned_input = aligned_input.apply(pd.to_numeric, errors='coerce').fillna(0)

# Scale only the numerical features that were scaled during training
# Get the numerical feature names from the scaler
scaled_feature_indices = []
for i, feature in enumerate(feature_names):
    # Check if this feature should be scaled (not a one-hot encoded dummy)
    if not any(prefix in feature for prefix in ['waterfront present_', 'condition of the house_', 'grade of the house_']):
        scaled_feature_indices.append(i)

# Create a copy for scaling
input_for_prediction = aligned_input.copy()

# Scale the features
features_to_scale = [feature_names[i] for i in scaled_feature_indices]
input_for_prediction[features_to_scale] = scaler.transform(aligned_input[features_to_scale])

print(f"\n✓ Input prepared with {len(feature_names)} features")

# ============================================================================
# 5. MAKE PREDICTIONS WITH ALL MODELS
# ============================================================================
print("\n[3/3] Making Predictions...")
print("\n" + "="*60)
print("PREDICTION RESULTS")
print("="*60)

models = {
    'Linear Regression': lr_model,
    'Decision Tree': dt_model,
    'Random Forest': rf_model
}

predictions = {}

for model_name, model in models.items():
    predicted_price = model.predict(input_for_prediction)[0]
    predictions[model_name] = predicted_price
    print(f"\n{model_name}:")
    print(f"  Predicted Price: ₹{predicted_price:,.2f}")

# Calculate average prediction
avg_prediction = np.mean(list(predictions.values()))
print("\n" + "-"*60)
print(f"Average Prediction (across all models): ₹{avg_prediction:,.2f}")
print("="*60)

# ============================================================================
# 6. HELPER FUNCTION FOR MULTIPLE PREDICTIONS
# ============================================================================
def predict_house_price(house_details, model=rf_model):
    """
    Predict house price given house details
    
    Parameters:
    house_details: dict with keys like 'bedrooms', 'bathrooms', etc.
    model: trained model to use (default: Random Forest)
    
    Returns:
    predicted_price: float
    """
    # Add engineered features
    house_details['house_age'] = 2024 - house_details['built_year']
    house_details['is_renovated'] = 1 if house_details.get('renovation_year', 0) > 0 else 0
    house_details['years_since_renovation'] = (2024 - house_details.get('renovation_year', 0)
                                               if house_details.get('renovation_year', 0) > 0
                                               else house_details['house_age'])
    house_details['total_rooms'] = house_details['bedrooms'] + house_details['bathrooms']
    
    # Create dataframe
    df = pd.DataFrame([house_details])
    
    # One-hot encode
    for col in ['waterfront present', 'condition of the house', 'grade of the house']:
        if col in df.columns:
            df = pd.get_dummies(df, columns=[col], prefix=col, drop_first=True)
    
    # Align with training features
    aligned = pd.DataFrame(columns=feature_names)
    aligned.loc[0] = 0
    for col in df.columns:
        if col in aligned.columns:
            aligned[col] = df[col].values[0]
    aligned = aligned.apply(pd.to_numeric, errors='coerce').fillna(0)
    
    # Scale features
    scaled_features = [f for f in feature_names if not any(p in f for p in ['waterfront present_', 'condition of the house_', 'grade of the house_'])]
    aligned[scaled_features] = scaler.transform(aligned[scaled_features])
    
    # Predict
    return model.predict(aligned)[0]

# ============================================================================
# 7. ADDITIONAL TESTING - MULTIPLE SCENARIOS
# ============================================================================
print("\n\n" + "="*60)
print("TESTING MULTIPLE SCENARIOS")
print("="*60)

test_scenarios = [
    {
        'name': 'Budget Home',
        'bedrooms': 2,
        'bathrooms': 1.0,
        'living_area': 1200,
        'lot_area': 2500,
        'number of floors': 1.0,
        'number of views': 0,
        'area_no_basement': 1200,
        'basement_area': 0,
        'built_year': 1990,
        'renovation_year': 0,
        'living_area_renov': 1200,
        'lot_area_renov': 2500,
        'schools_nearby': 1,
        'distance_airport': 40,
        'waterfront present': 0,
        'condition of the house': 3,
        'grade of the house': 6
    },
    {
        'name': 'Mid-Range Home',
        'bedrooms': 3,
        'bathrooms': 2.0,
        'living_area': 1800,
        'lot_area': 4000,
        'number of floors': 1.5,
        'number of views': 2,
        'area_no_basement': 1400,
        'basement_area': 400,
        'built_year': 2005,
        'renovation_year': 0,
        'living_area_renov': 1800,
        'lot_area_renov': 4000,
        'schools_nearby': 2,
        'distance_airport': 30,
        'waterfront present': 0,
        'condition of the house': 4,
        'grade of the house': 7
    },
    {
        'name': 'Luxury Home',
        'bedrooms': 5,
        'bathrooms': 3.5,
        'living_area': 4000,
        'lot_area': 8000,
        'number of floors': 2.5,
        'number of views': 4,
        'area_no_basement': 3200,
        'basement_area': 800,
        'built_year': 2018,
        'renovation_year': 0,
        'living_area_renov': 4000,
        'lot_area_renov': 8000,
        'schools_nearby': 4,
        'distance_airport': 15,
        'waterfront present': 1,
        'condition of the house': 5,
        'grade of the house': 11
    }
]

print("\nUsing Random Forest Model:\n")

for scenario in test_scenarios:
    price = predict_house_price(scenario, rf_model)
    print(f"{scenario['name']}:")
    print(f"  {scenario['bedrooms']} bed, {scenario['bathrooms']} bath, {scenario['living_area']} sqft")
    print(f"  Built: {scenario['built_year']}, Grade: {scenario['grade of the house']}/13")
    print(f"  Predicted Price: ₹{price:,.2f}\n")

print("="*60)
print("TESTING COMPLETE!")
print("="*60)
print("\n  TIP: Modify the 'custom_house' dictionary at line 33 to test your own inputs!")

HOUSE PRICE PREDICTION - TESTING

[1/3] Loading saved models and scaler...
✓ All models and scaler loaded successfully!
✓ Total features expected: 32

[2/3] Preparing your custom input...

  House Details You're Testing:
------------------------------------------------------------
  Bedrooms: 4
  Bathrooms: 2
  Living Area: 2500 sq ft
  Lot Area: 5000 sq ft
  Number of Floors: 2.0
  Built Year: 2010
  Renovated: No
  Schools Nearby: 3
  Distance from Airport: 25 km
  Waterfront: No
  Condition: 3/5
  Grade: 7/13
------------------------------------------------------------

✓ Input prepared with 32 features

[3/3] Making Predictions...

PREDICTION RESULTS

Linear Regression:
  Predicted Price: ₹293,514.42

Decision Tree:
  Predicted Price: ₹891,642.86

Random Forest:
  Predicted Price: ₹695,197.89

------------------------------------------------------------
Average Prediction (across all models): ₹626,785.06


TESTING MULTIPLE SCENARIOS

Using Random Forest Model:

Budget Home:
  2 bed