In [None]:
import pandas as pd

In [None]:
# Read the raw housing records from the CSV file (path is relative to the working directory).
house_data = pd.read_csv('house_data.csv')
# Preview the first rows; head() returns five rows by default.
house_data.head()

In [None]:
# List the column labels so the feature and target names selected in later cells can be checked.
house_data.columns

In [None]:
# Count the missing values in each column (isna is the canonical name; isnull is its alias).
house_data.isna().sum()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Feature columns fed to every model, and the target the models learn to predict.
feature_columns = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
    'waterfront', 'sqft_above', 'sqft_basement', 'sqft_living15', 'sqft_lot15',
]
X = house_data[feature_columns]
y = house_data['price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate models. The tree-based estimators are randomised, so each one gets
# the same fixed seed as the split; without it the scores below change on every
# run and the comparison cannot be reproduced.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42)
}

# Fit each model on the training split and score it on the held-out split.
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[model_name] = {'MSE': mse, 'R2': r2}

# Transpose so there is one row per model and one column per metric.
results_df = pd.DataFrame(results).T

results_df


In [None]:
# Refit Gradient Boosting on every row so the exported model uses all of the data.
# NOTE(review): the choice of Gradient Boosting is hard-coded here — confirm it
# still has the best score in results_df before relying on it.
# The fixed seed makes this refit (and therefore the model pickled later) reproducible.
best_model = GradientBoostingRegressor(random_state=42)
best_model.fit(X, y)

# Function to predict house price
def predict_price(features):
    """Predict the price of a single house with the notebook-level ``best_model``.

    Parameters
    ----------
    features : dict or sequence
        Either a dict keyed by the feature names listed in ``feature_names``
        below, or a sequence of ten values in that same order. Extra dict
        keys are ignored.

    Returns
    -------
    The first (and only) element of ``best_model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``features`` is a dict that lacks one or more required feature names.
    """
    feature_names = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'sqft_above', 'sqft_basement', 'sqft_living15', 'sqft_lot15']
    if isinstance(features, dict):
        # A missing key would otherwise become a NaN column in the frame below
        # and only surface later as an unrelated-looking error or a bad prediction.
        missing = [name for name in feature_names if name not in features]
        if missing:
            raise ValueError(f"Missing feature(s): {', '.join(missing)}")
    # Build a one-row frame whose columns are in the order the model was fitted with.
    input_data = pd.DataFrame([features], columns=feature_names)
    predicted_price = best_model.predict(input_data)
    return predicted_price[0]

# Demonstration: describe one house by feature name and ask the model for its price.
example_features = dict(
    bedrooms=3,
    bathrooms=2,
    sqft_living=2000,
    sqft_lot=5000,
    floors=1,
    waterfront=0,
    sqft_above=1500,
    sqft_basement=500,
    sqft_living15=1800,
    sqft_lot15=6000,
)

# Leaving the value as the cell's last expression displays it.
predicted_price = predict_price(example_features)
predicted_price


In [None]:
import pickle

# Persist the fitted model to disk so it can be reloaded without retraining.
model_filename = 'house_price_model.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(best_model, model_file)

# Confirm where the pickle was written.
print(f"Model saved to {model_filename}")


In [2]:
# Load the model from the file
import pickle
import pandas as pd
# Path of the pickle written by the save cell above.
model_filename = 'house_price_model.pkl'
# Unpickling can execute code stored in the file, so only load pickles you created or trust.
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

def predict_price_with_loaded_model(features):
    """Predict the price of a single house with the unpickled ``loaded_model``.

    Parameters
    ----------
    features : dict or sequence
        Either a dict keyed by the feature names listed in ``feature_names``
        below, or a sequence of ten values in that same order. Extra dict
        keys are ignored.

    Returns
    -------
    The first (and only) element of ``loaded_model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``features`` is a dict that lacks one or more required feature names.
    """
    feature_names = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'sqft_above', 'sqft_basement', 'sqft_living15', 'sqft_lot15']
    if isinstance(features, dict):
        # A missing key would otherwise become a NaN column in the frame below
        # and only surface later as an unrelated-looking error or a bad prediction.
        missing = [name for name in feature_names if name not in features]
        if missing:
            raise ValueError(f"Missing feature(s): {', '.join(missing)}")
    # Build a one-row frame whose columns are in the order the model was fitted with.
    input_data = pd.DataFrame([features], columns=feature_names)
    predicted_price = loaded_model.predict(input_data)
    return predicted_price[0]

# Demonstration with the reloaded model.
# NOTE(review): these inputs (e.g. 20,000 sqft of living space) look far larger
# than a typical house — confirm they fall within the range of the training data
# before trusting the prediction.
example_features = dict(
    bedrooms=5,
    bathrooms=4,
    sqft_living=20000,
    sqft_lot=50000,
    floors=3,
    waterfront=1,
    sqft_above=15000,
    sqft_basement=5000,
    sqft_living15=18000,
    sqft_lot15=60000,
)

predicted_price = predict_price_with_loaded_model(example_features)
print(predicted_price)


5635026.5356889
