In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Read the raw table from the CSV file in the working directory
data = pd.read_csv("data.csv")

# 'price' is the regression target; every remaining column stays on the feature side
y = data['price']
X = data.drop(labels=['price'], axis='columns')

# Feature groups. Columns listed here are the only ones routed into the model:
# the ColumnTransformer below is built without a `remainder` argument, so any
# other column of X is left out (scikit-learn's default is remainder='drop').
numerical_features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
    'floors', 'waterfront', 'view', 'condition',
    'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated',
]
# NOTE(review): 'street' is presumably close to unique per row, which would make
# its one-hot block very wide and uninformative on unseen rows — confirm.
categorical_features = ['street', 'city', 'statezip', 'country']

# One transformer per group:
#   - numeric columns are standardised
#   - categorical columns are one-hot encoded; handle_unknown='ignore' makes
#     categories not seen during fit encode as all zeros instead of raising
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

# Route each feature group through its transformer
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Chain preprocessing and the linear regressor so fit/predict run both steps
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit the whole pipeline (preprocessing + regression) on the training rows only
model.fit(X_train, y_train)

# Score the held-out rows with three regression metrics
y_pred = model.predict(X_test)
mse, mae, r2 = (
    mean_squared_error(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

# Report the metrics
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R²):", r2)

def predict_price(bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view, condition, sqft_above, 
                  sqft_basement, yr_built, yr_renovated, street, city, statezip, country, estimator=None):
    """Predict the price of a single house from its attributes.

    The arguments are packed into a one-row DataFrame whose column names match
    the feature names the pipeline was configured with, and that frame is
    passed to the estimator's ``predict`` method.

    Parameters
    ----------
    bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view,
    condition, sqft_above, sqft_basement, yr_built, yr_renovated
        Values for the numerical feature columns of the same names.
    street, city, statezip, country
        Values for the categorical feature columns of the same names.
    estimator : object with a ``predict(DataFrame)`` method, optional
        Fitted model used for the prediction. When omitted (``None``) the
        notebook-level ``model`` pipeline is used, which preserves the original
        behaviour. Passing an estimator allows scoring with, for example, a
        pipeline reloaded from disk.

    Returns
    -------
    The first (and only) element of the array returned by ``predict``.
    """
    # Create a single-row DataFrame for the input data; keys must match the
    # column names the estimator selects by.
    input_data = pd.DataFrame([{
        'bedrooms': bedrooms,
        'bathrooms': bathrooms,
        'sqft_living': sqft_living,
        'sqft_lot': sqft_lot,
        'floors': floors,
        'waterfront': waterfront,
        'view': view,
        'condition': condition,
        'sqft_above': sqft_above,
        'sqft_basement': sqft_basement,
        'yr_built': yr_built,
        'yr_renovated': yr_renovated,
        'street': street,
        'city': city,
        'statezip': statezip,
        'country': country
    }])

    # Fall back to the notebook-global pipeline only when no estimator is given,
    # so the global is not required when a caller supplies their own model.
    fitted = model if estimator is None else estimator

    # Predict and return the price for the single input row
    predicted_price = fitted.predict(input_data)
    return predicted_price[0]

# Demo: describe one listing as keyword/value pairs and unpack it into predict_price
example_listing = dict(
    bedrooms=3,
    bathrooms=1.5,
    sqft_living=1340,
    sqft_lot=7912,
    floors=1.5,
    waterfront=0,
    view=0,
    condition=3,
    sqft_above=1340,
    sqft_basement=0,
    yr_built=1955,
    yr_renovated=2005,
    street="18810 Densmore Ave N",
    city="Shoreline",
    statezip="WA 98133",
    country="USA",
)
predicted_price = predict_price(**example_listing)

print("Predicted Price:", predicted_price)


Mean Squared Error (MSE): 48738831302.643234
Mean Absolute Error (MAE): 126283.10197475986
R-squared (R²): 0.670568767651011
Predicted Price: 313000.89671919256


In [1]:
import pickle

In [5]:
# Persist the fitted pipeline to disk so it can be reloaded later without retraining.
# The file is opened in binary write mode, so an existing file of that name is overwritten.
with open('house_price_model.pkl', mode='wb') as pickle_out:
    pickle.dump(model, file=pickle_out)
