<a href="https://colab.research.google.com/github/munyanza/house_prediction_model/blob/main/housing_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Build a small, reproducible synthetic housing dataset.
N_SAMPLES = 20         # number of synthetic houses to generate
REFERENCE_YEAR = 2023  # year used to turn YearBuilt into an age
np.random.seed(42)     # fixed seed: a fresh run always yields the same rows

# Feature columns. Every draw advances the seeded global random stream, so the
# order of these entries is part of what makes the data reproducible.
data = {
    'SquareFootage': np.random.randint(800, 4000, N_SAMPLES),
    'Bedrooms': np.random.randint(1, 6, N_SAMPLES),
    'Bathrooms': np.random.randint(1, 4, N_SAMPLES),
    # randint's upper bound is exclusive, so YearBuilt tops out at 2024 and the
    # age computed below can be -1 for the newest houses.
    'YearBuilt': np.random.randint(1950, 2025, N_SAMPLES),
    'Garage': np.random.randint(0, 3, N_SAMPLES),
    'LocationScore': np.random.uniform(1, 10, N_SAMPLES).round(1),
}

# Burn one draw of N_SAMPLES integers. This cell used to store these values as
# a placeholder 'Price' column that was immediately overwritten; the values are
# no longer kept, but the draw itself is, so the noise term below (and every
# number derived from it) is unchanged.
_discarded_price_draw = np.random.randint(150000, 950000, N_SAMPLES)

# Add some correlation between features and price: Price is a linear
# combination of the features plus random integer noise in [-50000, 50000).
data['Price'] = (data['SquareFootage'] * 150 +
                 data['Bedrooms'] * 20000 +
                 data['Bathrooms'] * 15000 +
                 data['LocationScore'] * 10000 +
                 (REFERENCE_YEAR - data['YearBuilt']) * -500 +  # older => cheaper
                 data['Garage'] * 10000 +
                 np.random.randint(-50000, 50000, N_SAMPLES))
df = pd.DataFrame(data)
print(df.head(10))

   SquareFootage  Bedrooms  Bathrooms  YearBuilt  Garage  LocationScore  \
0           3974         2          1       1985       0            7.4   
1           1660         4          3       1999       0            7.6   
2           2094         5          3       1953       1            7.9   
3           1930         1          1       1951       2            1.7   
4           1895         4          1       1955       0            4.2   
5           3892         2          3       2003       1            2.0   
6           2438         5          2       1953       0            8.8   
7           2969         4          1       2003       0            6.6   
8           1266         1          2       2012       0            4.0   
9           2038         1          2       1967       0            1.6   

      Price  
0  689927.0  
1  443820.0  
2  525723.0  
3  357234.0  
4  412700.0  
5  732226.0  
6  520999.0  
7  589935.0  
8  288444.0  
9  336257.0  


**Housing Price Prediction Model**

In [None]:
# Prepare data for modelling: every column except the target is a feature.
X = df.drop('Price', axis=1)
y = df['Price']

# Split data into training and test sets. 20% of the rows are held out and
# random_state pins which rows those are, so the evaluation is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the linear regression model on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = model.predict(X_test)

# Evaluate the model. MSE is in squared price units; R-squared is unitless.
# NOTE: with a 20-row dataset the test set is only 4 rows, so both figures are
# noisy and should be read as a smoke test rather than a performance estimate.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print(f"Mean Squared Error:{mse:.2f}")
print(f"R-squared Score:{r2:.2f}")

# Show predictions vs actual. The index is inherited from y_test, so each row
# keeps the label of the original row in df.
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred.round()})
print("\nPrediction Results:")
print(results)


Model Evaluation:
Mean Squared Error:537168770.44
R-squaared Score:0.98

Prediction Results:
      Actual  Predicted
0   689927.0   729476.0
17  416291.0   421263.0
15  259439.0   262905.0
1   443820.0   420414.0


**Predicting New House Prices**

In [None]:
# Feature values for two unseen houses. The columns are the same as the
# training features (every column of df except 'Price'), in the same order.
new_houses = pd.DataFrame(
    {
        'SquareFootage': [1500, 3000],
        'Bedrooms': [3, 4],
        'Bathrooms': [2, 3],
        'YearBuilt': [2010, 1995],
        'Garage': [1, 2],
        'LocationScore': [7.5, 8.0],
    }
)

# Ask the fitted model for one predicted price per row.
new_predictions = model.predict(new_houses)

# Report each prediction as a dollar amount with thousands separators.
print("\nNew House Predictions:")
for house_number, price in enumerate(new_predictions, start=1):
    print(f'House {house_number}: ${price:,.2f}')


New House Predictions:
House 1: $381,960.06
House 2: $660,590.77
