## Import Data

In [3]:
import pandas as pd
import statsmodels.formula.api as smf

# Source of the data: the house_pricing_train.csv file hosted in the
# opencampus-sh GitHub repository.
url = "https://raw.githubusercontent.com/opencampus-sh/einfuehrung-in-data-science-und-ml/main/house_pricing_data/house_pricing_train.csv"

# Download the CSV and parse it into a DataFrame in one step.
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, which
# presumably is a row index that was written into the CSV -- confirm, and
# consider reading it with index_col=0 if it carries no information.
house_pricing = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows to check that the columns were parsed as expected.
house_pricing.head(n=5)



Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,...,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,price
0,6840701095,20150403T000000,3,1.0,1740,4400,1.5,0,0,3,...,1740,0,1924,0,98122,47.6059,-122.3,1720,4400,548500.0
1,1025049114,20140717T000000,3,2.25,1270,1566,2.0,0,0,3,...,1060,210,2014,0,98105,47.6647,-122.284,1160,1327,625504.0
2,4025300360,20150326T000000,3,2.0,1130,16875,1.0,0,0,4,...,1130,0,1947,0,98155,47.7489,-122.3,1600,14300,349500.0
3,5536500200,20140918T000000,5,3.5,3760,4857,2.0,0,3,3,...,2820,940,2004,0,98072,47.7398,-122.167,3000,5693,730000.0
4,1245003660,20150321T000000,3,2.0,1470,6000,1.0,0,0,3,...,1090,380,1950,1996,98033,47.6829,-122.202,1880,7799,630000.0


## Fit the Linear Model

In [2]:
# Specify an ordinary least squares regression of price on sqft_lot15 and
# condition; wrapping condition in C(...) makes the formula treat it as a
# categorical predictor instead of a numeric one.
price_regression = smf.ols(
    formula='price ~ sqft_lot15 + C(condition)',
    data=house_pricing,
)

# Estimate the coefficients; the fitted results object is reused for prediction.
mod = price_regression.fit()

# Show the regression table (coefficients, standard errors, fit statistics).
print(mod.summary())

                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.014
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     49.39
Date:                Thu, 12 Dec 2024   Prob (F-statistic):           5.82e-51
Time:                        07:50:14   Log-Likelihood:            -2.4603e+05
No. Observations:               17290   AIC:                         4.921e+05
Df Residuals:                   17284   BIC:                         4.921e+05
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept          3.261e+05   7.05e+0

## Predict the Price for a New House

In [6]:
# Describe a hypothetical house through the two predictors used in the model
# formula: sqft_lot15 and condition.
sqft_lot15 = 10000
condition = 3

# Echo the inputs so the prediction printed below can be read in context.
print("New house features:")
print(f"sqft_lot15: {sqft_lot15}, condition: {condition}")

# predict() needs a DataFrame whose column names match the variables in the
# model formula; a single-row frame produces a single prediction.
new_house = pd.DataFrame({'sqft_lot15': [sqft_lot15], 'condition': [condition]})
predicted_price = mod.predict(new_house)

# Select the only prediction by position (.iloc) rather than by index label,
# so the lookup does not depend on how the rows of new_house are labelled.
print(f"\nPredicted price for the new house: ${predicted_price.iloc[0]:.2f}")

New house features:
sqft_lot15: 10000, condition: 3

Predicted price for the new house: $540541.17
