# Housing Market Analysis using Python
**Author:** Phong
**Date:** August 2025

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Small hand-entered sample of nine houses; each key becomes one DataFrame
# column and the lists are aligned by position (one entry per house).
housing_data = dict(
    SquareFeet=[850, 900, 1000, 1200, 1500, 1800, 2000, 2200, 2500],
    Price=[150000, 160000, 170000, 200000, 240000, 280000, 310000, 340000, 400000],
    Bedrooms=[2, 2, 3, 3, 3, 4, 4, 4, 5],
)
df = pd.DataFrame(data=housing_data)

# Quick look at the data: first rows, column schema, and summary statistics.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())

# Number of null entries in each column.
print("Missing values per column:\n", df.isnull().sum())

# Histogram of the Price column (5 bins) with a KDE curve drawn on top.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='Price', bins=5, kde=True, ax=ax)
ax.set_title('Distribution of House Prices')
ax.set_xlabel('Price')
ax.set_ylabel('Frequency')
plt.show()

# Price against square footage; point colour encodes the Bedrooms column.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='SquareFeet',
    y='Price',
    hue='Bedrooms',
    palette='viridis',
    ax=ax,
)
ax.set_title('Price vs. Square Footage by Bedrooms')
ax.set_xlabel('Square Feet')
ax.set_ylabel('Price')
plt.show()

# Central tendency of the Price column.
price_column = df['Price']
mean_price = price_column.mean()
median_price = price_column.median()
print('Mean Price:', mean_price)
print('Median Price:', median_price)

# Single-feature linear regression: Price as a function of SquareFeet.
# X is kept two-dimensional (a one-column DataFrame) as scikit-learn expects;
# y is the target Series.
X, y = df[['SquareFeet']], df['Price']
model = LinearRegression().fit(X, y)  # fit() returns the fitted estimator

# One coefficient per feature, so coef_[0] is the price change per square foot.
slope = model.coef_[0]
print('Slope:', slope)
print('Intercept:', model.intercept_)

# Predict prices for two unseen house sizes (one row per house, one column
# for the single feature).
new_sqft = np.array([[1300], [2100]])
# The model was fitted on a DataFrame, so scikit-learn recorded the column
# name; passing a bare ndarray triggers a "X does not have valid feature
# names" UserWarning. Wrap the array with the training column names instead.
predicted_prices = model.predict(pd.DataFrame(new_sqft, columns=X.columns))
print('Predicted Prices for 1300 and 2100 sq ft:', predicted_prices)

# Observed prices as points, with the model's predictions for the same
# inputs drawn as a line over them.
fig, ax = plt.subplots(figsize=(8, 5))
square_feet = df['SquareFeet']
fitted_prices = model.predict(X)
ax.scatter(square_feet, df['Price'], color='blue', label='Actual Prices')
ax.plot(square_feet, fitted_prices, color='red', label='Regression Line')
ax.set_title('Linear Regression: Price vs. Square Footage')
ax.set_xlabel('Square Feet')
ax.set_ylabel('Price')
ax.legend()
plt.show()

### Conclusion
- Larger houses tend to have higher prices.
- Prices increase by approximately $145 per square foot in this dataset (the slope of the fitted regression line).
- Bedroom count also rises with price in the scatter plot, but it was not included in the regression, so its separate effect is not measured here.
- Future work: add location, age of house, and amenities for better predictions.