In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Sample house listings, one tuple per house in the order given by
# `column_names`: (size, bedrooms, location, price).
house_rows = [
    (1500, 3, 'Suburb', 300000),
    (2000, 4, 'City Center', 500000),
    (1200, 2, 'Suburb', 250000),
    (1800, 3, 'City Center', 450000),
    (1600, 3, 'Suburb', 320000),
    (2200, 4, 'City Center', 550000),
    (1400, 2, 'Suburb', 270000),
    (2400, 4, 'City Center', 600000),
]
column_names = ['size', 'bedrooms', 'location', 'price']

# Transpose the rows into a column-oriented dict (column name -> list of values).
data_dict = {
    name: list(values)
    for name, values in zip(column_names, zip(*house_rows))
}

# Build the DataFrame from the column-oriented dict.
data = pd.DataFrame(data_dict)

# Show a heading followed by the first five rows.
print("Initial DataFrame:", data.head(), sep="\n")

# Replace the location labels with their integer category codes so the column
# can be used as a numeric feature. This overwrites `data['location']` in place.
location_as_category = data['location'].astype('category')
data['location'] = location_as_category.cat.codes

# Separate the feature matrix (X) from the target to predict (y).
feature_columns = ['size', 'bedrooms', 'location']
X = data[feature_columns]
y = data['price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit an ordinary least-squares model on the training portion
# (`fit` returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and measure the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

# Report the evaluation result.
print("\nModel Evaluation:", f'Mean Absolute Error: {mae}', sep="\n")

# Predict prices for houses the model has not seen.
new_data = pd.DataFrame({
    'size': [1700, 2500],
    'bedrooms': [3, 4],
    'location': ['Suburb', 'City Center']
})

# Encode `location` with the category -> code mapping derived from the
# training locations. Re-deriving the categories from `new_data` alone would
# renumber them whenever the new rows lack a location present in training
# (e.g. a Suburb-only batch would encode 'Suburb' as 0 instead of 1) and
# silently feed the model the wrong feature value.
training_locations = pd.Categorical(data_dict['location']).categories
location_codes = pd.Categorical(
    new_data['location'], categories=training_locations
).codes

# pandas assigns code -1 to values outside `categories`; fail loudly rather
# than predict from a code the model was never trained on.
unknown_mask = location_codes == -1
if unknown_mask.any():
    unknown_locations = new_data.loc[unknown_mask, 'location'].unique().tolist()
    raise ValueError(
        f"Unseen location value(s) in new data: {unknown_locations}; "
        f"expected one of {list(training_locations)}"
    )
new_data['location'] = location_codes

# Predict house prices (columns are in the same order as the training features).
price_predictions = model.predict(new_data)

print("\nPrice Predictions for New Data:")
for house_number, price in enumerate(price_predictions, start=1):
    print(f'House {house_number} price prediction: ${price:,.2f}')


Initial DataFrame:
   size  bedrooms     location   price
0  1500         3       Suburb  300000
1  2000         4  City Center  500000
2  1200         2       Suburb  250000
3  1800         3  City Center  450000
4  1600         3       Suburb  320000

Model Evaluation:
Mean Absolute Error: 2721.5189873420168

Price Predictions for New Data:
House 1 price prediction: $347,088.61
House 2 price prediction: $614,936.71
