In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder,StandardScaler

In [2]:
# Sample housing data, written one house per row for readability and then
# transposed into the column-oriented dict of lists used to build the DataFrame.
columns = (
    'House Size (sqft)',
    'Bedrooms',
    'Bathrooms',
    'Toilet Area (sqft)',
    'Location',
    'Price ($)',
)
rows = [
    (1200, 3, 2, 100, 'Urban', 350000),
    (1500, 4, 3, 120, 'Urban', 450000),
    (900, 2, 1, 80, 'Rural', 200000),
    (1800, 4, 3, 150, 'Suburban', 500000),
    (1600, 3, 2, 110, 'Suburban', 420000),
    (1000, 3, 1, 90, 'Rural', 250000),
    (2200, 5, 4, 180, 'Urban', 650000),
    (2000, 4, 3, 160, 'Suburban', 550000),
    (800, 2, 1, 70, 'Rural', 180000),
    (2500, 5, 4, 200, 'Urban', 750000),
]
# zip(*rows) regroups the row tuples by column position; each column is stored
# as a list under its header, in the same key order as `columns`.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}

In [3]:
# Build the working DataFrame: each key of `data` becomes a column.
df = pd.DataFrame.from_dict(data)

In [4]:
# Encode the categorical 'Location' column as integer codes.
# The encoder is fitted on the raw strings kept in `data`, not on
# df['Location']: this cell overwrites df['Location'] with the codes, so
# fitting on the column would make a second run of the cell refit `le` on
# integers and lose the original location names.
# NOTE(review): LabelEncoder is documented for target labels; its codes follow
# sorted class order, which a linear model then treats as a numeric scale.
# Consider a dedicated feature encoder — left as-is because `le` is pickled
# later in this notebook.
le = LabelEncoder()
df['Location'] = le.fit_transform(data['Location'])

In [5]:
# Separate the prediction target from the feature columns.
target_column = 'Price ($)'
y = df[target_column]
X = df.drop(columns=[target_column])

In [6]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same rows are selected on every run.
splits = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = splits

In [7]:
# Instantiate the estimator with its default settings; fitting happens in a
# separate step.
model = (
    LinearRegression()
)

In [8]:
# Fit on the training split only; the call is the cell's last expression, so
# the notebook displays the returned estimator.
model.fit(X=X_train, y=y_train)

In [9]:
# Predict prices for the held-out test rows.
y_pred = model.predict(X=X_test)

In [13]:
# Score the predictions: mean absolute difference between actual and predicted
# prices on the test split, reported with two decimals.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mae_report = f"Mean Absolute Error: ${mae:.2f}"
print(mae_report)

Mean Absolute Error: $20657.89


In [15]:
# Print each test-set price next to the model's prediction for the same row.
for true_price, predicted_price in zip(y_test, y_pred):
    comparison = f"Actual: ${true_price:.2f}, Predicted: ${predicted_price:.2f}"
    print(comparison)

Actual: $180000.00, Predicted: $161052.63
Actual: $450000.00, Predicted: $427631.58


In [29]:
import pickle as pkl

In [30]:
# Serialize the fitted regression model to model.pkl in the working directory.
with open('model.pkl', mode='wb') as model_file:
    pkl.dump(model, model_file)

In [31]:
# Serialize the fitted label encoder to le.pkl in the working directory.
with open('le.pkl', mode='wb') as encoder_file:
    pkl.dump(le, encoder_file)