In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt
import joblib
import pickle

In [22]:
df = pd.read_csv("house_data.csv")

In [23]:
df.head(6)

Unnamed: 0,sqft,bedrooms,bathrooms,age,distance_to_city_km,price
0,1360,2,1,31,2.63,418.41
1,4272,3,3,2,6.56,1348.51
2,3592,1,2,2,11.75,1116.05
3,966,1,2,27,29.47,295.08
4,4926,2,1,43,23.65,1433.81
5,3944,5,3,14,22.14,1198.06


In [24]:
# Step 4: Check for missing values
print("\nüß© Checking for missing values:")
print(df.isnull().sum())


üß© Checking for missing values:
sqft                   0
bedrooms               0
bathrooms              0
age                    0
distance_to_city_km    0
price                  0
dtype: int64


In [25]:
features = ['sqft', 'bedrooms', 'bathrooms', 'age', 'distance_to_city_km']
target = 'price'

In [37]:
X = df[features].values
y = df[target].values
print(y)

[ 418.41 1348.51 1116.05  295.08 1433.81 1198.06 1125.26  973.17  245.18
  709.36  301.17  858.62  947.16  568.95 1270.74 1407.16 1026.82  315.06
  499.91  893.59  831.08  956.82  463.77  736.94  197.51  952.24 1094.23
  712.45  622.87  652.55 1145.45 1289.24 1289.35  311.76 1052.92  892.59
  471.18  824.53  738.6   255.19  822.77 1002.33  952.54  541.09  664.52
  903.45  404.91 1392.8   187.75 1074.91  761.64  668.35 1467.42 1132.29
  515.1  1282.19 1069.21 1296.15  651.6   226.89 1432.72  757.89  493.46
 1131.24 1338.98  478.68  508.48  255.22  432.75  482.27 1379.76  277.91
  288.96  770.31 1324.32  235.85  954.64  761.86  963.87  569.24  229.61
 1520.1  1205.98  722.58 1049.37  191.39 1018.77  902.68 1253.68  458.02
  936.87  997.98  676.51  913.11 1410.98  500.7   335.06 1525.23  377.2
 1570.81  240.23  643.45  413.66  982.65  318.12 1377.73  662.47  593.05
 1123.91  454.53  622.46 1182.52 1144.05 1040.68  915.82  678.45 1031.34
  954.79 1227.09  567.69  819.11  619.46  607.89  76

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [28]:
model = LinearRegression()

In [29]:
model.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [30]:
print("\nüéØ Model Training Complete!")
with open("my_model.pkl", "wb") as file:  # 'wb' means write in binary mode
    pickle.dump(model, file)

print("‚úÖ Model saved successfully as 'my_model.pkl'")
print(f"Intercept: {model.intercept_:.2f}")
print("Coefficients:")
for name, coef in zip(features, model.coef_):
    print(f"  {name}: {coef:.2f}")


üéØ Model Training Complete!
‚úÖ Model saved successfully as 'my_model.pkl'
Intercept: -1.55
Coefficients:
  sqft: 0.30
  bedrooms: 14.52
  bathrooms: 20.02
  age: -1.14
  distance_to_city_km: -2.02


In [None]:
# Step 9: Predict prices for test data
y_pred = model.predict(X_test)

In [None]:
mae = metrics.mean_absolute_error(y_test, y_pred)

In [None]:
mse = metrics.mean_squared_error(y_test, y_pred)

In [None]:
rmse = mse ** 0.5

In [None]:
r2 = metrics.r2_score(y_test, y_pred)

In [None]:
print("\nüìà Model Evaluation Results:")
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R¬≤ Score: {r2:.4f}")

In [None]:
r2_score_value = 0.9968

# Convert to percentage
r2_percentage = r2_score_value * 100

# Print
print(f"R¬≤ Score: {r2_score_value}")
print(f"R¬≤ Score (Percentage): {r2_percentage:.2f}%")

In [None]:
comparison = pd.DataFrame({
    'Actual': y_test[:10],
    'Predicted': y_pred[:10],
    'Difference': y_test[:10] - y_pred[:10]
})
print("\nüîç Actual vs Predicted Prices:")
print(comparison)

In [None]:
plt.figure(figsize=(8,6))
plt.scatter(y_test, y_pred, color='blue', alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel("Actual Prices")
plt.ylabel("Predicted Prices")
plt.title("Actual vs Predicted House Prices")
plt.grid(True)
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\nüìä Model Evaluation:")
print(f"MAE  : {mae:.2f}")
print(f"MSE  : {mse:.2f}")
print(f"RMSE : {rmse:.2f}")
print(f"R¬≤   : {r2:.2f}")


In [None]:
import joblib
import numpy as np

# üîπ Step 1: Load the trained model (.pkl file)
model = joblib.load("my_model.pkl")   # change file name if needed

print("üè† House Price Prediction üè†\n")

# üîπ Step 2: Take input from user
sqft = float(input("Enter area in square feet: "))
bedrooms = int(input("Enter number of bedrooms: "))
bathrooms = int(input("Enter number of bathrooms: "))
age = int(input("Enter age of the house (in years): "))
distance_to_city_km = float(input("Enter distance to city (in km): "))

# üîπ Step 3: Prepare input for model (2D array)
features = np.array([[sqft, bedrooms, bathrooms, age, distance_to_city_km]])

# üîπ Step 4: Predict the price
predicted_price = model.predict(features)

# üîπ Step 5: Show output
print(f"\nüí∞ Predicted House Price: ‚Çπ{predicted_price[0]:,.2f}")
