In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt
import joblib
import pickle

In [None]:
# Load the housing dataset from a CSV file (relative path) into a DataFrame.
df = pd.read_csv("house_data.csv")

In [None]:
# Preview the first six rows to sanity-check the loaded columns and values.
df.head(6)

In [None]:
# Step 4: Check for missing values
# Count the null entries in every column so gaps in the data are visible
# before the feature columns are selected for modelling.
print("\n🧩 Checking for missing values:")
print(df.isnull().sum())

In [None]:
# Name of the column the model learns to predict.
target = 'price'

# Input columns fed to the regression; the order here is the order in which
# the fitted coefficients are paired with names when they are printed.
features = [
    'sqft',
    'bedrooms',
    'bathrooms',
    'age',
    'distance_to_city_km',
]

In [None]:
# Split the frame into a 2-D feature matrix and a 1-D target vector as NumPy arrays.
# `to_numpy()` is the accessor pandas recommends over the older `.values` attribute.
X = df[features].to_numpy()
y = df[target].to_numpy()

# Show the shapes and a short preview rather than dumping the entire target array.
print(f"X shape: {X.shape}, y shape: {y.shape}")
print(y[:5])

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Create a scikit-learn linear regression estimator with its default settings.
model = LinearRegression()

In [None]:
# Fit the regression on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)

In [None]:
print("\n🎯 Model Training Complete!")

# Persist the fitted estimator so it can be reloaded later without retraining.
# NOTE: only unpickle this file from a trusted location -- loading a pickle
# can execute arbitrary code.
with open("my_model.pkl", "wb") as file:  # 'wb' means write in binary mode
    pickle.dump(model, file)

print("✅ Model saved successfully as 'my_model.pkl'")

# Report the learned parameters, pairing each coefficient with its feature name.
print(f"Intercept: {model.intercept_:.2f}")
print("Coefficients:")
for name, coef in zip(features, model.coef_):
    print(f"  {name}: {coef:.2f}")

In [None]:
# Step 9: Predict prices for test data
# The fitted model scores the held-out feature rows; y_pred lines up row-for-row with y_test.
y_pred = model.predict(X_test)

In [None]:
# Mean absolute error between the held-out targets and the predictions.
mae = metrics.mean_absolute_error(y_test, y_pred)

In [None]:
# Mean squared error between the held-out targets and the predictions.
mse = metrics.mean_squared_error(y_test, y_pred)

In [None]:
# Root mean squared error: the square root of MSE, which puts the error back in the target's units.
rmse = mse ** 0.5

In [None]:
# Coefficient of determination (R^2) of the predictions on the test split.
r2 = metrics.r2_score(y_test, y_pred)

In [None]:
# Summarise the test-set metrics (mae, mse, rmse, r2) computed earlier in the notebook.
print("\n📈 Model Evaluation Results:")
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.4f}")

In [None]:
# Use the R² computed from the test predictions instead of a hard-coded number,
# so the reported figure always matches the model that was actually trained.
r2_score_value = r2

# Convert to percentage
r2_percentage = r2_score_value * 100

# Print
print(f"R² Score: {r2_score_value:.4f}")
print(f"R² Score (Percentage): {r2_percentage:.2f}%")

In [None]:
# Show the first ten test rows side by side with the model's predictions.
# Difference = actual - predicted, so a positive value means the model under-predicted.
comparison = pd.DataFrame({
    'Actual': y_test[:10],
    'Predicted': y_pred[:10],
    'Difference': y_test[:10] - y_pred[:10]
})
print("\n🔍 Actual vs Predicted Prices:")
print(comparison)

In [None]:
# Scatter the predictions against the true prices; points lying on the dashed
# red diagonal are perfect predictions.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, color='blue', alpha=0.6)

# Reference line y = x spanning the observed range of actual prices.
low, high = y_test.min(), y_test.max()
ax.plot([low, high], [low, high], 'r--')

ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs Predicted House Prices")
ax.grid(True)
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Recompute the evaluation metrics on the held-out test split.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\n📊 Model Evaluation:")
print(f"MAE  : {mae:.2f}")
print(f"MSE  : {mse:.2f}")
print(f"RMSE : {rmse:.2f}")
# Four decimals: with two, an R² just below 1 would be rounded up and shown as 1.00.
print(f"R²   : {r2:.4f}")
