In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib
# Load the housing table; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv('housing.csv')

# Target (dependent variable) and the single predictor (independent variable).
# The double brackets keep X a 2-D DataFrame rather than a 1-D Series.
y = df['median_house_value']
X = df[['total_rooms']]

# Standardize the predictor. The fitted scaler is kept (and saved below)
# so the same transform can be re-applied to new inputs.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Build and train the linear regression in one step; fit() returns the
# estimator itself, so `model` is the fitted LinearRegression.
model = LinearRegression().fit(X_scaled, y)

# NOTE(review): predictions are produced for the very rows the model was
# fitted on, so every metric below is an in-sample figure. No held-out
# data is involved in this cell.
y_pred = model.predict(X_scaled)

# Heading followed by the first five predictions on the next line.
print("Predicted House Values:", y_pred[:5], sep="\n")

# Evaluation metrics comparing the predictions with the observed targets.
mae = mean_absolute_error(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))  # RMSE = square root of the MSE
r2 = r2_score(y, y_pred)

# Report each metric on its own line.
for metric_label, metric_value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared (R²):", r2),
):
    print(metric_label, metric_value)

# Persist the fitted model and its scaler as separate files. The scaler
# dump stays the final expression so the cell still displays its return
# value (the list of written file names).
joblib.dump(model, "house_model.pkl")
joblib.dump(scaler, "scaler.pkl")

Predicted House Values:
[194396.9708832  238526.82203164 198562.30656806 197192.78393233
 199697.662121  ]
Mean Absolute Error (MAE): 89931.67504919827
Root Mean Squared Error (RMSE): 114349.7403659444
R-squared (R²): 0.017997057943996753


['scaler.pkl']