In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler


In [3]:
# Load the housing dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="housing.csv")

In [11]:
# Print a summary of the frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 489 entries, 0 to 488
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   RM       489 non-null    float64
 1   LSTAT    489 non-null    float64
 2   PTRATIO  489 non-null    float64
 3   MEDV     489 non-null    float64
dtypes: float64(4)
memory usage: 15.4 KB


In [4]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0


In [6]:
# Features and target variable
X = df.drop(columns=['MEDV'])  # MEDV is the target (Median value of owner-occupied homes)
y = df['MEDV']

# Standardize the features for better performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert the scaled features back to a DataFrame for easier handling
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [8]:
# Instantiate an ordinary least-squares regressor
model = LinearRegression()

# Fit it to the training split
model.fit(X=X_train, y=y_train)


In [9]:
# Predict on the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics, one per line
print(
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R-squared: {r2:.2f}",
    sep="\n",
)


Mean Squared Error (MSE): 6789025559.27
R-squared: 0.69


In [17]:
# Example: predict the price for one new observation (raw, unscaled values)
example_features = pd.DataFrame({
    'RM': [6.575],
    'LSTAT': [4.98],
    'PTRATIO': [15.3]
})

# Scale the new data using the same, already-fitted scaler.
# `transform` returns a bare NumPy array, so re-attach the column names:
# the model was fitted on a DataFrame, and scikit-learn warns when asked to
# predict on input that lacks the feature names it saw during fit.
example_features_scaled = pd.DataFrame(
    scaler.transform(example_features),
    columns=example_features.columns,
    index=example_features.index,
)

# Predict the price
predicted_price = model.predict(example_features_scaled)
print(f"Predicted Housing Price: ${predicted_price[0]:.2f}")


Predicted Housing Price: $633616.89


