**1.Consider two more features and implement the algorithm**

In [3]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Step 2: Load the housing dataset
db = pd.read_csv('/content/sample_data/california_housing_train.csv')

# Step 3: Select multiple features (3 in this case)
X = db[['housing_median_age', 'median_income', 'total_rooms']].values
Y = db['median_house_value'].values

# Step 4: Locally Weighted Regression Function (works for multiple features)
def locally_weighted_regression(X, Y, tau, x_query):
    weights = np.exp(-np.sum((X - x_query)**2, axis=1) / (2 * tau**2))
    X_augmented = np.c_[np.ones(X.shape[0]), X]
    x_query_augmented = np.r_[1, x_query]
    W = np.diag(weights)
    XTW = X_augmented.T @ W
    theta = np.linalg.pinv(XTW @ X_augmented) @ XTW @ Y
    return x_query_augmented @ theta

# Step 5: Predict for a sample input
X_query = np.array([30, 4.5, 2500])  # Example: 30 yrs age, 4.5 income, 2500 rooms
tau = 0.5
y_query = locally_weighted_regression(X, Y, tau, X_query)
print(f"Predicted median house value for the given input: {y_query}")


Predicted median house value for the given input: 95034.51483960787


**2.Implement the same for Diabetes dataset available in sklearn.datasets**

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler


In [5]:
# Load the dataset
diabetes = load_diabetes()
X = diabetes.data
Y = diabetes.target

# Standardize features for better performance (important for LWR)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [6]:
def locally_weighted_regression(X, Y, tau, x_query):
    weights = np.exp(-np.sum((X - x_query)**2, axis=1) / (2 * tau**2))
    X_augmented = np.c_[np.ones(X.shape[0]), X]
    x_query_augmented = np.r_[1, x_query]
    W = np.diag(weights)
    XTW = X_augmented.T @ W
    theta = np.linalg.pinv(XTW @ X_augmented) @ XTW @ Y
    return x_query_augmented @ theta


In [7]:
# Example query point (we can take the first record as a test query)
x_query = X_scaled[0]
tau = 0.5  # Try 0.1, 0.5, 1.0, etc.

y_pred = locally_weighted_regression(X_scaled, Y, tau, x_query)
print(f"Predicted diabetes progression score: {y_pred}")
print(f"Actual value: {Y[0]}")


Predicted diabetes progression score: 151.15629525417413
Actual value: 151.0


**3.Compare KNN regression and Local weighted regression (LWR) algorithm considering multiple features for both housing dataset and Diabetes dataset.**

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load housing dataset
db = pd.read_csv("/content/sample_data/california_housing_train.csv")
X = db[['housing_median_age', 'median_income', 'total_rooms']].values
Y = db['median_house_value'].values

# Standardize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)


In [10]:
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, Y_train)
y_pred_knn = knn.predict(X_test)
mse_knn = mean_squared_error(Y_test, y_pred_knn)
print(f"Housing Dataset - KNN MSE: {mse_knn:.2f}")


Housing Dataset - KNN MSE: 6686846368.91


In [11]:
def locally_weighted_regression(X, Y, tau, x_query):
    weights = np.exp(-np.sum((X - x_query) ** 2, axis=1) / (2 * tau ** 2))
    X_augmented = np.c_[np.ones(X.shape[0]), X]
    x_query_augmented = np.r_[1, x_query]
    W = np.diag(weights)
    XTW = X_augmented.T @ W
    theta = np.linalg.pinv(XTW @ X_augmented) @ XTW @ Y
    return x_query_augmented @ theta

# Predict for a few test samples due to high cost
lwr_preds = []
for i in range(50):  # Predict on first 50 test points
    pred = locally_weighted_regression(X_train, Y_train, tau=0.5, x_query=X_test[i])
    lwr_preds.append(pred)
mse_lwr = mean_squared_error(Y_test[:50], lwr_preds)
print(f"Housing Dataset - LWR MSE (first 50): {mse_lwr:.2f}")


Housing Dataset - LWR MSE (first 50): 3972832994.23


In [12]:
from sklearn.datasets import load_diabetes

diabetes = load_diabetes()
X = diabetes.data
Y = diabetes.target

# Standardize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)


In [13]:
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, Y_train)
y_pred_knn = knn.predict(X_test)
mse_knn = mean_squared_error(Y_test, y_pred_knn)
print(f"Diabetes Dataset - KNN MSE: {mse_knn:.2f}")


Diabetes Dataset - KNN MSE: 3019.08


In [14]:
lwr_preds = []
for i in range(50):
    pred = locally_weighted_regression(X_train, Y_train, tau=0.5, x_query=X_test[i])
    lwr_preds.append(pred)
mse_lwr = mean_squared_error(Y_test[:50], lwr_preds)
print(f"Diabetes Dataset - LWR MSE (first 50): {mse_lwr:.2f}")


Diabetes Dataset - LWR MSE (first 50): 8543.23
