In [1]:
# Select Matplotlib's Qt backend: figures open in a separate interactive window
# (useful for rotating the 3D plot further down) instead of rendering inline.
%matplotlib qt

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)
from sklearn.model_selection import train_test_split

from lls import LLS

In [3]:
# Fetch the California housing dataset and unpack the pieces used by later cells.
california = fetch_california_housing()
data, target = california.data, california.target
features = california.feature_names
# Display the target vector as this cell's output.
target

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [4]:
# Combine the feature matrix and the target into one frame (target stored as the
# last column, 'Target') and preview the first rows.
data_df = pd.DataFrame(data, columns=features).assign(Target=target)
data_df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [5]:
# Pairwise correlations between all columns, then the 'Target' column ranked
# from the most positive to the most negative correlation.
corr = data_df.corr()
corr_target = corr.loc[:, 'Target'].sort_values(ascending=False)
corr_target

Target        1.000000
MedInc        0.688075
AveRooms      0.151948
HouseAge      0.105623
AveOccup     -0.023737
Population   -0.024650
Longitude    -0.045967
AveBedrms    -0.046701
Latitude     -0.144160
Name: Target, dtype: float64

In [6]:
# Use the two features with the highest positive correlation to the target
# (MedInc and AveRooms in the ranking above) as the regression inputs.
# list.index gives the column position directly and raises a ValueError naming
# the missing feature, rather than an IndexError from indexing an empty result.
feature1 = list(features).index("MedInc")
feature2 = list(features).index("AveRooms")

# X keeps only the two selected columns (in that order); Y is the full target vector.
X = data[:, [feature1, feature2]]
Y = target

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each split.
print("Training set size:", X_train.shape[0])
print("Testing set size:", X_test.shape[0])

Training set size: 16512
Testing set size: 4128


In [8]:
# Fit the project's LLS model on the training split and print the resulting weights.
# NOTE(review): LLS comes from the local `lls` module, which is not visible here.
# The plotting cell treats weights[0] as the constant term and weights[1], weights[2]
# as the coefficients of the two selected features — confirm against the LLS code.
lls = LLS()
weights = lls.calculate_weights(X_train, y_train)
print("LLS weights:", weights)

LLS weights: [ 0.59726778  0.43626089 -0.04017161]


In [9]:
# Predict the target for the held-out rows with the fitted model.
# The displayed result is already a one-dimensional array, so no flattening is applied.
y_pred_test = lls.predict(X_test)
y_pred_test

array([1.16230214, 1.49913482, 1.95573059, ..., 4.33311942, 1.59978552,
       1.98667198])

In [13]:
# 3D view of the held-out observations together with the fitted regression plane.
bg_color = '#2b2b2b'    # dark background shared by figure, axes and legend
text_color = '#e0e0e0'  # light colour for title, axis labels and ticks
grid_color = '#555555'  # muted grey for grid lines and the legend border

fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={'projection': '3d'})
fig.patch.set_facecolor(bg_color)
ax.set_facecolor(bg_color)

# Actual test data: the two selected features against the true target.
ax.scatter(
    X_test[:, 0], X_test[:, 1], y_test,
    c='pink', marker='o', alpha=0.7, label='Actual Test Data',
)

# Evaluate the fitted plane on a 10x10 grid spanning the range of each test feature.
# weights[0] is used as the constant term, weights[1] and weights[2] as the slopes.
x1_range = np.linspace(X_test[:, 0].min(), X_test[:, 0].max(), 10)
x2_range = np.linspace(X_test[:, 1].min(), X_test[:, 1].max(), 10)
x1_grid, x2_grid = np.meshgrid(x1_range, x2_range)
y_pred_plane = weights[0] + weights[1] * x1_grid + weights[2] * x2_grid
ax.plot_surface(x1_grid, x2_grid, y_pred_plane, color='blue', alpha=0.5)

# Title plus axis labels; the three axis labels share one text style.
label_style = {'fontsize': 12, 'color': text_color}
ax.set_title('California Housing Prices Regression', fontsize=14, color=text_color)
ax.set_xlabel('Median Income (MedInc)', **label_style)
ax.set_ylabel('Average Rooms (AveRooms)', **label_style)
ax.set_zlabel('Median House Price ($100,000s)', **label_style)

# Recolour the ticks on every axis so they stay readable on the dark background.
for axis_name in ('x', 'y', 'z'):
    ax.tick_params(axis=axis_name, colors=text_color)
# NOTE(review): 3D axes may not honour grid styling keyword arguments — confirm
# that the dashed grey grid actually renders as intended.
ax.grid(color=grid_color, linestyle='--', linewidth=0.5, alpha=0.6)

ax.legend(
    loc='upper left', fontsize=10,
    facecolor=bg_color, edgecolor=grid_color, framealpha=0.8,
)

plt.tight_layout()
plt.show()


In [11]:
# Score the held-out predictions with standard regression metrics:
# MAE and RMSE are in the target's units, MSE in squared units, and
# R² is the fraction of target variance explained by the model.
mae = mean_absolute_error(y_test, y_pred_test)
mse = mean_squared_error(y_test, y_pred_test)
rmse = root_mean_squared_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R²) Score:", r2)


Mean Absolute Error (MAE): 0.624682807713972
Mean Squared Error (MSE): 0.7021550469058007
Root Mean Squared Error (RMSE): 0.8379469236806115
R-squared (R²) Score: 0.4641710652437856
