In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [7]:
df = pd.read_csv('housing.csv')
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [8]:
print(df.columns)

Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value', 'ocean_proximity'],
      dtype='object')


In [9]:
df = pd.read_csv('housing.csv')
df = pd.get_dummies(df, columns=['ocean_proximity'], drop_first=True)
data = df.dropna()

In [10]:
X = data.drop('median_house_value', axis=1)
y = data['median_house_value']
print(X)
print(y)

       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0        -122.23     37.88                41.0        880.0           129.0   
1        -122.22     37.86                21.0       7099.0          1106.0   
2        -122.24     37.85                52.0       1467.0           190.0   
3        -122.25     37.85                52.0       1274.0           235.0   
4        -122.25     37.85                52.0       1627.0           280.0   
...          ...       ...                 ...          ...             ...   
20635    -121.09     39.48                25.0       1665.0           374.0   
20636    -121.21     39.49                18.0        697.0           150.0   
20637    -121.22     39.43                17.0       2254.0           485.0   
20638    -121.32     39.43                18.0       1860.0           409.0   
20639    -121.24     39.37                16.0       2785.0           616.0   

       population  households  median_income  ocean

In [11]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [13]:
def weighted_mse(y_true, y_pred):
    weights = tf.where(y_true > 200000, 2.0, 1.0)
    return tf.reduce_mean(tf.square(y_true - y_pred) * weights)

In [14]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(1)
])
print(model)

<Sequential name=sequential, built=True>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(optimizer='adam', loss=weighted_mse, metrics=['mse'])
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

Epoch 1/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 100611563520.0000 - mse: 55585652736.0000
Epoch 2/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 102582722560.0000 - mse: 56461586432.0000
Epoch 3/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 100380262400.0000 - mse: 55449612288.0000
Epoch 4/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 99839401984.0000 - mse: 55142793216.0000
Epoch 5/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 99783589888.0000 - mse: 55156600832.0000 
Epoch 6/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 99100180480.0000 - mse: 54751698944.0000
Epoch 7/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 100402184192.0000 - mse: 55376142336.0000
Epoch 8/10
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2aa9ee6a080>

In [16]:
y_pred = model.predict(X_test).flatten()
standard_mse = mean_squared_error(y_test, y_pred)
custom_mse = np.mean((y_test - y_pred)**2 * np.where(y_test > 200000, 2.0, 1.0))


[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step   


In [17]:
print("\nLoss Comparison Table:")
print(f"{'Standard MSE':<20}: {standard_mse:.2f}")
print(f"{'Custom Weighted MSE':<20}: {custom_mse:.2f}")


Loss Comparison Table:
Standard MSE        : 54518001818.06
Custom Weighted MSE : 99227027446.72
