In [1]:
# Create a dataset using pandas for the following attbituts and then predict the housing prices using neural network:

# Area: The total area of the house in square feet
# Bedrooms: The number of bedrooms in the house.
# Bathrooms: The number of bathrooms in the house.
# Age: The age of the house in years.
# Location: The neighborhood or area where the house is located.
# Garage Size: The size of the garage in square feet.
# Yard Size: The size of the yard or outdoor space in square feet.
# Amenities: A binary feature indicating whether the house has additional amenities such as a swimming pool, gym, etc.
# School Rating: The rating of nearby schools, on a scale from 1 to 10.
# Distance to City Center: The distance of the house from the city center in miles.
# Price: The selling price of the house.

# dataset example:
# data = { 'Area': [2000, 1800, 2500, 2200, 1900, 2800, 2100, 1700, 2400, 2000],
#         'Bedrooms': [3, 2, 4, 3, 2, 5, 4, 2, 3, 3],
#         'Bathrooms': [2, 1.5, 3, 2.5, 2, 3.5, 2.5, 1, 3, 2],
#         'Age': [10, 5, 15, 8, 3, 20, 12, 6, 18, 9],
#         'Location': ['Suburban', 'Urban', 'Rural', 'Suburban', 'Urban', 'Rural', 'Suburban', 'Urban', 'Rural', 'Suburban'],
#         'Garage_Size': [400, 300, 500, 450, 350, 600, 400, 250, 550, 400],
#         'Yard_Size': [800, 600, 1000, 900, 700, 1200, 800, 500, 1100, 800],
#         'Amenities': [1, 0, 1, 1, 0, 1, 1, 0, 1, 1],
#         'School_Rating': [8, 7, 6, 9, 8, 5, 7, 6, 4, 8],
#         'Distance_to_City_Center': [5, 2, 10, 7, 4, 15, 6, 3, 12, 5],
#         'Price': [300000, 250000, 350000, 320000, 280000, 400000, 310000, 240000, 370000, 300000]
#        }

In [2]:
# data = pd.read_csv('housing_data.csv')
# print(data.head())  # Prints the first 5 rows of the CSV file

In [3]:
# Create the dataset
data = {
    'Area': [2000, 1800, 2500, 2200, 1900, 2800, 2100, 1700, 2400, 2000],
    'Bedrooms': [3, 2, 4, 3, 2, 5, 4, 2, 3, 3],
    'Bathrooms': [2, 1.5, 3, 2.5, 2, 3.5, 2.5, 1, 3, 2],
    'Age': [10, 5, 15, 8, 3, 20, 12, 6, 18, 9],
    'Location': ['Suburban', 'Urban', 'Rural', 'Suburban', 'Urban', 'Rural', 'Suburban', 'Urban', 'Rural', 'Suburban'],
    'Garage_Size': [400, 300, 500, 450, 350, 600, 400, 250, 550, 400],
    'Yard_Size': [800, 600, 1000, 900, 700, 1200, 800, 500, 1100, 800],
    'Amenities': [1, 0, 1, 1, 0, 1, 1, 0, 1, 1],
    'School_Rating': [8, 7, 6, 9, 8, 5, 7, 6, 4, 8],
    'Distance_to_City_Center': [5, 2, 10, 7, 4, 15, 6, 3, 12, 5],
    'Price': [300000, 250000, 350000, 320000, 280000, 400000, 310000, 240000, 370000, 300000]
}

In [4]:
import pandas as pd

In [5]:
# Convert the data into a pandas DataFrame
housing_data = pd.DataFrame(data)

In [6]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [7]:
# Preprocess the data
# Encode the 'Location' column
label_encoder = LabelEncoder()
housing_data['Location'] = label_encoder.fit_transform(housing_data['Location'])

In [8]:
# housing_data.head()

In [9]:
# Split the data into features and target
X = housing_data.drop('Price', axis=1)
y = housing_data['Price']

In [10]:
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [13]:
from tensorflow import keras
import tensorflow as tf

In [14]:
# Build the neural network model
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),  # Define the input shape separately
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)  # Output layer
])

In [15]:
# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [16]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 99749814272.0000 - mae: 311666.4062 - val_loss: 99249946624.0000 - val_mae: 314999.9375
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - loss: 99749765120.0000 - mae: 311666.3438 - val_loss: 99249930240.0000 - val_mae: 314999.8750
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - loss: 99749732352.0000 - mae: 311666.2812 - val_loss: 99249913856.0000 - val_mae: 314999.8750
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 99749699584.0000 - mae: 311666.2188 - val_loss: 99249897472.0000 - val_mae: 314999.8438
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - loss: 99749650432.0000 - mae: 311666.1562 - val_loss: 99249897472.0000 - val_mae: 314999.8438
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - loss: 9974961766

In [17]:
# Evaluate the model on the test set
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Mean Absolute Error on test set: {test_mae}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 99677151232.0000 - mae: 309962.7812
Mean Absolute Error on test set: 309962.78125


In [18]:
# Predict the prices for the test set
predictions = model.predict(X_test)
print("Predicted Prices:", predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Predicted Prices: [[35.29765 ]
 [39.143463]]


In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [20]:
# Calculate additional metrics
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f"Mean Squared Error on test set: {mse}")
print(f"R-squared on test set: {r2}")

Mean Squared Error on test set: 99677155393.01178
R-squared on test set: -26.68809700012207
