In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score
import keras
from keras.models import Sequential
from keras.layers import Dense

In [3]:
df = pd.read_excel("ENB2012_data.xlsx")
df

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.90,563.5,318.5,122.50,7.0,2,0.0,0,20.84,28.28
...,...,...,...,...,...,...,...,...,...,...
763,0.64,784.0,343.0,220.50,3.5,5,0.4,5,17.88,21.40
764,0.62,808.5,367.5,220.50,3.5,2,0.4,5,16.54,16.88
765,0.62,808.5,367.5,220.50,3.5,3,0.4,5,16.44,17.11
766,0.62,808.5,367.5,220.50,3.5,4,0.4,5,16.48,16.61


In [4]:
X = df.iloc[:, :-2].values
y_heating = df.iloc[:, -2].values
y_cooling = df.iloc[:, -1].values

In [5]:
X_train, X_test, y_train_h, y_test_h = train_test_split(X, y_heating, test_size=0.2, random_state=42)
_, _, y_train_c, y_test_c = train_test_split(X, y_cooling, test_size=0.2, random_state=42)

In [7]:
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train_h)
y_pred_rf = rf.predict(X_test)

In [8]:
print("=== Random Forest (Heating Load) ===")
print("RMSE:", np.sqrt(mean_squared_error(y_test_h, y_pred_rf)))
print("R²:", r2_score(y_test_h, y_pred_rf))

=== Random Forest (Heating Load) ===
RMSE: 0.49263804987213333
R²: 0.9976716239233653


In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = model.fit(X_train_scaled, y_train_h, 
                    validation_split=0.2, 
                    epochs=100, batch_size=16, verbose=0)

y_pred_nn = model.predict(X_test_scaled).flatten()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


In [11]:
print("\n=== Neural Network (Heating Load) ===")
print("RMSE:", np.sqrt(mean_squared_error(y_test_h, y_pred_nn)))
print("R²:", r2_score(y_test_h, y_pred_nn))


=== Neural Network (Heating Load) ===
RMSE: 1.432020152626324
R²: 0.9803258724262713


In [None]:
'''In my domain, classical ML performed better than neural networks because the dataset
 I worked with was relatively small, structured, and tabular. 
 Random Forest was able to capture the non-linear relationships efficiently with minimal preprocessing,
   trained much faster, and generalized well without overfitting.
 In contrast, the neural network required more preprocessing (scaling), 
 careful tuning of layers and epochs, and still did not outperform the classical model. 
 This shows that for engineering and infrastructure-related datasets of this size, classical ML is 
 often more practical and effective, while neural networks may become advantageous only when dealing 
 with very large or highly complex data such as images or satellite maps.'''

''