Importing all the libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from math import sqrt

Load the dataset

In [None]:
# Location of the ENB2012 Excel workbook on the UCI archive server.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"

# Download and parse the workbook into a DataFrame.
data = pd.read_excel(io=data_url)

Naming the columns

In [None]:
# Replace the sheet's column labels with descriptive names, in positional
# order. Pandas raises if the number of names does not match the number of
# columns in `data`.
data.columns = [
    # input features
    "Relative_Compactness",
    "Surface_Area",
    "Wall_Area",
    "Roof_Area",
    "Overall_Height",
    "Orientation",
    "Glazing_Area",
    "Glazing_Area_Distribution",
    # target columns
    "Heating_Load",
    "Cooling_Load",
]

Check for missing values and forward-fill them

In [None]:
# Forward-fill missing cells: each NaN takes the last valid value above it in
# the same column. `DataFrame.ffill()` replaces `fillna(method='ffill')`,
# whose `method=` argument is deprecated in recent pandas releases, and the
# result is rebound instead of mutating the frame in place.
# Note: a NaN in the first row of a column has no earlier value to copy and
# therefore stays NaN.
if data.isnull().sum().any():
    data = data.ffill()

Correlation matrix

In [None]:
# Pairwise correlations between all columns of `data`.
correlation_matrix = data.corr()

# Draw the matrix as a heatmap with each cell annotated to two decimals.
plt.figure(figsize=(12, 10))
sns.heatmap(
    data=correlation_matrix,
    cmap='coolwarm',
    annot=True,
    fmt=".2f",
)
plt.title("Correlation Matrix")
plt.show()

In [None]:
# Horizontal box plot of the target column, to eyeball its spread and outliers.
plt.figure(figsize=(8, 5))
plt.boxplot(
    data.loc[:, "Heating_Load"],
    patch_artist=True,  # draw the box as a filled patch
    vert=False,         # lay the box out horizontally
)
plt.xlabel("Heating Load")
plt.title("Boxplot of Heating Load")
plt.show()

In [None]:
# Target: heating load only.
y = data.loc[:, "Heating_Load"]

# Features: every column except the two load columns (Cooling_Load is the
# other target and is not used as an input).
X = data.drop(["Heating_Load", "Cooling_Load"], axis="columns")

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Prepend a column of ones to each scaled matrix so the first weight of the
# hand-written model acts as the intercept.
X_train_np = np.column_stack((np.ones(X_train_scaled.shape[0]), X_train_scaled))
X_test_np = np.column_stack((np.ones(X_test_scaled.shape[0]), X_test_scaled))

In [None]:
# Start with one zero weight per column of the design matrix
# (bias column included).
weights = np.zeros(shape=(X_train_np.shape[1],))

In [None]:
def gradient_descent(X, y, weights, learning_rate, iterations, log_every=100):
    """Fit linear-regression weights with batch gradient descent.

    Each iteration computes predictions ``X @ weights``, the mean gradient of
    the squared error, ``(1/m) * X.T @ (predictions - y)``, and takes one step
    of size ``learning_rate`` against that gradient.

    Parameters
    ----------
    X : array-like
        Design matrix; must support ``np.dot(X, weights)`` and ``X.T``.
    y : array-like
        Target values; ``len(y)`` is used as the sample count ``m``.
    weights : array-like
        Initial weights. They are copied into a new float array, so the
        caller's object is never modified and integer arrays are accepted.
    learning_rate : float
        Step size applied to the gradient.
    iterations : int
        Number of update steps to run.
    log_every : int, optional
        Print the training MSE every ``log_every`` iterations (default 100,
        matching the previous hard-coded interval). Pass ``0`` or ``None`` to
        silence progress output.

    Returns
    -------
    numpy.ndarray
        The updated weights, as a new float array.
    """
    m = len(y)
    # Work on a private float copy: the in-place `-=` below would otherwise
    # overwrite the caller's array (and fail outright for integer dtypes).
    weights = np.array(weights, dtype=float)
    for i in range(iterations):
        predictions = np.dot(X, weights)
        errors = predictions - y
        gradients = (1/m) * np.dot(X.T, errors)
        weights -= learning_rate * gradients
        if log_every and i % log_every == 0:
            # MSE of the predictions made *before* this iteration's update.
            mse = np.mean(errors**2)
            print(f"Iteration {i}: MSE={mse:.4f}")
    return weights

# Train the hand-written model on the bias-augmented training matrix.
# Note: `weights` is rebound to the result, so re-running this cell resumes
# from the previously trained weights rather than from the initial zeros.
weights = gradient_descent(
    X_train_np,
    y_train.to_numpy(),
    weights,
    learning_rate=0.01,
    iterations=1000,
)

# Predictions of the hand-written model on the held-out rows.
y_pred_manual = X_test_np @ weights

In [None]:
# Fit three scikit-learn regressors on the same scaled training data and
# collect their predictions for the scaled test rows.

# Ordinary least squares.
linear_model = LinearRegression().fit(X_train_scaled, y_train)
y_pred_sklearn = linear_model.predict(X_test_scaled)

# Ridge regression with alpha=1.0.
ridge_model = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

# Lasso regression with alpha=0.1.
lasso_model = Lasso(alpha=0.1).fit(X_train_scaled, y_train)
y_pred_lasso = lasso_model.predict(X_test_scaled)

In [None]:
def evaluate_model(y_true, y_pred, model_name):
    """Print MSE, RMSE and R2 for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, scored against ``y_true``.
    model_name : str
        Label used in the header line of the printed report.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = sqrt(mse)  # RMSE is the square root of the MSE computed above
    report_lines = (
        f"{model_name} Performance:",
        f"MSE: {mse:.2f}, RMSE: {rmse:.2f}, R2: {r2:.2f}\n",
    )
    for line in report_lines:
        print(line)

# Score every model against the same held-out targets.
evaluate_model(y_true=y_test, y_pred=y_pred_manual, model_name="Manual Linear Regression")
evaluate_model(y_true=y_test, y_pred=y_pred_sklearn, model_name="Sklearn Linear Regression")
evaluate_model(y_true=y_test, y_pred=y_pred_ridge, model_name="Ridge Regression")
evaluate_model(y_true=y_test, y_pred=y_pred_lasso, model_name="Lasso Regression")

In [None]:
# Side-by-side predicted-vs-actual scatter plots: hand-written model on the
# left, scikit-learn LinearRegression on the right.
plt.figure(figsize=(14, 6))
for panel, (predictions, color, label, title) in enumerate(
    (
        (y_pred_manual, 'blue', 'Manual Model', 'Manual Linear Regression'),
        (y_pred_sklearn, 'green', 'Sklearn Model', 'Sklearn Linear Regression'),
    ),
    start=1,
):
    plt.subplot(1, 2, panel)
    plt.scatter(y_test, predictions, color=color, label=label, alpha=0.6)
    # Dashed diagonal across the observed target range: points on this line
    # are predicted exactly.
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', label='Ideal Line')
    plt.xlabel('Actual Heating Load')
    plt.ylabel('Predicted Heating Load')
    plt.title(title)
    plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# One coefficient per input feature from the fitted scikit-learn linear model,
# sorted from the largest (most positive) to the smallest (most negative).
# The model was trained on standardized inputs, so the coefficients share a
# common scale.
# NOTE(review): coefficients of strongly correlated inputs are not
# individually reliable; confirm against the correlation matrix before
# treating this ranking as "importance".
feature_importance = pd.DataFrame({
    'Feature': X_train.columns,
    'Coefficient': linear_model.coef_
}).sort_values(by='Coefficient', ascending=False)

print("\nFeature Importance:")
print(feature_importance)

# Take the headline feature from the fitted coefficients instead of
# hard-coding a name, so the printed claim always matches the table above.
top_row = feature_importance.iloc[0]
top_feature = str(top_row['Feature']).replace('_', ' ')

print("\nEnergy Efficiency Suggestions:")
if top_row['Coefficient'] > 0:
    print(f"1. Focus on optimizing {top_feature}, which has the largest positive impact on Heating Load.")
else:
    # Every coefficient is zero or negative, so no feature raises the prediction.
    print("1. No feature has a positive coefficient, so none increases the predicted Heating Load.")
# Suggestions 2 and 3 are static advice; they are not computed from the model.
print("2. Minimize Surface Area and Roof Area to reduce heat loss.")
print("3. Experiment with different Glazing Area and Orientation to achieve energy balance.")