In [138]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import umap
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.feature_selection import RFE
from sklearn.linear_model import Ridge
from sklearn.decomposition import FastICA

In [139]:
df = pd.read_csv('/content/ElectricityBill.csv')


numerical_cols = ['Construction_Year', 'Number_of_Floors', 'Energy_Consumption_Per_SqM',
                   'Water_Usage_Per_Building', 'Waste_Recycled_Percentage', 'Occupancy_Rate',
                   'Indoor_Air_Quality', 'Smart_Devices_Count', 'Maintenance_Resolution_Time',
                   'Energy_Per_SqM', 'Number_of_Residents']

categorical_cols = ['Building_Type', 'Green_Certified', 'Building_Status', 'Maintenance_Priority']

# Fill missing values....... fill with mean median mode depending on data type

# For numerical columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median())
# For categorical columns
df[categorical_cols] = df[categorical_cols].fillna(df[categorical_cols].mode().iloc[0])

# One-Hot Encoding for categorical columns
df = pd.get_dummies(df, columns=categorical_cols)

# Defining the features and target variable
X = df.drop('Electricity_Bill', axis=1)
Y = df['Electricity_Bill']

# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [144]:
# Define number of components to try
n_components_list = [4, 5, 6, 8]

results = {}

for n_components in n_components_list:
    # Apply ICA
    ica = FastICA(n_components=n_components, random_state=42)
    X_train_ica = ica.fit_transform(X_train)
    X_test_ica = ica.transform(X_test)

    # Train Ridge Regression model
    model = Ridge()
    model.fit(X_train_ica, Y_train)

    # Predict and evaluate
    Y_train_pred = model.predict(X_train_ica)
    Y_test_pred = model.predict(X_test_ica)

    #performance
    metrics = {
        'MSE_train': mean_squared_error(Y_train, Y_train_pred),
        'RMSE_train': mean_squared_error(Y_train, Y_train_pred, squared=False),
        'MAE_train': mean_absolute_error(Y_train, Y_train_pred),
        'R2_train': r2_score(Y_train, Y_train_pred),
        'Adjusted_R2_train': 1 - (1 - r2_score(Y_train, Y_train_pred)) * (len(Y_train) - 1) / (len(Y_train) - X_train_ica.shape[1] - 1),

        'MSE_test': mean_squared_error(Y_test, Y_test_pred),
        'RMSE_test': mean_squared_error(Y_test, Y_test_pred, squared=False),
        'MAE_test': mean_absolute_error(Y_test, Y_test_pred),
        'R2_test': r2_score(Y_test, Y_test_pred),
        'Adjusted_R2_test': 1 - (1 - r2_score(Y_test, Y_test_pred)) * (len(Y_test) - 1) / (len(Y_test) - X_test_ica.shape[1] - 1)
    }

    results[n_components] = metrics

for n_components, metrics in results.items():
    print(f"\n--- ICA with {n_components} Components ---")
    print(f"Training Data:")
    print(f"MSE: {metrics['MSE_train']}")
    print(f"RMSE: {metrics['RMSE_train']}")
    print(f"MAE: {metrics['MAE_train']}")
    print(f"R2 Score: {metrics['R2_train']}")
    print(f"Adjusted R2 Score: {metrics['Adjusted_R2_train']}")

    print(f"\nTesting Data:")
    print(f"MSE: {metrics['MSE_test']}")
    print(f"RMSE: {metrics['RMSE_test']}")
    print(f"MAE: {metrics['MAE_test']}")
    print(f"R2 Score: {metrics['R2_test']}")
    print(f"Adjusted R2 Score: {metrics['Adjusted_R2_test']}")


--- ICA with 4 Components ---
Training Data:
MSE: 24701058.83595767
RMSE: 4970.015979446914
MAE: 4010.9839044323808
R2 Score: 0.00481533301878545
Adjusted R2 Score: 0.0008145906389613877

Testing Data:
MSE: 24167219.465412047
RMSE: 4916.016625827464
MAE: 3818.893490105248
R2 Score: 0.004600837563256355
Adjusted R2 Score: -0.011650577333670142

--- ICA with 5 Components ---
Training Data:
MSE: 24683781.19389808
RMSE: 4968.277487610579
MAE: 4008.4359205885153
R2 Score: 0.005511434533035486
Adjusted R2 Score: 0.0005089769602639738

Testing Data:
MSE: 24261490.949717674
RMSE: 4925.595491889044
MAE: 3831.4951996885943
R2 Score: 0.0007179847322226207
Adjusted R2 Score: -0.019759105744576066

--- ICA with 6 Components ---
Training Data:
MSE: 24682728.660109933
RMSE: 4968.171561058448
MAE: 4009.3988550415106
R2 Score: 0.00555384022480776
Adjusted R2 Score: -0.0004548979007221732

Testing Data:
MSE: 24253810.821500693
RMSE: 4924.815815997659
MAE: 3829.8593447145345
R2 Score: 0.0010343137664968


FastICA did not converge. Consider increasing tolerance or the maximum number of iterations.

