In [16]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from sklearn.ensemble import RandomForestRegressor




In [None]:
# Read the CSV (path is relative to the notebook's working directory) into
# the frame that every later cell works on, then display it as the cell output.
df = pd.read_csv(filepath_or_buffer='Energy_ENB2012_data.csv')
df



In [None]:
# Display basic information: column dtypes / non-null counts, then the first rows.
print("Dataset Overview:")
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.head())



In [None]:

# Count the missing entries in each column and print them under a header.
print("\nMissing Values:", df.isna().sum(), sep="\n")


In [None]:
# Summary statistics as produced by DataFrame.describe(), printed under a header.
print("\nDescriptive Statistics:", df.describe(), sep="\n")



In [None]:
# Central Tendency Measures
# Prints mean, median and mode for every numeric column.
print("\nMeasures of Central Tendency:")
for column in df.select_dtypes(include=[np.number]).columns:
    mean_value = df[column].mean()
    median_value = df[column].median()
    # Series.mode() returns an empty Series when the column holds no non-null
    # values; fall back to NaN instead of failing on the [0] lookup.
    column_modes = df[column].mode()
    mode_value = column_modes.iloc[0] if not column_modes.empty else np.nan
    print(f"{column} - Mean: {mean_value:.2f}, Median: {median_value:.2f}, Mode: {mode_value:.2f}")



In [None]:
# Data Visualization
# Annotated heatmap of the pairwise correlations between the columns.
plt.figure(figsize=(12, 6))
# Correlate numeric columns only: since pandas 2.0, DataFrame.corr() no longer
# drops non-numeric columns silently and raises if one is present. This uses the
# same numeric filter as the central-tendency cell.
numeric_corr = df.select_dtypes(include=[np.number]).corr()
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Feature Correlation Heatmap')
plt.show()





In [None]:
# Histogram of Energy Consumption
# Distribution (30 bins plus a KDE curve) of the values in the last column.
# NOTE(review): this plots the LAST column, while the regression and
# random-forest cells model the second-to-last column - confirm which target
# is meant here.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(df.iloc[:, -1], kde=True, bins=30, ax=ax)
ax.set_title('Distribution of Energy Consumption')
ax.set_xlabel('Energy Consumption')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Identifying Outliers using Boxplots
# One box per column on a shared axis; tick labels are rotated 90 degrees.
fig, ax = plt.subplots(figsize=(12, 6))
df.boxplot(rot=90, ax=ax)
ax.set_title('Boxplot for Outlier Detection')
plt.show()



In [None]:
# Regression Analysis
# Ordinary least squares fit of the second-to-last column on every column
# except the last two, with an intercept term added.

y = df.iloc[:, -2]   # Dependent variable (energy heating load)
X = sm.add_constant(df.iloc[:, :-2])  # Independent variables (excluding energy targets) + intercept
model = sm.OLS(y, X).fit()
print(model.summary())



In [None]:
# Feature Importance using Random Forest
# Fits a random forest on the same predictor/target split as the regression
# cell and ranks the predictors by the fitted model's feature_importances_.
# RandomForestRegressor is imported in the notebook's top import cell so that
# all dependencies are declared in one place.
X = df.iloc[:, :-2]  # predictors: every column except the last two
y = df.iloc[:, -2]   # target: second-to-last column
# random_state is fixed so the importances are reproducible across runs.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)
feature_importance = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
print("\nFeature Importance:")
print(feature_importance)



In [None]:
# Plot Feature Importance
# Bar chart of the importance scores held in `feature_importance`.
fig, ax = plt.subplots(figsize=(10, 6))
feature_importance.plot.bar(color='skyblue', ax=ax)
ax.set_title('Feature Importance for Energy Efficiency')
ax.set_xlabel('Features')
ax.set_ylabel('Importance Score')
plt.show()



In [28]:

# Optimization Proposal
# Hand-written mapping from a design aspect to the change recommended for it,
# listed in the order the suggestions are later printed.
improvement_suggestions = dict(
    [
        ("Compactness", "Increase compactness to reduce heat loss."),
        ("Wall Area", "Optimize insulation material to reduce energy leakage."),
        ("Roof Area", "Implement energy-reflective roofing."),
        ("Glazing Area", "Use triple-glazed windows for better insulation."),
    ]
)


In [None]:
# Print each suggestion as a "- <aspect>: <advice>" bullet under a header.
print("\nDesign Improvement Suggestions:")
for suggestion in improvement_suggestions.items():
    print("- {}: {}".format(*suggestion))
