# Building Environmental Performance Analysis

This notebook implements the core functionality from the Streamlit dashboard for analyzing building environmental performance. It includes data processing, 3D visualization, and statistical analysis components.

## Import Libraries

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import folium
from joblib import Parallel, delayed
from IPython.display import display, HTML
from data_fetcher import get_city_bounding_box, fetch_buildings_from_osm

ModuleNotFoundError: No module named 'folium'

## Step 1: Data Processing Functions

In [None]:
def process_data(df):
    """
    Process building data by normalizing metrics and classifying buildings.
    
    Args:
        df (DataFrame): DataFrame containing building data
        
    Returns:
        DataFrame: Processed DataFrame with additional columns
    """
    # Normalize data
    df['energy_norm'] = df['energy_consumption_kwh'] / df['energy_consumption_kwh'].max()
    df['carbon_norm'] = df['carbon_footprint_tco2e'] / df['carbon_footprint_tco2e'].max()
    df['water_norm'] = df['water_usage_m3'] / df['water_usage_m3'].max()

    # Vectorized distance calculation
    df['distance'] = np.sqrt(df['energy_norm']**2 + df['carbon_norm']**2 + df['water_norm']**2)

    # Parallelize binning and class assignment
    def assign_class(distance, thresholds):
        return np.digitize(distance, thresholds[1:], right=True)

    thresholds = np.linspace(df['distance'].min(), df['distance'].max(), 7)
    df['class'] = Parallel(n_jobs=-1)(delayed(assign_class)(d, thresholds) for d in df['distance'])

    # Map class numbers to letters
    class_labels = ['A', 'B', 'C', 'D', 'E', 'F']
    df['class_label'] = df['class'].apply(lambda x: class_labels[x])

    return df

## Step 2: Data Fetching

In [None]:
def fetch_building_data(city="Casablanca", country="Morocco", limit=1000):
    """
    Fetch building data for a specified city and country.
    
    Args:
        city (str): City name
        country (str): Country name
        limit (int): Maximum number of buildings to fetch
        
    Returns:
        DataFrame: Processed building data
    """
    try:
        bounding_box = get_city_bounding_box(city, country)
        if not bounding_box:
            print(f"Could not fetch bounding box for {city}, {country}.")
            return None
        
        df = fetch_buildings_from_osm(city, country, limit=limit)
        if df.empty:
            print(f"No buildings found for {city}, {country}.")
            return None
        
        # Process data
        processed_df = process_data(df)
        print(f"Fetched and classified {len(df)} buildings.")
        return processed_df
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

## Fetch Buildings Data
Run this cell to fetch building data for a city

In [None]:
# Set city and country
city = "Rabat"  # Change as needed
country = "Morocco"

# Fetch and process data
df = fetch_building_data(city, country)

# Display basic information about the dataset
if df is not None:
    print("\nDataset Information:")
    print(f"Number of buildings: {len(df)}")
    print("\nClass distribution:")
    display(df['class_label'].value_counts().sort_index())
    print("\nSample data:")
    display(df.head())

Fetched and classified 1000 buildings.

Dataset Information:
Number of buildings: 1000

Class distribution:


class_label
A    149
B    196
C    190
D    214
E    168
F     83
Name: count, dtype: int64


Sample data:


Unnamed: 0,city,building_name,latitude,longitude,energy_consumption_kwh,carbon_footprint_tco2e,water_usage_m3,energy_norm,carbon_norm,water_norm,distance,class,class_label
0,Rabat,"al khaizouran 2, ilot 2",33.90621,-6.912826,887062,372.0,30290.0,0.887642,0.640275,0.776647,1.342028,4,E
1,Rabat,Centre Visite Technique Dekra,33.987906,-6.872168,967504,492.0,19629.0,0.968136,0.846816,0.503295,1.381192,4,E
2,Rabat,MGPAP,33.966439,-6.865826,671462,310.7,20365.0,0.671901,0.534768,0.522166,1.00503,2,C
3,Rabat,Ambassade du Chili,33.980314,-6.843922,411307,219.6,9198.0,0.411576,0.377969,0.23584,0.606528,0,A
4,Rabat,سفارة اليابان,33.981116,-6.844665,704715,383.3,21180.0,0.705175,0.659725,0.543063,1.107893,3,D


## Alternative: Create Sample Data for Testing
Use this if you don't want to fetch real data

In [None]:
def create_sample_data(n=100):
    """Create sample building data for testing purposes."""
    np.random.seed(42)  # For reproducibility
    
    
    data = {
        'building_name': [f"Building {i}" for i in range(1, n+1)],
        'energy_consumption_kwh': np.random.uniform(50, 500, n),
        'carbon_footprint_tco2e': np.random.uniform(5, 100, n),
        'water_usage_m3': np.random.uniform(100, 2000, n),
        'latitude': np.random.uniform(33.56, 33.60, n),  
        'longitude': np.random.uniform(-7.65, -7.58, n)  
    }
    
    # Create DataFrame and process it
    sample_df = pd.DataFrame(data)
    processed_df = process_data(sample_df)
    
    return processed_df

# Uncomment to use sample data instead of fetched data
# df = create_sample_data(150)
# print("Created sample data with 150 buildings")
# display(df.head())

## Filter data by classes

In [None]:
selected_classes = ['A', 'B', 'C', 'D', 'E', 'F']

if df is not None:
    filtered_df = df[df['class_label'].isin(selected_classes)]
    print(f"Filtered to {len(filtered_df)} buildings in classes {', '.join(selected_classes)}")
else:
    print("No data available. Run the data fetching or sample data creation cell first.")

Filtered to 1000 buildings in classes A, B, C, D, E, F


## Step 3: 3D Visualization

In [None]:
def plot_3d_classification(df):
    """3D classification visualization with spheres representing class boundaries."""
    if df is None or df.empty:
        print("No data available for visualization")
        return None
        
    class_colors = {
        'A': 'green', 'B': 'lightgreen', 'C': 'yellow',
        'D': 'orange', 'E': 'red', 'F': 'darkred'
    }

    fig = px.scatter_3d(
        df, x='energy_consumption_kwh', y='carbon_footprint_tco2e',
        z='water_usage_m3', color='class_label',
        color_discrete_map=class_colors,
        labels={
            'energy_consumption_kwh': 'Energy (kWh)',
            'carbon_footprint_tco2e': 'Carbon (tCO2e)',
            'water_usage_m3': 'Water (m³)',
            'class_label': 'Class'
        },
        title="3D Classification of Buildings",
        hover_data={
            'building_name': True,
            'energy_consumption_kwh': ':.2f',
            'carbon_footprint_tco2e': ':.2f',
            'water_usage_m3': ':.2f',
            'class_label': True
        }
    )

    fig.update_traces(marker=dict(size=6, opacity=0.8))

    P_min = df[['energy_norm', 'carbon_norm', 'water_norm']].min().values
    d_max = df['distance'].max()
    thresholds = np.linspace(0, d_max, 7)  

    for i, threshold in enumerate(thresholds[1:]):
        # Create a sphere mesh
        u, v = np.mgrid[0:2*np.pi:50j, 0:np.pi:50j]
        x = P_min[0] + threshold * np.cos(u) * np.sin(v)
        y = P_min[1] + threshold * np.sin(u) * np.sin(v)
        z = P_min[2] + threshold * np.cos(v)

        fig.add_trace(go.Surface(
            x=x, y=y, z=z,
            opacity=0.2,  
            showscale=False,
            colorscale=[[0, class_colors[chr(65 + i)]], [1, class_colors[chr(65 + i)]]],
            name=f'Class {chr(65 + i)} Boundary'
        ))

    fig.update_layout(
        scene=dict(
            xaxis_title='Energy Consumption (kWh)',
            yaxis_title='Carbon Footprint (tCO2e)',
            zaxis_title='Water Usage (m³)'
        ),
        legend_title_text='Class',
        margin=dict(l=0, r=0, b=0, t=40)
    )

    return fig

In [None]:
fig = plot_3d_classification(filtered_df)
if fig:
    fig.show()

## Step 4: Statistical Analysis

In [None]:
def create_statistics_features(df, selected_classes):
    """Creates statistical visualizations and summaries for the data."""
    if df is None or df.empty:
        print("No data available for statistical analysis")
        return None, None, None, None
    
    # 1. Class Distribution Bar Chart
    class_counts = df['class_label'].value_counts().sort_index()
    class_fig = px.bar(
        x=class_counts.index, 
        y=class_counts.values,
        labels={'x': 'Building Class', 'y': 'Number of Buildings'},
        title="Distribution of Building Classes",
        color=class_counts.index,
        color_discrete_map={
            'A': 'green', 'B': 'lightgreen', 'C': 'yellow',
            'D': 'orange', 'E': 'red', 'F': 'darkred'
        }
    )
    
    # 2. Key Performance Stats
    filtered_df = df[df['class_label'].isin(selected_classes)]
    total_buildings = len(filtered_df)
    avg_energy = filtered_df['energy_consumption_kwh'].mean() if not filtered_df.empty else 0
    avg_carbon = filtered_df['carbon_footprint_tco2e'].mean() if not filtered_df.empty else 0
    avg_water = filtered_df['water_usage_m3'].mean() if not filtered_df.empty else 0
    top_performers = filtered_df[filtered_df['class_label'] == 'A']['building_name'].tolist() if not filtered_df.empty else []
    

    table_cols = [
        'building_name', 'class_label', 'energy_consumption_kwh', 
        'carbon_footprint_tco2e', 'water_usage_m3', 'distance'
    ]
 
    existing_cols = [col for col in table_cols if col in filtered_df.columns]
    sortable_table = filtered_df[existing_cols].sort_values('class_label') if not filtered_df.empty else pd.DataFrame()
    
    # 4. Metric Comparison Chart
    comparison_fig = px.scatter(
        filtered_df, 
        x='energy_consumption_kwh', 
        y='carbon_footprint_tco2e', 
        color='class_label',
        size='water_usage_m3',
        hover_name='building_name',
        color_discrete_map={
            'A': 'green', 'B': 'lightgreen', 'C': 'yellow',
            'D': 'orange', 'E': 'red', 'F': 'darkred'
        },
        labels={
            'energy_consumption_kwh': 'Energy Consumption (kWh)',
            'carbon_footprint_tco2e': 'Carbon Footprint (tCO2e)',
            'water_usage_m3': 'Water Usage (m³)'
        },
        title="Energy vs. Carbon Consumption"
    ) if not filtered_df.empty else None
    
    return class_fig, {
        'total_buildings': total_buildings,
        'avg_energy': avg_energy,
        'avg_carbon': avg_carbon,
        'avg_water': avg_water,
        'top_performers': top_performers[:5] if len(top_performers) > 5 else top_performers
    }, sortable_table, comparison_fig

In [None]:

class_fig, stats, table, comparison_fig = create_statistics_features(df, selected_classes)


if class_fig:
    print("\nClass Distribution:")
    class_fig.show()


Class Distribution:


In [None]:

if stats:
    print("\nKey Performance Metrics:")
    print(f"Total Buildings: {stats['total_buildings']}")
    print(f"Average Energy Consumption: {stats['avg_energy']:.2f} kWh")
    print(f"Average Carbon Footprint: {stats['avg_carbon']:.2f} tCO2e")
    print(f"Average Water Usage: {stats['avg_water']:.2f} m³")
    
    print("\nTop Performing Buildings (Class A):")
    if stats['top_performers']:
        for i, building in enumerate(stats['top_performers']):
            print(f"{i+1}. {building}")
    else:
        print("No Class A buildings in the current selection.")


Key Performance Metrics:
Total Buildings: 1000
Average Energy Consumption: 692959.72 kWh
Average Carbon Footprint: 345.70 tCO2e
Average Water Usage: 20768.17 m³

Top Performing Buildings (Class A):
1. Ambassade du Chili
2. Embassy of Mali in Morocco
3. Ambassade du Canada
4. City Sport
5. Le Bosquet


In [None]:
# Display Energy vs Carbon comparison
if comparison_fig:
    print("\nEnergy vs. Carbon Consumption:")
    comparison_fig.show()


Energy vs. Carbon Consumption:


In [None]:
if table is not None and not table.empty:
    print("\nBuilding Details:")
    display(table.head(10)) 
    print(f"Showing 10 of {len(table)} buildings")


Building Details:


Unnamed: 0,building_name,class_label,energy_consumption_kwh,carbon_footprint_tco2e,water_usage_m3,distance
862,Building-508257006,A,449818,228.0,9480.0,0.644735
796,Building-1105299871,A,455838,240.2,15299.0,0.729971
316,Collège Ibn Khaldoun ثانوية ابن خلدون الإعدادية,A,422264,239.4,8851.0,0.632319
131,Building-1237567172,A,445106,216.7,13255.0,0.673051
314,Mosquée Redouane,A,489010,265.3,11119.0,0.727482
313,Building-1101838203,A,405454,211.7,9752.0,0.599914
719,Building-1105299221,A,491552,244.3,12846.0,0.726108
310,Building-1088639530,A,496991,199.4,14418.0,0.708361
715,Building-1105653432,A,510432,224.1,14490.0,0.740061
794,Building-1105299869,A,401004,220.2,11826.0,0.629763


Showing 10 of 1000 buildings


## Step 6: Interactive Map

In [None]:
def create_buildings_map(df):
    """Creates an interactive Folium map displaying buildings."""
    if df is None or df.empty:
        print("No buildings data available for mapping.")
        return folium.Map(location=[31.7917, -7.0926], zoom_start=5)

    map_center = [df['latitude'].mean(), df['longitude'].mean()]
    m = folium.Map(location=map_center, zoom_start=12, tiles="OpenStreetMap")

    class_colors = {'A': 'green', 'B': 'lightgreen', 'C': 'yellow',
                    'D': 'orange', 'E': 'red', 'F': 'darkred'}

    for _, row in df.iterrows():
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=f"<b>{row['building_name']}</b><br>Class: {row['class_label']}",
            tooltip=row['building_name'],
            icon=folium.Icon(color=class_colors.get(row['class_label'], 'gray'))
        ).add_to(m)

    return m

In [None]:
m = create_buildings_map(filtered_df)
display(m)


color argument of Icon should be one of: {'lightred', 'lightblue', 'green', 'pink', 'beige', 'black', 'purple', 'darkgreen', 'cadetblue', 'white', 'darkblue', 'lightgray', 'orange', 'blue', 'darkpurple', 'lightgreen', 'darkred', 'gray', 'red'}.

