In [None]:

import pandas as pd
import numpy as np

class Building:
    """Hold the recorded readings of a single building.

    Besides the building's ID and type, an instance keeps five pandas Series
    (timestamps, energy, temperature, humidity, occupancy). They start out
    empty and are replaced by ``recording_data``.
    """

    def __init__(self, Building_ID, Building_Type):
        """Store the identity of the building and create empty data series."""
        self.Building_ID = Building_ID
        self.Building_Type = Building_Type

        # Empty placeholders (attribute name -> dtype) until data is recorded.
        empty_series_dtypes = (
            ("timestamps", "object"),
            ("energy", "float"),
            ("temperature", "float"),
            ("humidity", "float"),
            ("occupancy", "object"),
        )
        for attribute, dtype in empty_series_dtypes:
            setattr(self, attribute, pd.Series([], dtype=dtype))

    def recording_data(self, dataframe):
        """Replace the stored series with the matching columns of ``dataframe``.

        The frame is expected to carry the columns "Timestamp",
        "Energy_Usage (kWh)", "Temperature (°C)", "Humidity (%)" and
        "Occupancy_Level". The three measurement columns are converted to
        numbers; values that cannot be converted become NaN.
        """
        self.timestamps = dataframe["Timestamp"]

        numeric_columns = {
            "energy": "Energy_Usage (kWh)",
            "temperature": "Temperature (°C)",
            "humidity": "Humidity (%)",
        }
        for attribute, column in numeric_columns.items():
            setattr(self, attribute, pd.to_numeric(dataframe[column], errors="coerce"))

        self.occupancy = dataframe["Occupancy_Level"]

    def __str__(self):
        """Return a multi-line summary: identity, record count and averages."""
        summary_lines = [
            f"Building ID: {self.Building_ID} (type: {self.Building_Type})",
            # count() skips NaN, so only usable energy readings are counted.
            f"Total records: {self.energy.count()}",
            f"Average Energy Usage: {self.energy.mean():.2f}",
            f"Average Temperature: {self.temperature.mean():.2f}",
            f"Average Humidity: {self.humidity.mean():.2f}",
        ]
        return "\n".join(summary_lines)

# Load data
# The CSV is read via a relative path into a module-level DataFrame.
df = pd.read_csv("building_energy_data_extended.csv")

# Registry of Building objects keyed by Building_ID; filled by the grouping loop below.
buildings = {}

class BuildingCalc:
    """Estimate average energy, temperature and humidity for a building.

    This class has a composition ("has-a") relationship with a building
    object: it keeps a reference to the building and derives a DataFrame
    from the building's recorded series. Estimates are averages over the
    rows matching a building type, an occupancy level and a time of day.
    """

    def __init__(self, building):
        """Build the working DataFrame from ``building``'s recorded data.

        Args:
            building: object exposing ``timestamps``, ``energy``,
                ``temperature``, ``humidity`` and ``occupancy`` series plus
                a ``Building_Type`` value.
        """
        # Composition: the calculator holds on to the building instance.
        self.building = building

        df = pd.DataFrame({
            "Timestamp": self.building.timestamps,
            "Energy": self.building.energy,
            "Temp": self.building.temperature,
            "Humid": self.building.humidity,
            "Occ": self.building.occupancy,
            # Scalar value, repeated on every row.
            "Type": self.building.Building_Type,
        })
        # Derive a time-of-day label for each row from its timestamp.
        df["TimeOfDay"] = df["Timestamp"].apply(self._categorize_time)
        self.df = df

    def _categorize_time(self, ts):
        """Map a timestamp to "morning", "afternoon", "evening" or "night".

        Hours 6-11 are morning, 12-16 afternoon, 17-22 evening, and
        everything else (23-5) night.

        Returns:
            The label, or ``None`` when the timestamp is missing
            (None / NaN / NaT) so that such rows never match a time period.
        """
        parsed = pd.to_datetime(ts)
        # A missing timestamp has no hour; do not silently label it "night".
        if pd.isna(parsed):
            return None
        hour = parsed.hour

        if 6 <= hour < 12:
            return "morning"
        elif 12 <= hour < 17:
            return "afternoon"
        elif 17 <= hour < 23:
            return "evening"
        else:
            return "night"

    def estimate(self, building_type, occupancy_level, time_period, show_rows=False):
        """Average energy, temperature and humidity over the matching rows.

        Args:
            building_type: value to match against the "Type" column.
            occupancy_level: value to match against the "Occ" column.
            time_period: value to match against the "TimeOfDay" column
                ("morning", "afternoon", "evening" or "night").
            show_rows: when true, print the rows used for the calculation.

        Returns:
            dict with the keys "Estimated Energy (kWh)",
            "Estimated Temperature (C)" and "Estimated Humidity (%)".

        Raises:
            ValueError: if no row matches all three filters.
        """
        df = self.df

        # One vectorized boolean mask combining all three filters.
        matches = (
            (df["Type"] == building_type)
            & (df["Occ"] == occupancy_level)
            & (df["TimeOfDay"] == time_period)
        )
        # Renumber the selected rows from 0 so the printed frame is compact.
        filtered = df[matches].reset_index(drop=True)

        if filtered.empty:
            raise ValueError("No data available for the given parameters.")

        # Optional dump of the selected rows, useful when checking results.
        if show_rows:
            print("Filtered DataFrame used for calculation:")
            print(filtered)
            print()

        return {
            "Estimated Energy (kWh)": filtered["Energy"].mean(),
            "Estimated Temperature (C)": filtered["Temp"].mean(),
            "Estimated Humidity (%)": filtered["Humid"].mean(),
        }


# Create one Building per Building_ID and register it in `buildings`.
for building_id, rows in df.groupby("Building_ID"):
    # The building type is taken from the first row of the group.
    entry = Building(building_id, rows["Building_Type"].iloc[0])
    entry.recording_data(rows)
    buildings[building_id] = entry

print("Buildings loaded:", len(buildings))



# Interactive loop: print the stats of each requested building until the
# user enters 'X' or 'x'.
while True:
    user_input = input("Enter Building ID (ie B001) to see stats ('X or x' to exit): ").strip()
    if user_input.lower() == "x":  # exit/cancel request, either case
        break
    if user_input in buildings:  # known ID: print its summary via __str__
        print(buildings[user_input])
    else:
        # Unknown ID: report it and prompt again. An exception raised only to
        # be caught on the next line adds nothing, so the message is printed
        # directly.
        print(f"Error: Invalid Building ID entered: '{user_input}'")


#TESTING SECTION:
# Test for the __str__ of Building
test_building = buildings["B001"]
print("This test should display building statistics for B001:")
print(test_building)

# Test for the BuildingCalc class: the frame must include the TimeOfDay column
calc = BuildingCalc(test_building)
print("This test should display first few rows of dataframe with time of day column:")
print(calc.df.head())

# Test the estimate (average) functionality
print("This should print the avg energy, temp, humidity for given param")
try:
    result = calc.estimate(
        building_type="Industrial",
        occupancy_level="High",
        time_period="afternoon",
        show_rows=True,
    )
except ValueError as e:
    # estimate() raises ValueError when no row matches; report it instead of
    # aborting the whole script with a traceback.
    print("Error:", e)
else:
    print(result)



Buildings loaded: 20
Building ID: B002 (type: Residential)
Total records: 360
Average Energy Usage: 285.20
Average Temperature: 12.84
Average Humidity: 59.80
This test should display building statistics for B001:
Building ID: B001 (type: Industrial)
Total records: 360
Average Energy Usage: 265.45
Average Temperature: 12.46
Average Humidity: 60.24
This test should display first few rows of dataframe with time of day column:
             Timestamp  Energy   Temp  Humid     Occ        Type TimeOfDay
0  2025-01-01 00:00:00  121.30  -7.20  79.36     Low  Industrial     night
1  2025-01-01 01:00:00  230.76  12.62  80.37    High  Industrial     night
2  2025-01-01 02:00:00  187.21  -1.33  37.74    High  Industrial     night
3  2025-01-01 03:00:00  262.23   0.24  39.97    High  Industrial     night
4  2025-01-01 04:00:00  472.97   5.44  89.29  Medium  Industrial     night
This should pring the avg energy, temp, humidity for given param
Filtered DataFrame used for calculation:
               Ti