In [22]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import warnings
# warnings.filterwarnings("ignore")

# Load the energy dataset (path is relative to the current working directory).
data = pd.read_csv('./data/dummy_energy_data.csv')

# Define the features and the target variable
features = [
    'Property Type',
    'Location',
    'Square Footage',
    'Number of Bedrooms',
    'Number of Bathrooms',
    'Roof Type',
    'Year of Construction',
    'Primary Energy Source',
    'Number of Occupants',
    'Heating System',
    'Cooling System',
    'Water Heating System',
    'Thermostat Type',
    'Lighting Type',
    'Roof Insulation',
    'Wall Insulation',
    'Major Appliances',
    'Window Type',
    'Door Type',
    'Solar Panel',
    'Other Renewable Energy Source'
]

target = 'Current Monthly Energy Usage'  # Use this column as the target variable

# Preprocess the data.
# Only drop rows that are missing a value the model actually uses (a feature
# or the target); gaps in unrelated columns should not cost training rows.
data = data.dropna(subset=features + [target])

# Encode categorical variables.
# Every non-numeric feature column is label-encoded. Testing for "not numeric"
# rather than `dtype == 'object'` also catches text columns that pandas stores
# with its dedicated string dtype, which would otherwise reach the model as
# raw strings.
# The fitted encoders are kept per feature so new inputs can be encoded with
# the same mapping later on.
label_encoders = {}
for feature in features:
    if not pd.api.types.is_numeric_dtype(data[feature]):
        le = LabelEncoder()
        data[feature] = le.fit_transform(data[feature])
        label_encoders[feature] = le

# Split the data into training and testing sets (80% / 20%, fixed seed)
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a machine learning model (Random Forest in this example).
# The forest is seeded just like the split above; without a seed the bootstrap
# samples differ on every run and the metrics printed below are not
# reproducible.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out test data
predictions = model.predict(X_test)

# Evaluate the model's performance on the test set
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Squared Error: {mse:.2f}')
print(f'Mean Absolute Error: {mae:.2f}')

# Provide energy efficiency upgrade recommendations based on the predictions
def get_energy_efficiency_upgrade_recommendations(property_data):
    """Predict and print the monthly energy usage for a single property.

    Despite its name, this function does not produce upgrade recommendations
    yet: it prints the model's prediction and returns ``None``.

    Relies on the module-level ``label_encoders``, ``features`` and ``model``.

    Args:
        property_data: Mapping of feature name to raw value, with categorical
            features given as their original (unencoded) labels. The mapping
            is left unmodified.

    Raises:
        ValueError: If a categorical value cannot be encoded, e.g. because it
            was not present when the feature's encoder was fitted.
    """
    # Encode a copy. Encoding in place would replace the caller's labels with
    # integer codes, and a second call with the same dict would then fail
    # because those codes are not labels the encoders know.
    encoded = dict(property_data)

    for feature, le in label_encoders.items():
        if feature not in encoded:
            continue
        raw_value = encoded[feature]
        try:
            encoded[feature] = le.transform([raw_value])[0]
        except ValueError as err:
            # Same exception type as before, but say which feature is at fault.
            raise ValueError(
                f"Cannot encode value {raw_value!r} for feature {feature!r}"
            ) from err

    # Single-row frame with the same column order used for training.
    property_df = pd.DataFrame([encoded], columns=features)

    # Use the trained model to predict current monthly energy usage
    predicted_energy_usage = model.predict(property_df)
    print(predicted_energy_usage)

    # TODO: build the actual recommendations. An earlier draft picked features
    # whose `model.feature_importances_` exceeded 0.05 and suggested an upgrade
    # (e.g. for 'Roof Insulation' or 'Heating System') whenever the encoded
    # value was 0. That test needs rethinking: an encoded 0 is merely the code
    # LabelEncoder assigned to one label and by itself says nothing about
    # whether that option is inefficient.

# Example property data (customize this with your actual data).
# Categorical values are given as raw labels.
property_data = {
    'Property Type': 'House',
    'Location': 'Urban',
    'Square Footage': 1800,
    'Number of Bedrooms': 4,
    'Number of Bathrooms': 2,
    'Roof Type': 'Asphalt',
    'Year of Construction': 2005,
    'Primary Energy Source': 'Electric',
    'Number of Occupants': 4,
    'Heating System': 'Radiant',
    'Cooling System': 'Central AC',
    'Water Heating System': 'Tankless',
    'Thermostat Type': 'Smart',
    'Lighting Type': 'LED',
    'Roof Insulation': 'Fiberglass',
    'Wall Insulation': 'Foam',
    'Major Appliances': 'Refrigerator, Dishwasher',
    'Window Type': 'Double Pane',
    'Door Type': 'Fiberglass',
    'Solar Panel': 'No',
    'Other Renewable Energy Source': 'No'
}

# Run the prediction for the example property.
# A shallow copy is handed over so `property_data` keeps its raw labels no
# matter what the callee does with its argument, and this call can be re-run.
# NOTE: the function currently returns None, so there is nothing to iterate
# over yet; the loop below stays disabled until recommendations are returned.
upgrade_recommendations = get_energy_efficiency_upgrade_recommendations(dict(property_data))
# for feature, recommendation in upgrade_recommendations.items():
#     print(f"Upgrade {feature}: {recommendation}")


Mean Squared Error: 6655.06
Mean Absolute Error: 70.02
[151.91345492]
