In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import pickle


In [32]:
# Read the green-building CSV into a DataFrame
dataset_path = "green_building_dataset_100000.csv"
df = pd.read_csv(dataset_path)

# Preview the leading rows as this cell's output
df.head(n=5)


Unnamed: 0,Latitude,Longitude,Climate,Elevation,Hazard_Risk,Avg_Temperature,Annual_Rainfall,Urban_Density,Green_Building_Suggestion,Reason
0,-58.832937,90.34091,Dry,1441.403107,1,31.443681,2580.54147,1,Energy-Efficient Windows & Insulation,Reducing heat gain in buildings lowers energy ...
1,-67.805415,174.629074,Dry,1053.412455,0,10.517754,2266.170989,0,Green Roofs & Walls,Urban areas with high temperatures benefit fro...
2,62.471418,93.817176,Dry,1663.527714,1,38.698052,1010.305226,0,"Sustainable Materials (Bamboo, Recycled)",Locally available materials reduce carbon foot...
3,12.603448,-115.007319,Temperate,797.658709,1,37.180855,2121.768402,1,Greywater Recycling System,Water scarcity regions benefit from recycling ...
4,-77.085086,153.34288,Tropical,1070.940158,0,38.250691,2746.50236,0,Green Roofs & Walls,Urban areas with high temperatures benefit fro...


In [33]:
# Turn each suggestion label into an integer class id. The fitted encoder is
# kept in `le_suggestion` so class ids can be decoded back to label text.
le_suggestion = LabelEncoder()
suggestion_codes = le_suggestion.fit_transform(df["Green_Building_Suggestion"])
df["Green_Building_Num"] = suggestion_codes

# Replace climate names with fixed integer codes.
# Note: Series.map yields NaN for any climate name missing from this mapping.
climate_map = {"Tropical": 0, "Dry": 1, "Temperate": 2, "Continental": 3}
climate_codes = df["Climate"].map(climate_map)
df["Climate_Num"] = climate_codes


In [34]:
# Model inputs: location, encoded climate and site characteristics
feature_columns = [
    "Latitude",
    "Longitude",
    "Climate_Num",
    "Elevation",
    "Hazard_Risk",
    "Avg_Temperature",
    "Annual_Rainfall",
    "Urban_Density",
]
X = df[feature_columns]

# Target: the integer-encoded green building suggestion
y = df["Green_Building_Num"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [35]:
# Fit a 1000-tree random forest (tree depth capped at 30) on the training split
rf = RandomForestClassifier(
    n_estimators=1000,
    max_depth=30,
    min_samples_split=2,
    random_state=42,
)
rf.fit(X_train, y_train)

# Persist the fitted model to disk with pickle
with open("rf_green_building.pkl", "wb") as pickle_out:
    pickle.dump(rf, pickle_out)

print("Model trained and saved successfully!")


Model trained and saved successfully!


In [44]:
# Predict class ids for the held-out test rows
y_pred = rf.predict(X_test)

# Fraction of test rows whose predicted class id equals the true class id
accuracy = accuracy_score(y_test, y_pred)

# Report the score so the evaluation result is visible in the notebook output
print(f"Green Building Suggestion Model Accuracy: {accuracy * 100:.2f}%")


In [45]:
import pickle

# Persist the fitted label encoder so class ids can be decoded at inference time
with open("label_encoder.pkl", "wb") as encoder_out:
    pickle.dump(le_suggestion, encoder_out)

print("Label encoder saved successfully!")


Label encoder saved successfully!


In [53]:
import numpy as np
import pandas as pd
import pickle

# Restore the fitted random forest from its pickle file
with open("rf_green_building.pkl", "rb") as model_in:
    rf = pickle.load(model_in)

# Restore the fitted label encoder
with open("label_encoder.pkl", "rb") as encoder_in:
    le_suggestion = pickle.load(encoder_in)

# Re-read the dataset from disk
df = pd.read_csv("green_building_dataset_100000.csv")

# Map each suggestion label to its reason text; when a label appears on
# several rows, the reason from the last such row is the one kept.
reasons_dict = {
    suggestion: reason
    for suggestion, reason in zip(df["Green_Building_Suggestion"], df["Reason"])
}

# Function to Predict Multiple Green Building Suggestions with Reasons
def predict_green_building(lat, lon, climate, elevation, hazard, temp, rainfall, urban_density, top_n=3):
    """Return the ``top_n`` most probable green building suggestions with reasons.

    Uses three module-level objects: ``rf`` (the fitted classifier),
    ``le_suggestion`` (the fitted label encoder) and ``reasons_dict``
    (suggestion label -> reason text).

    Args:
        lat: Value for the ``Latitude`` feature.
        lon: Value for the ``Longitude`` feature.
        climate: Climate name; one of "Tropical", "Dry", "Temperate" or
            "Continental" (exact spelling). Any other value emits a warning
            and is encoded as 0, the same code as "Tropical".
        elevation: Value for the ``Elevation`` feature.
        hazard: Value for the ``Hazard_Risk`` feature.
        temp: Value for the ``Avg_Temperature`` feature.
        rainfall: Value for the ``Annual_Rainfall`` feature.
        urban_density: Value for the ``Urban_Density`` feature.
        top_n: How many suggestions to return; must be at least 1. If it
            exceeds the number of classes, every class is returned.

    Returns:
        str: A header line followed by one ``"<suggestion>: <reason>"`` line
        per suggestion, ordered from most to least probable.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    import warnings  # local import so this cell does not depend on another cell's imports

    # A slice of [-0:] would select every class, so reject non-positive counts.
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n!r}")

    climate_map = {"Tropical": 0, "Dry": 1, "Temperate": 2, "Continental": 3}
    if climate in climate_map:
        climate_num = climate_map[climate]
    else:
        # Keep the historical fallback (code 0) but make the substitution visible,
        # since a misspelled climate would otherwise be scored as Tropical silently.
        warnings.warn(
            f"Unknown climate {climate!r}; expected one of {sorted(climate_map)}. "
            "Falling back to code 0 (Tropical).",
            stacklevel=2,
        )
        climate_num = 0

    # Single-row DataFrame with the feature names used when the model was fitted
    input_data = pd.DataFrame(np.array([[lat, lon, climate_num, elevation, hazard, temp, rainfall, urban_density]]),
                              columns=["Latitude", "Longitude", "Climate_Num", "Elevation", "Hazard_Risk",
                                       "Avg_Temperature", "Annual_Rainfall", "Urban_Density"])

    # Probability score for each class, for the single input row
    probabilities = rf.predict_proba(input_data)[0]

    # Column positions of the top_n highest probabilities, highest first
    top_indices = np.argsort(probabilities)[-top_n:][::-1]

    # predict_proba columns are ordered like rf.classes_, so translate column
    # positions into the actual encoded class ids before decoding them.
    top_class_ids = rf.classes_[top_indices]
    top_suggestions = le_suggestion.inverse_transform(top_class_ids)

    # Pair every suggestion with its reason text (or a generic placeholder)
    recommendations = []
    for suggestion in top_suggestions:
        reason = reasons_dict.get(suggestion, "No specific reason available.")
        recommendations.append(f"{suggestion}: {reason}")

    return f"Top {top_n} Recommended Green Building Practices:\n" + "\n".join(recommendations)

# Example Prediction
# The climate name must match a key of the function's climate map exactly
# ("Temperate"); an unrecognised spelling is encoded as 0, the Tropical code.
# Re-run this cell to refresh any previously saved output.
print(predict_green_building(20.3850, 78.4867, "Temperate", 500, 1, 32, 1200, 1))


Top 3 Recommended Green Building Practices:
Greywater Recycling System: Water scarcity regions benefit from recycling water for irrigation and flushing.
Energy-Efficient Windows & Insulation: Reducing heat gain in buildings lowers energy costs for cooling.
Sustainable Materials (Bamboo, Recycled): Locally available materials reduce carbon footprint and improve insulation.
