In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the building table (semicolon-separated CSV).
file = "Buildings_LST_mean.csv"
df = pd.read_csv(file, sep=";")

# These columns are read as text with a decimal comma; swap the comma for a
# dot so that they can be cast to float.
for column in (
    "geometry_area_m2",
    "LST_Celsius_13",
    "LST_Celsius_12",
    "LST_Celsius_mean",
):
    df[column] = df[column].str.replace(",", ".").astype(float)

# The efficiency score is cast to an integer column.
df["CURRENT_ENERGY_EFFICIENCY"] = df["CURRENT_ENERGY_EFFICIENCY"].astype(int)

# Bar chart: number of buildings per footprint-area class.
#
# The outermost bin edges are open-ended so that the '≤ 50' and '> 130'
# classes really cover every area. With fixed outer limits (30 and 5000),
# pd.cut() assigns NaN to areas outside them; such rows would silently vanish
# from the chart while still contributing to the statistics in the text box.
bins = [-np.inf, 50, 60, 70, 80, 90, 100, 110, 120, 130, np.inf]
labels = ['≤ 50', '> 50 - 60', '> 60 - 70', '> 70 - 80', '> 80 - 90', '> 90 - 100', '> 100 - 110', '> 110 - 120', '> 120 - 130', '> 130']
df["Gebaeudeflaechen"] = pd.cut(df["geometry_area_m2"], bins=bins, labels=labels, right=True)

# Summary statistics of the raw (unbinned) areas, shown in the text box.
areas = df["geometry_area_m2"]
mean_value = areas.mean()
median_value = areas.median()
min_value = areas.min()
max_value = areas.max()

# Count once; sort_index() keeps the classes in bin order instead of by frequency.
area_counts = df["Gebaeudeflaechen"].value_counts().sort_index()
ax = area_counts.plot(kind="bar", color="#008B8B", edgecolor="black")
plt.xlabel("Gebäudefläche in m²", fontweight='bold')
plt.ylabel("Anzahl der Objekte", fontweight='bold')

# Anchor the text box in data coordinates: x is a bar index (fourth class
# from the right), y is the height of the tallest bar (box hangs below it).
x_pos = len(labels) - 4
y_pos = area_counts.max()
text = (
    f"Minimum: {min_value:.1f} m²\n"
    f"Maximum: {max_value:.1f} m²\n"
    f"Mittelwert: {mean_value:.1f} m²\n"
    f"Median: {median_value:.1f} m²"
)

plt.text(x_pos, y_pos, text, fontsize=10, color="black",
         verticalalignment="top", horizontalalignment="left",
         bbox=dict(facecolor="white", alpha=0.8, edgecolor="black"))
plt.xticks(rotation=45)
plt.savefig("Statistik_Gebäudefläche.png", bbox_inches='tight')
plt.show()

# Bar chart: number of buildings per energy efficiency rating (A to G).
order = ["A", "B", "C", "D", "E", "F", "G"]
rating_colors = [
    "#4575b4", "#91bfdb", "#e0f3f8", "#ffffbf", "#fee090", "#fc8d59", "#d73027",
]

# reindex() puts the counts into A-G order (value_counts() sorts by frequency);
# a rating that does not occur in the data ends up as NaN.
rating_counts = df["CURRENT_ENERGY_RATING"].value_counts()
category_counts = rating_counts.reindex(order)
category_counts.plot.bar(color=rating_colors, edgecolor="black")

plt.xticks(rotation=0)
plt.xlabel("Energieeffizienzklasse der Gebäude", fontweight="bold")
plt.ylabel("Anzahl der Objekte", fontweight="bold")
plt.savefig("Statistik_Energieeffizienzklasse.png", bbox_inches="tight")
plt.show()

# Bar chart: number of buildings per roof energy efficiency class
# (Very Good down to Very Poor).
order = ["Very Good", "Good", "Average", "Poor", "Very Poor"]
roof_colors = ["#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c"]

# reindex() arranges the counts from best to worst class instead of by
# frequency; a class that does not occur in the data ends up as NaN.
roof_counts = df["ROOF_ENERGY_EFF"].value_counts()
category_counts = roof_counts.reindex(order)
category_counts.plot.bar(color=roof_colors, edgecolor="black")

plt.xticks(rotation=0)
plt.xlabel("Energieeffizienz der Gebäudedächer", fontweight="bold")
plt.ylabel("Anzahl der Objekte", fontweight="bold")
plt.savefig("Statistik_Dach_Energieeffizienz.png", bbox_inches="tight")
plt.show()

# Histogram of the buildings' energy efficiency score (1 to 100), with the
# mean and the median drawn as dashed vertical lines.
efficiency = df["CURRENT_ENERGY_EFFICIENCY"]
mean_value = efficiency.mean()
median_value = efficiency.median()

sns.histplot(efficiency, bins=100, kde=False, color="#008B8B", edgecolor="black")

# One dashed marker per statistic; the labels feed the legend.
for marker_value, marker_color, marker_label in (
    (mean_value, "#7570b3", f"Mittelwert: {mean_value:.1f}"),
    (median_value, "#d95f02", f"Median: {median_value:.1f}"),
):
    plt.axvline(marker_value, color=marker_color, linestyle="dashed",
                linewidth=2, label=marker_label)
plt.legend()

plt.xticks(range(0, 110, 10))  # ticks at 0, 10, ..., 100
plt.xlabel("Energieeffizienz der Gebäude", fontweight="bold")
plt.ylabel("Anzahl der Objekte", fontweight="bold")
plt.savefig("Statistik_Energieeffizienz_1_bis_100.png", bbox_inches="tight")
plt.show()

# Histogram and descriptive statistics for the LST from 12.11.2022
# NOTE(review): the column suffix is _13 while the date given here is
# 12.11.2022 — confirm which one is right.
#
# The column is bound to a name once: this avoids nesting double quotes inside
# double-quoted f-strings, which is a SyntaxError before Python 3.12.
lst_values = df["LST_Celsius_13"]
mean_value = lst_values.mean()
median_value = lst_values.median()
lst_min = lst_values.min()
lst_max = lst_values.max()

print(f"Mittelwert: {mean_value}")
print(f"Median: {median_value}")
print(f"Minimum: {lst_min}")
print(f"Maximum: {lst_max}")
print(f"Spannweite: {lst_max - lst_min}")
print(f"Standardabweichung: {lst_values.std()}")
print(f"Q1: {lst_values.quantile(0.25)}")
print(f"Q3: {lst_values.quantile(0.75)}")
print(f"Skewness: {lst_values.skew()}")
# pandas' kurt() reports excess kurtosis (normal distribution == 0); adding 3
# puts it on the scale where a normal distribution has kurtosis 3.
print(f"Kurtosis: {lst_values.kurt() + 3}")

sns.histplot(lst_values, bins=100, kde=True, color="#008B8B", edgecolor="black")
# Dashed vertical markers for mean and median; the labels feed the legend.
plt.axvline(mean_value, color='#7570b3', linestyle='dashed', linewidth=2, label=f'Mittelwert: {mean_value:.2f} °C')
plt.axvline(median_value, color='#d95f02', linestyle='dashed', linewidth=2, label=f'Median: {median_value:.2f} °C')
plt.legend()
plt.xticks(np.arange(10.0, 13.5, 0.5))  # ticks at 10.0, 10.5, ..., 13.0
plt.xlabel("Land Surface Temperature (LST) in °C", fontweight='bold')
plt.ylabel("Anzahl der Objekte", fontweight='bold')
plt.savefig("Statistik_LST_Celsius_13.png", bbox_inches='tight')
plt.show()

# Histogram and descriptive statistics for the LST from 13.11.2022
# NOTE(review): the column suffix is _12 while the date given here is
# 13.11.2022 — confirm which one is right.
#
# The column is bound to a name once: this avoids nesting double quotes inside
# double-quoted f-strings, which is a SyntaxError before Python 3.12.
lst_values = df["LST_Celsius_12"]
mean_value = lst_values.mean()
median_value = lst_values.median()
lst_min = lst_values.min()
lst_max = lst_values.max()

print(f"Mittelwert: {mean_value}")
print(f"Median: {median_value}")
print(f"Minimum: {lst_min}")
print(f"Maximum: {lst_max}")
print(f"Spannweite: {lst_max - lst_min}")
print(f"Standardabweichung: {lst_values.std()}")
print(f"Q1: {lst_values.quantile(0.25)}")
print(f"Q3: {lst_values.quantile(0.75)}")
print(f"Skewness: {lst_values.skew()}")
# pandas' kurt() reports excess kurtosis (normal distribution == 0); adding 3
# puts it on the scale where a normal distribution has kurtosis 3.
print(f"Kurtosis: {lst_values.kurt() + 3}")

sns.histplot(lst_values, bins=100, kde=True, color="#008B8B", edgecolor="black")
# Dashed vertical markers for mean and median; the labels feed the legend.
plt.axvline(mean_value, color='#7570b3', linestyle='dashed', linewidth=2, label=f'Mittelwert: {mean_value:.2f} °C')
plt.axvline(median_value, color='#d95f02', linestyle='dashed', linewidth=2, label=f'Median: {median_value:.2f} °C')
plt.legend()
plt.xticks(np.arange(10.0, 14.5, 0.5))  # ticks at 10.0, 10.5, ..., 14.0
plt.xlabel("Land Surface Temperature (LST) in °C", fontweight='bold')
plt.ylabel("Anzahl der Objekte", fontweight='bold')
plt.savefig("Statistik_LST_Celsius_12.png", bbox_inches='tight')
plt.show()

# Histogram and descriptive statistics for the mean values of the LST
#
# The column is bound to a name once: this avoids nesting double quotes inside
# double-quoted f-strings, which is a SyntaxError before Python 3.12.
lst_values = df["LST_Celsius_mean"]
mean_value = lst_values.mean()
median_value = lst_values.median()
lst_min = lst_values.min()
lst_max = lst_values.max()

print(f"Mittelwert: {mean_value}")
print(f"Median: {median_value}")
print(f"Minimum: {lst_min}")
print(f"Maximum: {lst_max}")
print(f"Spannweite: {lst_max - lst_min}")
print(f"Standardabweichung: {lst_values.std()}")
print(f"Q1: {lst_values.quantile(0.25)}")
print(f"Q3: {lst_values.quantile(0.75)}")
print(f"Skewness: {lst_values.skew()}")
# pandas' kurt() reports excess kurtosis (normal distribution == 0); adding 3
# puts it on the scale where a normal distribution has kurtosis 3.
print(f"Kurtosis: {lst_values.kurt() + 3}")

sns.histplot(lst_values, bins=100, kde=True, color="#008B8B", edgecolor="black")
# Dashed vertical markers for mean and median; the labels feed the legend.
plt.axvline(mean_value, color='#7570b3', linestyle='dashed', linewidth=2, label=f'Mittelwert: {mean_value:.2f} °C')
plt.axvline(median_value, color='#d95f02', linestyle='dashed', linewidth=2, label=f'Median: {median_value:.2f} °C')
plt.legend()
plt.xticks(np.arange(10.5, 13.5, 0.5))  # ticks at 10.5, 11.0, ..., 13.0
plt.xlabel("Land Surface Temperature (LST) in °C", fontweight='bold')
plt.ylabel("Anzahl der Objekte", fontweight='bold')
plt.savefig("Statistik_LST_Mittelwert.png", bbox_inches='tight')
plt.show()
