### Variabilitätsscore berechnen

In [2]:
# Load the weather data collection from its serialized RDS file
wetterdaten_pfad <- file.path("datasets", "rds", "wetterdaten.rds")
wetterdaten <- readRDS(wetterdaten_pfad)

In [3]:
# Build the data frame holding the per-station standard deviations
stations_id <- c(
  "00856", "00891", "01346", "02290", "02712",
  "03631", "03730", "03987", "04271", "04625"
)
stations_name <- c(
  "Chieming", "Cuxhaven", "Feldberg/Schwarzwald", "Hohenpeißenberg",
  "Konstanz", "Norderney", "Oberstdorf", "Potsdam", "Rostock-Warnemünde",
  "Schwerin"
)

# IDs and names are matched to the elements of `wetterdaten` purely by
# position, so the lengths must agree. Without this check data.frame() would
# silently recycle shorter columns whenever the lengths divide evenly.
n <- length(wetterdaten)
stopifnot(n == length(stations_id), n == length(stations_name))

# Standard deviation (NAs ignored) of one column, computed for every element
# of `wetterdaten`. Names are dropped so they cannot end up as row names.
stda_je_station <- function(spalte) {
  unname(vapply(
    wetterdaten,
    function(station) sd(station[[spalte]], na.rm = TRUE),
    numeric(1)
  ))
}

standardabweichungen_df <- data.frame(
  id = stations_id,
  name = stations_name,
  STDA_RSK = stda_je_station("RSK"),
  STDA_SDK = stda_je_station("SDK"),
  STDA_TMK = stda_je_station("TMK")
)

# Print the resulting data frame
print(standardabweichungen_df)

      id                 name STDA_RSK STDA_SDK STDA_TMK
1  00856             Chieming 6.250016 4.659858 7.376363
2  00891             Cuxhaven 4.380597 4.442633 6.133311
3  01346 Feldberg/Schwarzwald 8.938837 4.478617 7.095335
4  02290      Hohenpeißenberg 7.079220 4.613091 7.480038
5  02712             Konstanz 5.608673 4.695837 7.372305
6  03631            Norderney 4.052408 4.558483 5.810113
7  03730           Oberstdorf 8.792411 4.049344 7.539743
8  03987              Potsdam 4.122642 4.607992 7.515048
9  04271   Rostock-Warnemünde 3.793048 4.807175 6.533197
10 04625             Schwerin 3.792455 4.448527 6.976120


In [4]:
# Min-Max Normalisierung der Standardabweichungen
#standardabweichungen_df$MM_norm_STDA_RSK <- (standardabweichungen_df$STDA_RSK - min(standardabweichungen_df$STDA_RSK)) / (max(standardabweichungen_df$STDA_RSK) - min(standardabweichungen_df$STDA_RSK))
#standardabweichungen_df$MM_norm_STDA_SDK <- (standardabweichungen_df$STDA_SDK - min(standardabweichungen_df$STDA_SDK)) / (max(standardabweichungen_df$STDA_SDK) - min(standardabweichungen_df$STDA_SDK))
#standardabweichungen_df$MM_norm_STDA_TMK <- (standardabweichungen_df$STDA_TMK - min(standardabweichungen_df$STDA_TMK)) / (max(standardabweichungen_df$STDA_TMK) - min(standardabweichungen_df$STDA_TMK))

# Z-Score Normalisierung der Standardabweichungen
#standardabweichungen_df$Z_norm_STDA_RSK <- scale(standardabweichungen_df$STDA_RSK)
#standardabweichungen_df$Z_norm_STDA_SDK <- scale(standardabweichungen_df$STDA_SDK)
#standardabweichungen_df$Z_norm_STDA_TMK <- scale(standardabweichungen_df$STDA_TMK)

# Robuste Normalisierung der Standardabweichungen
# Robust scaling: centre `x` on its median and divide by its interquartile
# range. NAs are ignored when computing median and IQR and stay NA in the
# result.
#
# x: numeric vector.
# Returns a numeric vector of the same length as `x`.
robust_scale <- function(x) {
  iqr <- IQR(x, na.rm = TRUE)
  median_x <- median(x, na.rm = TRUE)
  # A zero IQR (e.g. constant input) would produce NaN/Inf through division
  # by zero; fall back to a scale of 1 so such input maps to 0 instead.
  if (!is.na(iqr) && iqr == 0) {
    iqr <- 1
  }
  (x - median_x) / iqr
}
# Add one robust-scaled column (prefix "R_norm_") per standard deviation column
stda_spalten <- c("STDA_RSK", "STDA_SDK", "STDA_TMK")
standardabweichungen_df[paste0("R_norm_", stda_spalten)] <-
  lapply(standardabweichungen_df[stda_spalten], robust_scale)

print(standardabweichungen_df)

      id                 name STDA_RSK STDA_SDK STDA_TMK R_norm_STDA_RSK
1  00856             Chieming 6.250016 4.659858 7.376363       0.4480379
2  00891             Cuxhaven 4.380597 4.442633 6.133311      -0.2191465
3  01346 Feldberg/Schwarzwald 8.938837 4.478617 7.095335       1.4076622
4  02290      Hohenpeißenberg 7.079220 4.613091 7.480038       0.7439758
5  02712             Konstanz 5.608673 4.695837 7.372305       0.2191465
6  03631            Norderney 4.052408 4.558483 5.810113      -0.3362752
7  03730           Oberstdorf 8.792411 4.049344 7.539743       1.3554034
8  03987              Potsdam 4.122642 4.607992 7.515048      -0.3112092
9  04271   Rostock-Warnemünde 3.793048 4.807175 6.533197      -0.4288393
10 04625             Schwerin 3.792455 4.448527 6.976120      -0.4290508
   R_norm_STDA_SDK R_norm_STDA_TMK
1        0.3988226       0.1759378
2       -0.7318661      -1.3583310
3       -0.5445629      -0.1709282
4        0.1553962       0.3039012
5        0.5861016    

In [15]:
# Step-by-step reproduction of robust_scale() for the STDA_RSK column, printing
# IQR, median, numerator and scaled value for every row
x <- standardabweichungen_df$STDA_RSK
x
iqr <- IQR(x, na.rm = TRUE)
# na.rm = TRUE to match robust_scale() and the IQR call above; without it the
# check would yield NA as soon as the column contains a missing value
med <- median(x, na.rm = TRUE)
zeahler <- x - med
skalliert <- zeahler / iqr
print(paste(iqr, med, zeahler, skalliert, sep = "   "))

 [1] "2.80195218781474   4.99463515378928   1.25538075760363   0.44803789410222"    
 [2] "2.80195218781474   4.99463515378928   -0.614038019139397   -0.219146501432021"
 [3] "2.80195218781474   4.99463515378928   3.94420211637051   1.40766217693622"    
 [4] "2.80195218781474   4.99463515378928   2.08458464886964   0.743975810128087"   
 [5] "2.80195218781474   4.99463515378928   0.614038019139397   0.219146501432021"  
 [6] "2.80195218781474   4.99463515378928   -0.942226896213402   -0.336275151414432"
 [7] "2.80195218781474   4.99463515378928   3.79777559379945   1.35540342562424"    
 [8] "2.80195218781474   4.99463515378928   -0.871993358406229   -0.311209221270225"
 [9] "2.80195218781474   4.99463515378928   -1.20158708871234   -0.428839254980102" 
[10] "2.80195218781474   4.99463515378928   -1.20217995570103   -0.42905084566722"  


In [5]:
# Weights of the three components
gewicht_rsk <- 0.2
gewicht_sdk <- 0.2
gewicht_tmk <- 0.6

# Weighted contribution of each component, based on the absolute value of the
# robust-scaled standard deviation
beitrag_rsk <- gewicht_rsk * abs(standardabweichungen_df[["R_norm_STDA_RSK"]])
beitrag_sdk <- gewicht_sdk * abs(standardabweichungen_df[["R_norm_STDA_SDK"]])
beitrag_tmk <- gewicht_tmk * abs(standardabweichungen_df[["R_norm_STDA_TMK"]])

# The variability score is the sum of the three contributions
standardabweichungen_df$Variabilitaetsscore <-
  beitrag_rsk + beitrag_sdk + beitrag_tmk

# Print all rows ordered by ascending score
aufsteigend <- order(standardabweichungen_df$Variabilitaetsscore)
print(standardabweichungen_df[aufsteigend, ])

      id                 name STDA_RSK STDA_SDK STDA_TMK R_norm_STDA_RSK
5  02712             Konstanz 5.608673 4.695837 7.372305       0.2191465
1  00856             Chieming 6.250016 4.659858 7.376363       0.4480379
8  03987              Potsdam 4.122642 4.607992 7.515048      -0.3112092
4  02290      Hohenpeißenberg 7.079220 4.613091 7.480038       0.7439758
10 04625             Schwerin 3.792455 4.448527 6.976120      -0.4290508
3  01346 Feldberg/Schwarzwald 8.938837 4.478617 7.095335       1.4076622
9  04271   Rostock-Warnemünde 3.793048 4.807175 6.533197      -0.4288393
2  00891             Cuxhaven 4.380597 4.442633 6.133311      -0.2191465
7  03730           Oberstdorf 8.792411 4.049344 7.539743       1.3554034
6  03631            Norderney 4.052408 4.558483 5.810113      -0.3362752
   R_norm_STDA_SDK R_norm_STDA_TMK Variabilitaetsscore
5        0.5861016       0.1709282           0.2636065
1        0.3988226       0.1759378           0.2749348
8        0.1288514       0.34711

In [23]:
# Ranking table: one row per station with its variability score
rank_df <- data.frame(
  Name = standardabweichungen_df$name,
  Variabilitaetsscore = standardabweichungen_df$Variabilitaetsscore
)

# Sort the data frame by descending score
rank_df <- rank_df[order(rank_df$Variabilitaetsscore, decreasing = TRUE), ]

library(ggplot2)

# Build the bar chart before opening the graphics device, so an error while
# constructing the plot cannot leave a PNG device open
rank_plot <- ggplot(
  rank_df,
  aes(
    x = reorder(Name, Variabilitaetsscore),
    y = Variabilitaetsscore,
    fill = Name
  )
) +
  geom_bar(stat = "identity") +
  theme_minimal(base_size = 16) +
  labs(x = "", y = "Variabilitätsscore") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Make sure the output directory exists; png() does not create it
png_name <- file.path("plots", "rank", "rank.png")
dir.create(dirname(png_name), recursive = TRUE, showWarnings = FALSE)

png(png_name, width = 800, height = 400)
# Explicit print(): a ggplot object is only drawn when printed, and top-level
# autoprint does not happen under source() or inside functions and loops
print(rank_plot)
dev.off()