# Generating Soil Strength Data Tagged with Uncertainty Information

### *This notebook uses real world data on soil strength collected in the Wissahickon park in Philadelphia, PA to generate fake data sets with differing levels of variability, and assigns uncertainty rankings to data based on distance from the mean*

In [24]:
# install neccessary packages and load libraries
install.packages (c("tidyverse", "psych"))
library ("tidyverse")
library ("psych")

# load dataframe and filter out rain-event data
WissDat <- read.csv("Data.csv")
WissDat <- filter (WissDat, WissDat$Date != "5-Aug", WissDat$Date != "6-Aug", WissDat$Date != "8-Aug")

"packages 'tidyverse', 'psych' are in use and will not be installed"


In [25]:
# create new binning variable for averaging based on distance 
# 0-10, 15-35, 40-60, 65-85, 90-110, 115-135, 140-160, 165-185
WissDat$Bin <- ifelse (WissDat$Distance <= 10, 1, 
                       ifelse(WissDat$Distance <= 35, 2,
                            ifelse(WissDat$Distance <= 60, 3,
                                  ifelse(WissDat$Distance <= 85, 4,
                                        ifelse(WissDat$Distance <= 110, 5, 
                                              ifelse(WissDat$Distance <= 135, 6, 
                                                    ifelse(WissDat$Distance <= 160, 7,8)))))))

#compute descriptives for soil strength in each bin
describeBy(WissDat$Strength, WissDat$Bin)


 Descriptive statistics by group 
group: 1
   vars  n mean   sd median trimmed  mad  min  max range skew kurtosis   se
X1    1 17 4.64 1.39   4.53    4.64 1.07 1.85 7.42  5.57 0.04    -0.26 0.34
------------------------------------------------------------ 
group: 2
   vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis   se
X1    1 42 4.45 1.18   4.58    4.54 1.15 0.78 6.48   5.7 -0.81     0.53 0.18
------------------------------------------------------------ 
group: 3
   vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis   se
X1    1 18 4.43 1.45   4.39    4.47 1.61 1.36 6.84  5.48 -0.23    -0.89 0.34
------------------------------------------------------------ 
group: 4
   vars  n mean   sd median trimmed  mad  min  max range skew kurtosis   se
X1    1 36 4.91 0.94    4.8    4.86 1.08 3.64 6.72  3.08 0.32    -1.08 0.16
------------------------------------------------------------ 
group: 5
   vars  n mean   sd median trimmed  mad  min  max range  sk

In [26]:
#MOVE FUNCTIONS HERE
Strength <- function(StrengthVar, Min, Max, Mean, SD){
    
    while(StrengthVar < Min | StrengthVar > Max){
        StrengthVar <- rnorm(1, mean = Mean,sd = SD) 
        }
        
    return(StrengthVar)
}



In [27]:
Uncertainty <- function(UncertaintyVar, StrengthVar, Mean, SD){  
    UncertaintyVar <- ifelse(StrengthVar <(Mean - (SD*2)) |
                             StrengthVar > (Mean + (SD*2)), 5,
                      ifelse(StrengthVar < (Mean - SD - (SD/2)) |
                             StrengthVar > (Mean + SD + (SD/2)), 4,
                      ifelse(StrengthVar < (Mean - SD) | 
                             StrengthVar > (Mean + SD), 3,
                      ifelse(StrengthVar > (Mean - (SD/2)) | 
                             StrengthVar < (Mean + (SD/2)), 2, 1))))
    
    return(UncertaintyVar)
    }

In [28]:
#REPLACE CODE BELOW WITH FUNCTION CALLS
TotalM <- 24
WissDat.New <- data.frame(Location = integer(TotalM), 
                          Measurement = integer (TotalM),
                          Strength = integer (TotalM),
                          Uncertainty = integer (TotalM),
                          stringsAsFactors = FALSE)
for (i in 1:TotalM) {
    if (i <= 3){ 
        WissDat.New$Location[i] <- 1 
        WissDat.New$Measurement[i] <- i
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 1.85, 7.42, 4.53, 1.39)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 4.53, 1.39)
    } else if (i <= 6){
        WissDat.New$Location[i] <- 2
        WissDat.New$Measurement[i] <- i - 3
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 0.78, 6.48, 4.58, 1.18)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 4.58, 1.18)
    } else if (i <= 9){
        WissDat.New$Location[i] <- 3
        WissDat.New$Measurement[i] <- i - 4
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 1.36, 6.84, 4.39, 1.45)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 4.39, 1.45)
    } else if (i <= 12){
        WissDat.New$Location[i] <- 4
        WissDat.New$Measurement[i] <- i - 5
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 3.64, 6.72, 4.8, 0.94)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 4.8, 0.94)
    } else if (i <= 15){
        WissDat.New$Location[i] <- 5
        WissDat.New$Measurement[i] <- i - 6    
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 4.27, 6.71, 5.62, 0.72)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 5.62, 0.72)
    } else if (i <= 18){
        WissDat.New$Location[i] <- 6
        WissDat.New$Measurement[i] <- i - 7
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 4.13, 7.32, 6.02, 0.97)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 6.02, 0.97)
    } else if (i <= 21){
        WissDat.New$Location[i] <- 7
        WissDat.New$Measurement[i] <- i - 8
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 3.68, 7.46, 5.61, 0.95)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 5.61, 0.95)
     } else { (i <= 22 - 24) 
        WissDat.New$Location[i] <- 8
        WissDat.New$Measurement[i] <- i - 9    
        WissDat.New$Strength[i] <- Strength(WissDat.New$Strength[i], 3.36, 8.16, 6.02, 1)
        WissDat.New$Uncertainty[i] <- Uncertainty( WissDat.New$Uncertainty[i], WissDat.New$Strength[i], 6.02, 1)  
            }  
}


In [29]:
WissDat.New

Location,Measurement,Strength,Uncertainty
<dbl>,<dbl>,<dbl>,<dbl>
1,1,5.034413,2
1,2,3.754399,2
1,3,5.683977,2
2,1,4.464777,2
2,2,5.0733,2
2,3,4.532911,2
3,3,4.435538,2
3,4,5.051498,2
3,5,3.331894,2
4,5,5.011808,2


In [30]:
TotalM <- 24
WissDat.New2 <- data.frame(Location = integer(TotalM), 
                          Measurement = integer (TotalM),
                          Strength = integer (TotalM),
                          Uncertainty = integer (TotalM),
                          stringsAsFactors = FALSE)


for (i in 1:TotalM) {
    
    if (i <= 3){ 
        WissDat.New2$Location[i] <- 1 
        WissDat.New2$Measurement[i] <- i 
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 1.85, 7.42, 4.53, 2.78)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 4.53, 1.39) 
    } else if (i <= 6){
        WissDat.New2$Location[i] <- 2
        WissDat.New2$Measurement[i] <- i - 3
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 0.78, 6.48, 4.58, 2.36)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 4.58, 1.18)
    } else if (i <= 9){
        WissDat.New2$Location[i] <- 3
        WissDat.New2$Measurement[i] <- i - 4
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 1.36, 6.84, 4.39, 2.9)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 4.39, 1.45)
    } else if (i <= 12){
        WissDat.New2$Location[i] <- 4
        WissDat.New2$Measurement[i] <- i - 5
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 3.64, 6.72, 4.8, 1.88)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 4.8, 0.94)
    } else if (i <= 15){
        WissDat.New2$Location[i] <- 5
        WissDat.New2$Measurement[i] <- i - 6    
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 4.27, 6.71, 5.62, 1.44)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 5.62, 0.72)
    } else if (i <= 18){
        WissDat.New2$Location[i] <- 6
        WissDat.New2$Measurement[i] <- i - 7
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 4.13, 7.32, 6.02, 1.94)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 6.02, 0.97)
    } else if (i <= 21){
        WissDat.New2$Location[i] <- 7
        WissDat.New2$Measurement[i] <- i - 8
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 3.68, 7.46, 5.61, 1.9)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 5.61, 0.95)
     } else { (i <= 22 - 24) 
        WissDat.New2$Location[i] <- 8
        WissDat.New2$Measurement[i] <- i - 9    
        WissDat.New2$Strength[i] <- Strength(WissDat.New2$Strength[i], 3.36, 8.16, 6.02, 2)
        WissDat.New2$Uncertainty[i] <- Uncertainty( WissDat.New2$Uncertainty[i], WissDat.New2$Strength[i], 6.02, 1)  
            }  
}
  

In [31]:
WissDat.New2

Location,Measurement,Strength,Uncertainty
<dbl>,<dbl>,<dbl>,<dbl>
1,1,6.974437,4
1,2,4.509753,2
1,3,7.047681,4
2,1,4.064065,2
2,2,5.892468,3
2,3,1.747949,5
3,3,4.67594,2
3,4,4.619822,2
3,5,3.539676,2
4,5,4.755615,2


In [32]:
TotalM <- 24
WissDat.New3 <- data.frame(Location = integer(TotalM), 
                          Measurement = integer (TotalM),
                          Strength = integer (TotalM),
                          Uncertainty = integer (TotalM),
                          stringsAsFactors = FALSE)


for (i in 1:TotalM) {
    
    if (i <= 3){ 
        WissDat.New3$Location[i] <- 1 
        WissDat.New3$Measurement[i] <- i 
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 1.85, 7.42, 4.53, 4.17)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 4.53, 1.39) 
    } else if (i <= 6){
        WissDat.New3$Location[i] <- 2
        WissDat.New3$Measurement[i] <- i - 3
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 0.78, 6.48, 4.58, 3.54)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 4.58, 1.18)
    } else if (i <= 9){
        WissDat.New3$Location[i] <- 3
        WissDat.New3$Measurement[i] <- i - 4
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 1.36, 6.84, 4.39, 4.35)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 4.39, 1.45)
    } else if (i <= 12){
        WissDat.New3$Location[i] <- 4
        WissDat.New3$Measurement[i] <- i - 5
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 3.64, 6.72, 4.8, 2.82)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 4.8, 0.94)
    } else if (i <= 15){
        WissDat.New3$Location[i] <- 5
        WissDat.New3$Measurement[i] <- i - 6    
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 4.27, 6.71, 5.62, 2.16)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 5.62, 0.72)
    } else if (i <= 18){
        WissDat.New3$Location[i] <- 6
        WissDat.New3$Measurement[i] <- i - 7
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 4.13, 7.32, 6.02, 2.91)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 6.02, 0.97)
    } else if (i <= 21){
        WissDat.New3$Location[i] <- 7
        WissDat.New3$Measurement[i] <- i - 8
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 3.68, 7.46, 5.61, 2.85)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 5.61, 0.95)
     } else { (i <= 22 - 24) 
        WissDat.New3$Location[i] <- 8
        WissDat.New3$Measurement[i] <- i - 9    
        WissDat.New3$Strength[i] <- Strength(WissDat.New3$Strength[i], 3.36, 8.16, 6.02, 3)
        WissDat.New3$Uncertainty[i] <- Uncertainty( WissDat.New3$Uncertainty[i], WissDat.New3$Strength[i], 6.02, 1)  
            }  
}
  

In [33]:
WissDat.New3

Location,Measurement,Strength,Uncertainty
<dbl>,<dbl>,<dbl>,<dbl>
1,1,6.341723,3
1,2,3.380403,2
1,3,2.306603,4
2,1,3.459175,2
2,2,1.890433,5
2,3,1.933107,5
3,3,6.01862,3
3,4,6.361537,3
3,5,3.621534,2
4,5,4.715166,2


In [34]:
install.packages ("data.table")

"package 'data.table' is in use and will not be installed"


In [35]:
library ("data.table")

In [21]:
fwrite(WissDat.New, file="1GeneratedData.SignalPresent.OriginalDistribution.csv")

In [22]:
fwrite(WissDat.New2, file="2GeneratedData.SignalPresent.2xSDdistribution.csv")

In [23]:
fwrite(WissDat.New3, file="3GeneratedData.SignalPresent.3xSDdistribution.csv")

In [38]:
One <- read.csv("1GeneratedData.SignalPresent.OriginalDistribution.csv")
Two <- read.csv("2GeneratedData.SignalPresent.2xSDdistribution.csv")
Three <- read.csv("3GeneratedData.SignalPresent.3xSDdistribution.csv")

In [39]:
One
Two
Three

Location,Measurement,Strength,Uncertainty
<int>,<int>,<dbl>,<int>
1,1,6.322959,3
1,2,5.89675,2
1,3,5.521799,2
2,1,4.027208,2
2,2,3.749779,2
2,3,2.156858,5
3,3,3.056224,2
3,4,2.806891,3
3,5,3.901582,2
4,5,5.248223,2


Location,Measurement,Strength,Uncertainty
<int>,<int>,<dbl>,<int>
1,1,5.876353,2
1,2,4.014474,2
1,3,2.735336,3
2,1,3.346279,3
2,2,1.858478,5
2,3,2.464523,4
3,3,5.30827,2
3,4,2.027191,4
3,5,3.461553,2
4,5,5.004215,2


Location,Measurement,Strength,Uncertainty
<int>,<int>,<dbl>,<int>
1,1,3.5062895,2
1,2,7.3464992,5
1,3,4.4615845,2
2,1,0.8025868,5
2,2,4.2496358,2
2,3,5.9582991,3
3,3,2.9851531,2
3,4,5.2392744,2
3,5,4.9131988,2
4,5,4.2450996,2


In [40]:
colnames(One)

In [41]:
names(One)[names(One) == "Strength"] <- "StrengthOne" 
names(One)[names(One) == "Uncertainty"] <- "UncertaintyOne"

In [42]:
One

Location,Measurement,StrengthOne,UncertaintyOne
<int>,<int>,<dbl>,<int>
1,1,6.322959,3
1,2,5.89675,2
1,3,5.521799,2
2,1,4.027208,2
2,2,3.749779,2
2,3,2.156858,5
3,3,3.056224,2
3,4,2.806891,3
3,5,3.901582,2
4,5,5.248223,2


In [43]:
names(Two)[names(Two) == "Strength"] <- "StrengthTwo" 
names(Two)[names(Two) == "Uncertainty"] <- "UncertaintyTwo"
Two

Location,Measurement,StrengthTwo,UncertaintyTwo
<int>,<int>,<dbl>,<int>
1,1,5.876353,2
1,2,4.014474,2
1,3,2.735336,3
2,1,3.346279,3
2,2,1.858478,5
2,3,2.464523,4
3,3,5.30827,2
3,4,2.027191,4
3,5,3.461553,2
4,5,5.004215,2


In [45]:
names(Three)[names(Three) == "Strength"] <- "StrengthThree" 
names(Three)[names(Three) == "Uncertainty"] <- "UncertaintyThree"
Three

Location,Measurement,StrengthThree,UncertaintyThree
<int>,<int>,<dbl>,<int>
1,1,3.5062895,2
1,2,7.3464992,5
1,3,4.4615845,2
2,1,0.8025868,5
2,2,4.2496358,2
2,3,5.9582991,3
3,3,2.9851531,2
3,4,5.2392744,2
3,5,4.9131988,2
4,5,4.2450996,2


In [46]:
GSU <- full_join(One,Two,Three, by=c("Location","Measurement"))

In [47]:
GSU

Location,Measurement,StrengthOne,UncertaintyOne,StrengthTwo,UncertaintyTwo
<int>,<int>,<dbl>,<int>,<dbl>,<int>
1,1,6.322959,3,5.876353,2
1,2,5.89675,2,4.014474,2
1,3,5.521799,2,2.735336,3
2,1,4.027208,2,3.346279,3
2,2,3.749779,2,1.858478,5
2,3,2.156858,5,2.464523,4
3,3,3.056224,2,5.30827,2
3,4,2.806891,3,2.027191,4
3,5,3.901582,2,3.461553,2
4,5,5.248223,2,5.004215,2


In [48]:
GSU1 <- full_join(GSU,Three, by=c("Location","Measurement"))
GSU1

Location,Measurement,StrengthOne,UncertaintyOne,StrengthTwo,UncertaintyTwo,StrengthThree,UncertaintyThree
<int>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>
1,1,6.322959,3,5.876353,2,3.5062895,2
1,2,5.89675,2,4.014474,2,7.3464992,5
1,3,5.521799,2,2.735336,3,4.4615845,2
2,1,4.027208,2,3.346279,3,0.8025868,5
2,2,3.749779,2,1.858478,5,4.2496358,2
2,3,2.156858,5,2.464523,4,5.9582991,3
3,3,3.056224,2,5.30827,2,2.9851531,2
3,4,2.806891,3,2.027191,4,5.2392744,2
3,5,3.901582,2,3.461553,2,4.9131988,2
4,5,5.248223,2,5.004215,2,4.2450996,2


In [None]:
#split dataframe into two, One with location, measurement, strenghtOne,Two, and Three, and the other with UncertaintyOne,Two,and Three 
#2 dataframes, 24 rows + 5 coloumns. Use gather function to gather the three variables that share the name, create new index variable "varType" or "VarLevel" -> 4 coloumns (location measurements type uncertainty) 72 rows 
#Repeat for strength
#PLOTTING, y axis= strength or uncertianty x axis= Location..... Can be either discreate or contiuous ... NEW Index/VarLevel - type color shape 
#Scatterplot 