-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.predictive.models.R
141 lines (98 loc) · 3.77 KB
/
analysis.predictive.models.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
library(tidyverse)
library(gsheet)
library(lubridate)
library(data.table)
library(readxl)
######## LOAD THE DATA #######################
# Each table is downloaded from a shared Google Sheet with gsheet2tbl().
# The three share links are kept in url / url2 / url3.
url <- "https://docs.google.com/spreadsheets/d/14nn7NWMBatbzcz9nqcTFzQghmzMUE2o0/edit?usp=sharing&ouid=104854259661631892531&rtpof=true&sd=true"
sud <- gsheet2tbl(url)

url2 <- "https://docs.google.com/spreadsheets/d/1WdrZuZP9J6Im4KQdo6MudPQC4Fwf13rD/edit?usp=sharing&ouid=104854259661631892531&rtpof=true&sd=true"
sond <- gsheet2tbl(url2)

url3 <- "https://docs.google.com/spreadsheets/d/1eRF3RnsWTt6ObsY6hlgi6jOjvliS1Gn5/edit#gid=200414766"
lagoonC <- gsheet2tbl(url3)

# Print the lagoonC table.
lagoonC
######## CLEAN THE DATA #######################
# Show the column names of sud.
names(sud)

# Give every table the same `Date` column name.
sond <- sond %>%
  rename(Date = `Date (MM/DD/YYYY)`)
sud <- sud %>%
  rename(Date = `Date Values Reflect`)

# Remove rows whose Date cell holds a status message instead of a date.
# The previous condition chained `!=` tests with `|`; that is TRUE for every
# non-missing value (a cell cannot equal both messages at once), so no row was
# ever removed. `%in%` expresses "none of these"; the explicit is.na() check
# keeps dropping rows with a missing Date, as the old filter did.
sud_placeholders <- c("MISSING DATA", "NO POWER - AERATOR INSTALL")
sud <- sud %>%
  filter(!is.na(Date), !Date %in% sud_placeholders)

library(lubridate)

# sud2 holds the parsed (month/day/year) dates; sud itself keeps the raw
# Date values here.
sud2 <- sud
sud2$Date <- mdy(sud$Date)

lagoonC <- lagoonC %>%
  rename(Date = `Date (MM/DD/YYYY)`)

# Same placeholder clean-up for lagoonC (see the explanation above for why
# `%in%` replaces the `|` chain).
lagoonC_placeholders <- c(
  "MISSING DATA - POWER ISSUE",
  "SONDES AT WATSON RESEARCH SITE",
  "DAMAGED COND/TEMP SENSOR CAUSED FAULT THAT HAULTED LOGGING"
)
lagoonC <- lagoonC %>%
  filter(!is.na(Date), !Date %in% lagoonC_placeholders)
############FIX THE DATES #####################
# Parse the month/day/year text in sond into Date values.
sond <- sond %>%
  mutate(Date = mdy(Date))

# Keep only sud2 rows dated on or after 2020-10-19.
sud <- sud2 %>%
  filter(Date >= as.Date("2020-10-19"))

# Parse lagoonC dates, then drop every row from calendar year 2022.
lagoonC <- lagoonC %>%
  mutate(Date = mdy(Date)) %>%
  filter(year(Date) != 2022)
############ MELT THE DATA ######################
# Reshape a wide table to long form: columns 1-4 are kept as identifiers and
# every column from the 5th onward is stacked by melt().
to_long <- function(wide) {
  melt(
    data = as.data.table(wide),
    id.vars = 1:4,
    measure.vars = 5:ncol(wide)
  )
}

lagoonC2 <- to_long(lagoonC)
sond2 <- to_long(sond)
############# ADD THE MONTH AND YEAR COLUMNS ###############
# Add two columns derived from `Date`:
#   month - full English month name (looked up in the built-in month.name)
#   year  - calendar year
# `df` must have a `Date` column holding parsed dates.
add_month_year <- function(df) {
  df %>%
    mutate(
      month = month.name[month(Date)],
      year = year(Date)
    )
}

# Build on `sud`, which has already been restricted to dates on or after
# 2020-10-19. Starting again from `sud2` (as before) silently threw that
# restriction away.
sud <- add_month_year(sud)
sond2 <- add_month_year(sond2)
lagoonC2 <- add_month_year(lagoonC2)
########## combine the data sets ##################
# Stack the two long tables into one.
all_data <- rbind(lagoonC2, sond2)
###################################################
# Turbidity readings for 2021, with month as a factor ordered January to
# December so that plots show the months in calendar order.
# The former `filter(year == year)` step compared the column with itself and
# removed nothing that the `year == 2021` test does not also remove, so it is
# gone. na.omit() drops every row that has an NA in any column.
avg_boxplot <- all_data %>%
  filter(year == 2021, variable == "Turbidity NTU") %>%
  mutate(month = factor(month, levels = month.name)) %>%
  na.omit()
# Box plot of the turbidity values in avg_boxplot by month, one panel per
# `Site Name`. `value` is converted with as.numeric() for the y axis.
# Disabled snippet kept from the original: substr(month,1,1),
avg_boxplot %>%
  ggplot(aes(x = month, y = as.numeric(value))) +
  geom_boxplot() +
  facet_wrap(~`Site Name`) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Variance of Turbidity (2021)",
    subtitle = "Turbidity (NTU) in Lagoon C and Basin 3",
    x = "Months",
    y = "Turbidity NTU"
  )
# Data for the predictive turbidity plot: mean turbidity per month for the
# 'Wetland Basin 3' site, taken from the 2021 turbidity rows of avg_boxplot.
avg_turb <- avg_boxplot %>%
  filter(
    year == 2021,
    variable == "Turbidity NTU",
    `Site Name` == "Wetland Basin 3"
  ) %>%
  group_by(month) %>%
  summarise(avgturb = mean(as.numeric(value)))
# predictive model for turbidity
# Scatter of every turbidity value in avg_boxplot by month, overlaid with the
# monthly means from avg_turb (red points joined by a blue line).
# NOTE(review): the black points come from all sites in avg_boxplot, while
# avg_turb is restricted to 'Wetland Basin 3' - confirm this is intended.
avg_boxplot %>%
  ggplot(aes(x = month, y = as.numeric(value))) +
  geom_point() +
  geom_point(
    data = avg_turb,
    mapping = aes(x = month, y = avgturb),
    colour = "red",
    size = 2
  ) +
  geom_line(
    data = avg_turb,
    mapping = aes(x = month, y = avgturb),
    colour = "blue",
    size = .5,
    group = 1 # single group so the line connects the discrete month levels
  ) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Predicted Turbidity Using 2021",
    subtitle = "Turbidity (NTU) Predicted per Month",
    x = "Months",
    y = "Turbidity NTU"
  )
# ylim(0,125)