# Functions used in multiple files to be run with source()
Note that these functions currently lack basic error checking and error messages (TODO)

In [None]:
# Functions used in multiple files to be run with source()
## Note that these functions currently lack basic error checking and error messages (TODO)

## Clear session memory

In [None]:
## Clear session memory
# Remove objects from an environment and close every open graphics device.
#   envir:         environment to clear; defaults to the global environment
#                  (the interactive workspace).
#   keepFunctions: when TRUE (default) function objects are left in place, so
#                  helpers loaded with source() are not wiped by this call.
# Returns NULL invisibly.
clearMemory<-function(envir=globalenv(), keepFunctions=TRUE){
    # ls() with no arguments inside a function lists that function's own
    # (empty) frame, so the target environment has to be named explicitly.
    objNames <- ls(envir = envir)
    if (keepFunctions) {
        isFn <- vapply(
            objNames,
            function(nm) is.function(get(nm, envir = envir, inherits = FALSE)),
            logical(1)
        )
        objNames <- objNames[!isFn]
    }
    rm(list = objNames, envir = envir) # clear R working memory
    graphics.off() # close any open plots
    invisible(NULL)
}

## Take a list of packages, install them to the local library, if necessary, and then load them all

In [2]:
## Take a list of packages, install them to the local library, if necessary, and then load them all
# Install any packages from `packageList` that are not yet in the local
# library, then attach all of them.
#   packageList: character vector of package names; defaults to the global
#                `packages` vector.
# Returns the result of the library() calls invisibly.
packageHandler<-function(packageList=packages){
    # Install packages if necessary.
    # The argument (not the global `packages`) is used throughout so that an
    # explicitly supplied list is honoured.
    isInstalled <- packageList %in% rownames(installed.packages())
    if (any(!isInstalled)) {
      install.packages(packageList[!isInstalled])
    }

    # Load packages
    invisible(lapply(packageList, library, character.only = TRUE))
}

In [3]:
# # # Package names
# packages <- c("dplyr", "doBy", "lubridate", "ggplot2", "Hmisc", "zoo", "viridis", "rmarkdown")
# packageHandler(packages)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘doBy’


The following object is masked from ‘package:dplyr’:

    order_by



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union


Loading required package: lattice

Loading required package: survival

Loading required package: Formula


Attaching package: ‘Hmisc’


The following objects are masked from ‘package:dplyr’:

    src, summarize


The following objects are masked from ‘package:base’:

    format.pval, units



Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


Loading required package: viridisLite



In [4]:
# input<-"~/Documents/github/SensorDataProcessing/rriv_methane/methane_functions.ipynb"
# convert_ipynb(input)

# 1. File management

In [None]:
# 1. File management

## Delete a file if it exists

In [11]:
# check if file at path exists, delete if it does
# will only call in functions that generate new files
# Remove the file at `path` when it is present.
# Prints a notice before deleting; returns the result of file.remove(), or
# NULL invisibly when there was nothing to delete.
cleanFile<-function(path){
  # guard: nothing to do when the file is absent
  if(!file.exists(path)){
    return(invisible(NULL))
  }
  notice<-sprintf("Deleting old file: %s\n",path)
  cat(notice)
  file.remove(path)
}

## Create a new directory if it does not exist

In [12]:
# check if directory exists, create if not
# Ensure that the directory `dirPath` exists, reporting what was done.
# Missing parent directories are created as well, so nested output paths
# work on the first call.
newDir<-function(dirPath){
  if(dir.exists(dirPath)){
    cat(sprintf("Output directory, %s, already exists\n", dirPath))
  }
  else{
    cat(sprintf("Output directory not found, creating: %s\n",dirPath))
    # recursive = TRUE: without it dir.create() fails when a parent is missing
    dir.create(dirPath, recursive = TRUE)
  }
}

## Save a list of plots to an output directory

In [13]:
# input: a named list of plots, an optional filename tag, and the output directory
# output: saves each plot to the output directory as <od><tag><plot name>.png
#800 is a little over 5" on my screen at 1920x1080
# Write every plot in `plotList` to its own PNG file.
#   plotList: list of printable plot objects; element names become part of
#             the file name (positions are used when the list is unnamed).
#   tag:      string inserted between `od` and the plot name.
#   width, height: PNG size in pixels.
#   od:       output directory prefix; it is pasted directly in front of the
#             file name, so it should end with a path separator.
# Returns NULL invisibly.
savePlotList<-function(plotList, tag="", width=800, height=800, od=outputDir){
    plots<-names(plotList)
    if(is.null(plots)){
        # unnamed list: fall back to positions so nothing is silently skipped
        plots<-as.character(seq_along(plotList))
    }
    # seq_along() yields no iterations for an empty list (1:0 would run twice)
    for(i in seq_along(plotList)){
        pngPath <- paste0(od, tag, plots[i], ".png")
        png(filename=pngPath, width=width, height=height)
        # always close the device, even when printing the plot fails
        tryCatch(print(plotList[[i]]), finally = dev.off())
    }
    invisible(NULL)
}

## Save a list of list of plots to an output directory

In [14]:
# Save a list of list of plots to an output directory
# Save a list of plot lists by calling savePlotList() on each inner list.
# The name of each inner list is appended to `tag` (followed by "_") so files
# from different inner lists do not collide.
#   pll: list of plot lists.
#   tag, width, height, od: forwarded to savePlotList().
# Returns NULL invisibly.
savePlotListList<-function(pll, tag="", width=800, height=800, od=outputDir){
    # seq_along() makes an empty `pll` a no-op instead of an indexing error
    for(i in seq_along(pll)){
        innerTag <- paste0(tag, names(pll[i]), "_")
        # every argument is passed by name so `od` cannot land in another slot
        savePlotList(pll[[i]], tag=innerTag, width=width, height=height, od=od)
    }
    invisible(NULL)
}

## Save a dataframe as an rds file to a specified directory
filename is : dataframeName_{user input string}
TODO: what if tag is left as ""?

In [15]:
## Save a dataframe as an rds file to a specified directory
# filename is : dataframeName_{user input string}
# TODO: what if tag is left as ""?
# Save `inputDF` as an .rds file named <od><expression>_<tag>.rds, where
# <expression> is the text of the argument as written by the caller.
#   inputDF: object to save.
#   tag:     suffix for the file name; with the default "" the name ends "_.rds".
#   od:      output directory prefix, pasted directly in front of the name.
# Prints the output path before writing.
saveDFrds<-function(inputDF, tag="", od=outputDir){
    # deparse() gives one string even when the argument is a call such as
    # head(df); pasting the raw call would produce several paths.
    dfName<-paste(deparse(substitute(inputDF)), collapse="")
    outputPath<-paste0(od,dfName,"_",tag,".rds")
    print(outputPath)
    saveRDS(inputDF, outputPath)
}

## Save a dataframe as a csv file to a specified directory

In [16]:
## Save a dataframe as a csv file to a specified directory
# Save `inputDF` as a csv file named <od><expression>_<tag>.csv, where
# <expression> is the text of the argument as written by the caller.
#   inputDF: data frame to write with write.csv().
#   tag:     suffix for the file name; with the default "" the name ends "_.csv".
#   od:      output directory prefix, pasted directly in front of the name.
# Prints the output path before writing.
saveDFcsv<-function(inputDF, tag="", od=outputDir){
    # deparse() gives one string even when the argument is a call such as
    # head(df); pasting the raw call would produce several paths.
    dfName<-paste(deparse(substitute(inputDF)), collapse="")
    outputPath<-paste0(od,dfName,"_",tag,".csv")
    print(outputPath)
    write.csv(inputDF, outputPath)
}

## Custom readCSV function for RRIV data

In [17]:
## Custom readCSV function for RRIV data
# define custom call to read.csv
# RRIV data has a "type" column which contains "debug" lines which can be removed for processing data
# some files have malformed lines that offset the data;
# these can be detected by a missing logger value, and such rows are removed
# Read one RRIV csv file and keep only the usable rows: rows whose `type`
# is not "debug" and whose `logger` value is present.
# The filtered data frame is returned invisibly.
read_rriv_CSV<-function(filePath){
    rawRows<-read.csv(filePath,header=TRUE)
    keepRow<-with(rawRows, type!="debug" & !is.na(logger))
    # rows where the condition is NA are dropped, not kept
    keepRow<-keepRow & !is.na(keepRow)
    invisible(rawRows[keepRow, , drop=FALSE])
}

## Concatenate all files in a directory including sub directories into one dataframe and return it using a supplied read function (such as read.csv, read.csv2, or a custom reading function )

In [18]:
## Concatenate all files in a directory including sub directories into one dataframe and return it using a supplied read function (such as read.csv, read.csv2, or a custom reading function )
# input: "directory" containing all files to be concatenated (will check sub directories),
# "readFn" function to use for reading files,
# "filePattern" regular expression used to match specific file names/types/etc, ex "\\.csv$"
# "minFileSize" omit files under specified size in bytes, 0 bytes by default
# output: dataframe of concatenated files
# Read every matching file under `directory` (sub directories included) with
# `readFn` and row-bind the results into one data frame.
#   directory:   root directory to search.
#   readFn:      function taking a file path and returning a data frame.
#   filePattern: regular expression passed to list.files().
#   minFileSize: files of this size in bytes or smaller are skipped.
# Returns the combined data frame, or NULL (with a warning) when no file
# qualifies.
concat_dirs<-function(directory=dataDirectory, readFn, filePattern, minFileSize=0){
    # list all files following specific pattern in folders and subfolders
    Files<-list.files(path=directory, recursive=TRUE, pattern=filePattern, full.names=TRUE)

    # keep files larger than minFileSize bytes (unknown sizes are dropped)
    sizes<-file.size(Files)
    Files<-Files[!is.na(sizes) & sizes>minFileSize]

    # nothing to read: say so instead of reporting a generated dataframe
    if(length(Files)==0){
        warning("concat_dirs: no files matching '", filePattern,
                "' larger than ", minFileSize, " bytes found in ", directory,
                call.=FALSE)
        return(NULL)
    }

    #read each file and output a single dataframe
    data<-do.call(rbind, lapply(Files, readFn))

    print("Dataframe generated, manually process column types if necessary")
    return(data)
}

# 2. RRIV data specific functions

In [None]:
# 2. RRIV data specific functions

## Custom column data type processing for RRIV csv data

In [21]:
## Custom column data type processing for RRIV csv data
# Convert the columns of an RRIV data frame to their working types.
#   factor:   type, site, logger, deployment, uuid (and burstCycle if present)
#   integer:  deployed_at
#   numeric:  time.s, battery.V, dht_C, dht_RH, ch4rf_raw, ch4rf_cal, ch4_raw,
#             ch4_cal (and measurementCycle / atlas_CO2_ppm if present)
#   datetime: time.h, via lubridate::as_datetime()
# Stops with a message naming the missing columns when a required column is
# absent. Returns the converted data frame.
process_rriv_columns<-function(df){
    factorCols<-c("type", "site", "logger", "deployment", "uuid")
    numericCols<-c("time.s", "battery.V", "dht_C", "dht_RH",
                   "ch4rf_raw", "ch4rf_cal", "ch4_raw", "ch4_cal")
    requiredCols<-c(factorCols, "deployed_at", "time.h", numericCols)

    # fail early with a readable message instead of a cryptic
    # "replacement has 0 rows" error from the first missing column
    missingCols<-setdiff(requiredCols, names(df))
    if(length(missingCols)>0){
        stop("process_rriv_columns: missing required column(s): ",
             paste(missingCols, collapse=", "), call.=FALSE)
    }

    for(nm in factorCols){
        df[[nm]]<-as.factor(df[[nm]])
    }
    df$deployed_at<-as.integer(df$deployed_at)
    df$time.h<-lubridate::as_datetime(df$time.h)
    for(nm in numericCols){
        df[[nm]]<-as.numeric(df[[nm]])
    }

    # optional columns, converted only when present
    cols<-names(df)
    if("measurementCycle" %in% cols && "burstCycle" %in% cols){
        df$measurementCycle<-as.numeric(df$measurementCycle)
        df$burstCycle<-as.factor(df$burstCycle)
    }
    if("atlas_CO2_ppm" %in% cols){
        df$atlas_CO2_ppm<-as.numeric(df$atlas_CO2_ppm)
    }

    return(df)
}

ERROR: Error in class(ff) <- "formula": attempt to set an attribute on NULL


## Custom RRIV data parsing that created measurementCycle and burstCycle columns, before the columns were included in the RRIV firmware

## Function that parses through the data and assigns a burst number and then counts each measurement in a burst. (Data will eventually have measurementCycle and burstCycle natively)

In [None]:
## Custom RRIV data parsing that created measurementCycle and burstCycle columns, before the columns were included in the RRIV firmware

## Function that parses through the data and assigns a burst number and then counts each measurement in a burst. (Data will eventually have measurementCycle and burstCycle natively)
# Derive burst / measurement-cycle bookkeeping columns from the time stamps
# of one logger's data.
#   df: data frame with a numeric `time.s` column, in time order.
#   burst_interval_threshold: a gap in `time.s` larger than this starts a
#       new burst.
#   measure_cycle_size: when the burst counter reaches this value the
#       measurement cycle is incremented and the burst counter restarts.
# Adds the columns interval (time.s difference to the previous row),
# measurementCycle, burstCycle (factor) and reading (position of the row
# within its burst, starting at 1). If measurementCycle and burstCycle are
# both already present, `df` is returned unchanged.
parse_data<-function(df,burst_interval_threshold,measure_cycle_size){
    #check if measurementCycle and burstCycle already present in data:
    cols<-names(df)
    if("measurementCycle" %in% cols && "burstCycle" %in% cols){
        return(df)
    }
    if(!("time.s" %in% cols)){
        stop("parse_data: column 'time.s' is required", call.=FALSE)
    }

    comp_rows<-nrow(df)

    # difference to the previous row; the first row has no predecessor (NA).
    # Base diff() replaces Hmisc::Lag so no attached package is needed.
    intervals<-c(NA, diff(df$time.s))[seq_len(comp_rows)]

    # working vectors; every row starts in burst 1 / cycle 1 / reading 1
    measCol<-rep(1, comp_rows)
    burstCol<-rep(1, comp_rows)
    readingCol<-rep(1, comp_rows)

    measurementCycle<-1
    burstCycle<-1
    # next reading number to hand out inside the current burst; the row that
    # opens a burst is reading 1, so the following row is reading 2
    reading<-2

    # seq_len() keeps a 0-row data frame from entering the loop
    for(i in seq_len(comp_rows)){
        if(is.na(intervals[i])){
            next
        }
        if(intervals[i]>burst_interval_threshold){
            # gap found: this row opens a new burst
            burstCycle<-burstCycle+1
            burstCol[i:comp_rows]<-burstCycle
            readingCol[i]<-1
            reading<-2
        }else{
            readingCol[i]<-reading
            reading<-reading+1
        }
        # NOTE(review): the cycle is advanced on the first row of the burst
        # that reaches measure_cycle_size, so that burst is labelled with the
        # new cycle number; kept as in the original, confirm this is intended
        if(burstCycle == measure_cycle_size){
            measurementCycle<-measurementCycle+1
            measCol[i:comp_rows]<-measurementCycle
            burstCycle<-0
        }
    }

    df$interval<-intervals
    df$measurementCycle<-measCol
    df$burstCycle<-as.factor(burstCol)
    df$reading<-readingCol
    return(df)
}

## Function to process each logger individually using above function and output single dataframe

In [None]:
## Function to process each logger individually using above function and output single dataframe
# Run parse_data() separately on each logger's rows and stack the results
# into one data frame.
#   df: data frame with a `logger` column.
#   ll: vector of logger identifiers to process.
#   lc: number of loggers to process (entries of `ll` used).
#   burst_interval_threshold, measure_cycle_size: forwarded to parse_data();
#       both default to 30, the values previously hard-coded here.
parseIndividualLoggers<-function(df, ll=loggerList, lc=loggerCount,
                                 burst_interval_threshold=30, measure_cycle_size=30){
    loggerDataList<-vector("list", lc) #empty list to hold temporary data frames

    #subset each logger's data and parse
    # seq_len() makes lc == 0 a no-op instead of iterating over c(1, 0)
    for(i in seq_len(lc)){
        # which() drops NA matches, like subset(); plain indexing avoids the
        # data frame's own columns shadowing `ll` or `i`
        loggerRows<-which(df$logger==ll[i])
        loggerDataList[[i]]<-parse_data(df[loggerRows, , drop=FALSE],
                                        burst_interval_threshold, measure_cycle_size)
    }
    return(dplyr::bind_rows(loggerDataList))
}

## Function to calculate rolling CV for warm-up detection
Starting with using the summary data for calculations

In [None]:
## Function to calculate rolling CV for warm-up detection
# Starting with using the summary data for calculations
# calculateCV<-function(processed_data, dataType, lc=loggerCount, ll=loggerList){
#     temp_cv<-data.frame()
#     calculated_cv_list<-vector("list", lc)
#     for(i in 1:lc){
#         df<-subset(processed_data, logger==ll[i] & type==dataType)

#         measurementCycles<-unique(df$measurementCycle)
#         for(i in 1:length(measurementCycles)){
#             temp<-subset(df,measurementCycle==measurementCycles[i])
#             temp$roll_mean<-rollapply(data=temp$ch4_raw,width=4,align=c("right"),FUN=mean,fill=NA)
#             temp$roll_sd<-rollapply(data=temp$ch4_raw,width=4,align=c("right"),FUN=sd,fill=NA)
#             temp$roll_cv<-temp$roll_sd/temp$roll_mean #optionally multiply by 100 for %
#             temp_cv<-bind_rows(temp_cv,temp)
#         }
#         calculated_cv_list[[i]]<-temp_cv
#     }
#     return(bind_rows(calculated_cv_list))
# }

# 3. Plotting functions

In [None]:
# 3. Plotting functions

## Function to create basic plots of burst vs raw methane reading colored by measurement cycle

In [None]:
## Function to create basic plots of burst vs raw methane reading colored by measurement cycle
# cycleVsCH4_plots<-function(processed_data, dataType, lc=loggerCount, ll=loggerList){
#     plots<-vector("list", lc)
#     names(plots)<-ll
#     for(i in 1:lc){
#         data<-subset(processed_data, logger==ll[i] & type==dataType)

#         plots[[i]]<-ggplot(data, aes(burstCycle, ch4_raw))+
#             geom_point(aes(color=measurementCycle))+scale_color_viridis()+
#             ggtitle(paste("Logger: ", ll[i], "\nData type: ", dataType))
#     }
#     return(plots)
# }

## Function to plot rolling cv vs burst cycle and colored by measurement cycle

In [None]:
## Function to plot rolling cv vs burst cycle and colored by measurement cycle
# rollCV_plots<-function(parseDataCV, ll=loggerList, lc=loggerCount){
#     plots<-vector("list", lc)
#     names(plots)<-ll
#     for(i in 1:lc){
#         data<-subset(parseDataCV, logger==ll[i])
#         plots[[i]]<-ggplot(data, aes(burstCycle, roll_cv))+
#         geom_point(aes(color=as.integer(measurementCycle)))+
#         scale_y_log10()+scale_color_viridis()+
#         ggtitle(paste("Rolling CV for Logger: ",ll[i]))
#     }
#     return(plots)
# }

## function that goes through each item in a variable dictionary and creates a basic plot of variable vs a time column, or just x vs y colored by logger/site

In [None]:
## function that goes through each item in a variable dictionary and creates a basic plot of variable vs a time column, or just x vs y colored by logger/site
# Build one point-and-line plot per variable: the variable against a time
# column, colored by a grouping column.
#   df:      data frame holding the time, variable and color columns.
#   color:   name of the column mapped to color.
#   timeCol: name of the column mapped to the x axis.
#   vd:      variable dictionary; its names label the returned list and its
#            values are used as y-axis labels.
#   vk:      column names of the variables to plot.
#   vc:      number of variables to plot.
# Returns a list of `vc` ggplot objects.
plot_Data_v_Time <-function(df, color="logger", timeCol="time.h", vd=variableDict, vk=variableKeys, vc=variableCount){
  # initialize list to hold plots
  DvT <- vector("list", vc)
  names(DvT) <- names(vd)

  # plot each column vs time with all deployments into list
  # seq_len() makes vc == 0 return an empty list instead of looping over c(1, 0)
  for ( i in seq_len(vc) ){
    # aes_string() takes the column names as strings, so each plot is bound
    # to its own variable at creation time.
    # The stray size=1 formerly passed to ggplot() was dropped; it is not a
    # ggplot() argument and did not change the plot.
    DvT[[i]] <- ggplot(data=df,aes_string(x=timeCol,y=vk[i],color=color))+
      geom_point()+geom_line()+theme_classic(base_size=12)+
      # [[ ]] yields the bare label whether vd is a vector or a list
      labs(x="Date", y=vd[[i]], color=NULL)
    # +
    #   scale_color_manual(values=custom_colors2,na.translate=F)+
    #   scale_x_datetime(date_labels="%m/%d %H",breaks=scales::pretty_breaks(n=4),expand=c(0,60*5))
  }
  return(DvT)
}

## function that goes through and does each series of basic plots of variable vs time.h for each individual logger
### TODO, allow for uuid/site instead of logger
1. function can count unique uuid/site occurrences in df and iterate over that
2. title can be column name? or added by user later, or generic "RRIV:"

In [None]:
## function that goes through and does each series of basic plots of variable vs time.h for each individual logger
### TODO, allow for uuid/site instead of logger
# 1. function can count unique uuid/site occurrences in df and iterate over that
# 2. title can be column name? or added by user later, or generic "RRIV:"
# Build, for every variable and every logger, a scatter plot of the variable
# against a time column using only that logger's rows.
#   df:      data frame with a `logger` column plus the time and variable columns.
#   timeCol: name of the column mapped to the x axis.
#   vd:      variable dictionary; values are used as y-axis labels.
#   vk:      column names of the variables to plot.
#   vc:      number of variables.
#   ll:      logger identifiers.
#   lc:      number of loggers.
# Returns a list named by `vk`; each element is a list named by `ll` holding
# one ggplot object per logger.
plot_individual_logger_data_v_time <-function(df, timeCol="time.h", vd=variableDict, vk=variableKeys, vc=variableCount,
                                              ll=loggerList, lc=loggerCount){
    ## list of lists, where list values are the names of columns and loggers for the deployment
    output <- vector("list", vc)
    names(output) <- vk

    # initialize empty double list to hold plots
    for(i in seq_len(vc)){
        output[[i]] <- vector("list", lc)
        # name by the `ll` argument, not the global loggerList
        names( output[[i]] ) <- ll
    }

    # create plots at respective locations
    # slots are addressed by position: indexing with ll[j] would be treated
    # as a position when logger ids are numeric or factor
    for(i in seq_len(vc)){
        for(j in seq_len(lc)){
          # which() drops NA matches, like subset()
          loggerRows <- which(df$logger==ll[j])
          output[[i]][[j]] <- ggplot(data=df[loggerRows, , drop=FALSE])+
            geom_point(aes_string(x=timeCol,y=vk[i]),size=1)+theme_classic(base_size=12)+
            ylab(vd[[i]])+xlab("Date")+ggtitle(paste("Logger: ",ll[j],sep=""))
          # +
          #   scale_color_manual(values=custom_colors2,na.translate=F)+
          #   scale_x_datetime(date_labels="%m/%d %H",breaks=scales::pretty_breaks(n=4),expand=c(0,60*120))
        }
    }
    return(output)
}