In [13]:
## Install the package if needed:
#install.packages('Rtools', repos='http://cran.us.r-project.org')

## Load the relevant packages
library(ncdf4)  # For reading in the NCEP wind fields
#library(R.matlab)  # If you need to read in matlab .mat files
library(openxlsx)  # If you need to read in .xlsx files
#library(rNOMADS)  # For reading grib2 data files (NOMADS data for instance)
#library(rGDAL)  #
library(RColorBrewer)
library(compiler)  # required for JIT (below)

## Enable compilation (speed gain?)
## Turns on just-in-time byte-compilation from the 'compiler' package at
## level 3, the highest level.
## NOTE(review): recent R versions reportedly enable this level by default,
## in which case this call is redundant -- confirm for the R version in use.
enableJIT(3)

## Helper function for converting the date time stamps.
## Convert Excel serial date-times to POSIXct.
##
## Args:
##   x:  numeric vector of Excel serial numbers, i.e. (fractional) days
##       counted from the 1899-12-30 origin; the fraction is the time of day.
##   tz: time zone attached to the result (default 'GMT'). The serial number
##       itself is always interpreted relative to midnight GMT of the origin;
##       'tz' only controls the zone the result is displayed in.
##
## Returns:
##   A POSIXct vector the same length as x, carrying tzone = tz.
##
## The conversion is done in seconds (days * 86400) directly on the POSIXct
## scale. Routing through as.Date() silently ignored 'tz' for numeric input,
## leaving the result to be printed in the session's local time zone.
conv_excel_time <- function(x, tz = 'GMT') {
    as.POSIXct(x * 86400, origin = '1899-12-30', tz = tz)
}

## Build an n-colour palette by interpolating between the 8 base colours of
## a ColorBrewer palette (default 'Accent').
##
## Args:
##   n:   number of colours to return (default 100).
##   pal: ColorBrewer palette name handed to brewer.pal().
##
## Returns:
##   Character vector of n colour codes.
get.qual.pal <- function(n = 100, pal = 'Accent') {
    base.cols <- brewer.pal(8, pal)
    ramp <- colorRampPalette(base.cols)
    ramp(n)
}

## Build an n-colour palette by interpolating between the reversed base
## colours of a ColorBrewer palette (default 'YlOrRd').
##
## Args:
##   n:   number of colours to return (default 100).
##   pal: ColorBrewer palette name handed to brewer.pal().
##
## Returns:
##   Character vector of n colour codes.
##
## Up to 11 base colours are requested, but never more than the palette
## offers: asking brewer.pal() for 11 colours from a palette with fewer
## (the default 'YlOrRd' tops out at 9) raises a warning on every call and
## falls back to the palette maximum anyway. Clamping up front yields the
## same colours without the warning.
get.seq.pal <- function(n = 100, pal = 'YlOrRd') {
    ## NA for an unknown palette name; brewer.pal() then reports the bad name.
    max.cols <- brewer.pal.info[pal, 'maxcolors']
    n.base <- min(11, max.cols, na.rm = TRUE)
    colorRampPalette(rev(brewer.pal(n.base, pal)))(n)
}

## Build an n-colour palette by interpolating between the 11 base colours of
## a ColorBrewer palette (default 'Spectral'), taken in reverse order.
##
## Args:
##   n:   number of colours to return (default 100).
##   pal: ColorBrewer palette name handed to brewer.pal().
##
## Returns:
##   Character vector of n colour codes.
get.div.pal <- function(n = 100, pal = 'Spectral') {
    reversed.cols <- rev(brewer.pal(11, pal))
    ramp <- colorRampPalette(reversed.cols)
    ramp(n)
}

## Map numeric values onto colours of an n-step palette from get.div.pal().
##
## Args:
##   x:   numeric values to colour (default 100).
##   n:   number of palette steps; also the number of bins x is cut into.
##        Has no default and must be supplied.
##   pal: palette name forwarded to get.div.pal().
##
## Returns:
##   Character vector of colour codes, one per element of x.
make.div.pal <- function(x = 100, n, pal = 'Spectral') {
    palette.cols <- get.div.pal(n, pal = pal)
    ## cut() assigns each value to one of n bins; the bin number is the
    ## position of its colour in the palette.
    bin.index <- as.numeric(cut(x, breaks = n))
    palette.cols[bin.index]
}

---
# 2. Load and prepare ship data
This section deals with loading the met-merge data. Once the data are loaded and their structure is verified, a 2-minute average is taken for all parameters. The next section then adds the NCEP wind data and MLD to the data frame. The script expects the following file structure, where the Vars may appear in any order but must use the proper abbreviations.

``Datetime  Var1   Var2  Var3 ...``

Variable Descriptions and Abbreviations
* __DT -- Date Time__
* __TW -- True Wind__
* TI  -- Wind Dir
* __LA -- Latitude__
* __LO -- Longitude__
* __AT -- Atm Temp__
* __BP -- Pressure__
* PA -- PAR
* __TT (TT2*)  -- Water Temp__
* __SA -- Salinity__
* __OS -- Oxygen__
* FL -- Fluorometer
* BT -- Bottom Depth

___NB___: Bold identifies required columns, while the others are used for additional analyses.

In [1]:
## Read the merged ship MET workbook and return a cleaned data frame.
##
## Args:
##   path: path to the .xlsx file (read with openxlsx::read.xlsx).
##
## Returns:
##   A data frame holding only the recognised columns (see 'keep' below), in
##   the order they appear in the file. DT is converted to POSIXct (GMT),
##   every other column is numeric, and TW is scaled by 0.942.
##
## Stops with an explicit error if the DT or TW column is absent, since both
## are needed by the steps below.
read.ship.data <- function(path) {
    ## Load the data file
    data <- read.xlsx(path)

    ## Trim data to the following columns. drop = FALSE keeps a data frame
    ## even if only one column matches.
    keep <- c('DT', 'TW', 'TI', 'LA', 'LO', 'AT', 'BP', 'PA', 'TT', 'SA', 'OS', 'OX', 'FL', 'BT')
    data <- data[, names(data) %in% keep, drop = FALSE]

    absent <- setdiff(c('DT', 'TW'), names(data))
    if (length(absent) > 0) {
        stop('read.ship.data: missing required column(s): ',
             paste(absent, collapse = ', '), call. = FALSE)
    }

    ## Convert all columns except date/time to numeric. Columns are picked by
    ## name, not position, because DT is not guaranteed to be the first
    ## column. This runs before the TW scaling so that a TW column read in
    ## as text can still be multiplied.
    for (col in setdiff(names(data), 'DT')) {
        data[[col]] <- as.numeric(data[[col]])
    }

    ## Convert excel datetime numbers to actual datetimes.
    data$DT <- conv_excel_time(data$DT, tz = 'GMT')

    ## Calculated from Thomas et al.
    data$TW <- data$TW * 0.942

    ## Check that it loaded properly/correct data type (num)
    print(paste('Do the number of names in keep match the number of columns now?', ncol(data) == length(keep)))

    data
}

In [None]:
## Read the merged MET workbook into the ship.data data frame.
ship.data <- read.ship.data('Raw Data/MET/met-merge-final.xlsx')

#### Remove bad wind data
Here we simply remove the rows where wind speed is less than zero (TW < 0). We also report the number of rows affected (should be a relatively small amount).

In [23]:
## Locate rows with a negative true wind speed and report how many there are.
## which() skips NA comparisons, so rows with a missing TW are kept.
bad.wind <- which(ship.data$TW < 0)
length(bad.wind)

## Remove bad wind entries.
## Guard the empty case: ship.data[-integer(0), ] selects zero rows, so an
## unguarded removal would wipe the whole data frame when nothing is bad.
if (length(bad.wind) > 0) {
    ship.data <- ship.data[-bad.wind, ]
}

### The N minute average:
1. Start with the first row, set ___current.time___ to that row's time
2. Find all rows with times >= the ___current.time___
3. Remove all rows with times more than N minutes away from the ___current.time___.
4. Take the column average of those rows and replace the current row.
5. Remove all the other rows
6. Repeat process on the next row.

In [17]:
## Collapse a time series into consecutive N-minute averages.
##
## Starting at the first row, each window holds the current row plus every
## following row whose DT is at or after the window start and less than n
## minutes later. The window is replaced by one row that keeps the start
## row's DT (and row name) and holds the mean of every other column, with
## NA values ignored (an all-NA window yields NaN). The next window starts
## at the first row not yet consumed.
##
## Args:
##   data: data frame with a date-time column 'DT' (POSIXct, or coercible via
##         as.POSIXct) and otherwise numeric columns. Rows are expected to be
##         in chronological order; a backwards time step or a missing DT
##         simply ends the current window.
##   n:    window length in minutes (default 2).
##
## Returns:
##   Data frame with one row per window and the same columns as 'data'.
##   Prints the row count before and after as a side effect.
##
## Implementation notes:
##   * Windows are found in a single forward pass and averaged per column
##     with tapply(), instead of deleting rows from the data frame on every
##     iteration.
##   * There is no fixed look-ahead limit, so a window may contain any number
##     of rows.
##   * Value columns are chosen by name (everything except DT), so this also
##     works with a single value column or when DT is not the first column.
take.avg <- function(data, n = 2) {
    ### N Minute Averaging section

    before <- nrow(data)

    if (!('DT' %in% names(data))) {
        stop("take.avg: 'data' must contain a 'DT' date-time column", call. = FALSE)
    }
    value.cols <- setdiff(names(data), 'DT')

    if (before > 0) {
        ## Time stamps in seconds. Dividing a difference by 60 below gives
        ## the same quantity as difftime(..., units = 'mins').
        stamps <- as.numeric(as.POSIXct(data$DT))

        ## Forward pass: label every row with the number of its window.
        window.id <- integer(before)
        current <- 0L
        i <- 1L
        while (i <= before) {
            current <- current + 1L
            j <- i
            if (!is.na(stamps[i])) {
                while (j < before) {
                    gap <- (stamps[j + 1L] - stamps[i]) / 60
                    ## Stop at a missing time, a step back in time, or once
                    ## n minutes have been reached.
                    if (is.na(gap) || gap < 0 || gap >= n) {
                        break
                    }
                    j <- j + 1L
                }
            }
            window.id[i:j] <- current
            i <- j + 1L
        }

        ## Keep the first row of each window (it carries the DT stamp), then
        ## overwrite the value columns with the per-window means.
        averaged <- data[!duplicated(window.id), , drop = FALSE]
        for (col in value.cols) {
            averaged[[col]] <- as.numeric(tapply(data[[col]], window.id, mean, na.rm = TRUE))
        }
        data <- averaged
    }

    print(paste('The number of rows before was', before, 'and now there are', nrow(data)))
    data
}

[1] "The number of rows before was 185872 and now there are 21209"


In [None]:
## Replace the ship data with its N-minute averages (default window n = 2).
ship.data <- take.avg(ship.data)

## Save Results

In [18]:
## Checkpoint after loading and averaging the ship data: store the data frame
## as an R data file first, then export it as an Excel workbook.
## NOTE: write.xlsx has to zip the workbook, so a zip utility (e.g. Rtools on
## Windows) must be available to R or the export fails.
save(ship.data, file = './RStates/ship.avg.rdata')
write.xlsx(ship.data, file = './Input Data/Shipdata - Averaged.xlsx')

gc()  # Free up memory

ERROR: Error: zipping up workbook failed. Please make sure Rtools is installed or a zip application is available to R.
         Try installr::install.rtools() on Windows. If the "Rtools\bin" directory does not appear in Sys.getenv("PATH") please add it to the system PATH 
         or set this within the R session with Sys.setenv("R_ZIPCMD" = "path/to/zip.exe")
