In [None]:
## Install the package if needed:
#install.packages('Mcomp', repos='http://cran.us.r-project.org')

## Load the relevant packages
library(ncdf4)  # For reading in the NCEP wind fields
#library(R.matlab)  # If you need to read in matlab .mat files
library(openxlsx)  # If you need to read in .xlsx files
#library(rNOMADS)  # For reading grib2 data files (NOMADS data for instance)
#library(rGDAL)  #
library(RColorBrewer)
library(compiler)  # required for JIT (below)

## Enable compilation (speed gain?)
## Requests byte-code JIT level 3 from the `compiler` package loaded above.
## NOTE(review): recent R releases reportedly already run with JIT level 3 by
## default, in which case this call is redundant -- confirm for the R version in use.
enableJIT(3)

## Helper that turns Excel serial date numbers into POSIXct date-times.
##   x  : Excel serial value(s), counted in days from the origin 1899-12-30.
##   tz : forwarded to as.Date() together with the origin.
## Returns the POSIXct equivalent of the resulting Date value(s).
## NOTE(review): for numeric input as.Date() does not appear to make use of
## `tz`, so the serials look to be interpreted as UTC regardless of this
## argument -- confirm which time zone the ship log is recorded in.
conv_excel_time = function(x, tz='US/Pacific') {
    excel.days = as.Date(x, origin="1899-12-30", tz=tz)
    as.POSIXct(excel.days)
}

## Qualitative colour ramp.
##   n   : number of colours to return.
##   pal : name of the RColorBrewer palette to interpolate.
## Interpolates between 8 base colours of the palette and returns n colours.
get.qual.pal = function(n=100, pal='Accent') {
    base.colours = brewer.pal(8, pal)
    ramp = colorRampPalette(base.colours)
    ramp(n)
}

## Sequential colour ramp.
##   n   : number of colours to return.
##   pal : name of the RColorBrewer palette to interpolate.
## Returns n colours interpolated across the reversed base palette.
get.seq.pal = function(n=100, pal='YlOrRd') {
    ## Sequential Brewer palettes offer fewer than 11 base colours, so asking
    ## for 11 made brewer.pal() warn on every call and fall back to the
    ## palette maximum. Request that maximum directly (capped at 11 so that
    ## larger palettes behave exactly as before). An unknown palette name
    ## yields NA here, which na.rm discards so brewer.pal() reports the error.
    n.base = min(11, brewer.pal.info[pal, 'maxcolors'], na.rm=TRUE)
    colorRampPalette(rev(brewer.pal(n.base, pal)))(n)
}

## Diverging colour ramp.
##   n   : number of colours to return.
##   pal : name of the RColorBrewer palette to interpolate.
## Takes 11 base colours of the palette, reverses their order and
## interpolates n colours across them.
get.div.pal = function(n=100, pal='Spectral') {
    reversed.base = rev(brewer.pal(11, pal))
    ramp = colorRampPalette(reversed.base)
    ramp(n)
}

## Map values onto a diverging colour ramp.
##   x   : values to colour.
##   n   : number of colours in the ramp and number of bins x is cut into.
##   pal : palette name handed to get.div.pal().
## Returns one colour per element of x, chosen by the bin the element falls in.
make.div.pal = function(x=100, n, pal='Spectral') {
    ramp.colours = get.div.pal(n, pal=pal)
    bin.index = as.numeric(cut(x, breaks = n))
    ramp.colours[bin.index]
}

---
# 2. Load and prepare ship data
This section deals with loading the met-merge data. Once the data are loaded and their structure is verified, a 2-minute average is taken for all parameters. The next section then adds the NCEP wind data and MLD to the data frame. The file structure this script expects is as follows, where the Vars may be in any order but must use the proper abbreviations.

``Datetime  Var1   Var2  Var3 ...``

Variable Descriptions and Abbreviations
* __DT -- Date Time__
* __TW -- True Wind__
* TI  -- Wind Dir
* __LA -- Latitude__
* __LO -- Longitude__
* __AT -- Atm Temp__
* __BP -- Pressure__
* PA -- PAR
* __TT (TT2*)  -- Water Temp__
* __SA -- Salinity__
* __OS -- Oxygen__
* FL -- Fluorometer
* BT -- Bottom Depth

___NB___: Bold identifies required columns, while the others are used for additional analyses.

In [None]:
## Load the xlsx file
ship.data = read.xlsx('../Met-data - merged/merge 063100702-corrected.xlsx')

## Trim data to the following columns:
keep = c('DT', 'TW', 'TI', 'LA', 'LO', 'AT', 'BP', 'PA', 'TT', 'SA', 'OS', 'FL', 'BT')
# Keep only the columns whose names appear in keep. drop=FALSE guarantees the
# result is still a data frame even if just one column matches.
ship.data = ship.data[, names(ship.data) %in% keep, drop=FALSE]

# The steps below need the date/time and true wind columns, so fail early with
# a clear message if either is absent.
missing.cols = setdiff(c('DT', 'TW'), names(ship.data))
if (length(missing.cols) > 0) {
    stop(paste('Required column(s) missing from the ship data:',
               paste(missing.cols, collapse=', ')))
}

## Convert all columns (except date/time) to numeric.
## This is done by name rather than by position, so DT does not have to be the
## first column, and BEFORE any arithmetic, so a column that was read in as
## text cannot break the wind correction below.
for (col in setdiff(names(ship.data), 'DT')) {
    ship.data[[col]] = as.numeric(ship.data[[col]])
}

# Convert excel datetime numbers to actual datetimes.
ship.data$DT = conv_excel_time(ship.data$DT)

# Calculated from Thomas et al.
ship.data$TW = ship.data$TW * 0.942

## Check that it loaded properly/correct data type (num)
print(paste('Do the number of names in keep match the number of columns now?', ncol(ship.data) == length(keep)))
str(ship.data)

#### Remove bad wind data
Here we simply remove the rows where wind speed is less than zero (TW < 0). We also report the number of rows affected (should be a relatively small amount).

In [None]:
## Row numbers whose true wind speed is negative (rows with NA wind are not
## selected by which() and are therefore kept).
bad.wind = which(ship.data$TW < 0)
length(bad.wind)

## Remove bad wind entries.
## Only index when there is something to remove: with an empty bad.wind,
## `ship.data[-bad.wind,]` selects zero rows and would silently discard the
## entire data set.
if (length(bad.wind) > 0) {
    ship.data = ship.data[-bad.wind,]
}

### The N minute average:
1. Start with the first row, set ___current.time___ to that row's time
2. Find all rows with times >= the current time
3. Remove all rows with times more than N minutes away from the ___current.time___.
4. Take the column average of those rows and replace the current row.
5. Remove all the other rows
6. Repeat process on the next row.

In [None]:
### N Minute Averaging section
## Collapses the ship data into consecutive averaging windows: each surviving
## row keeps its own time stamp and holds the column means of itself and the
## following rows that fall less than avg.length minutes after it.
avg.length = 2  # minutes to avg
lookahead = 100  # max number of following rows inspected for each window

before = nrow(ship.data)
i = 1
nc = ncol(ship.data)
value.cols = which(names(ship.data) != 'DT')  # every column except the time stamp

while (i < nrow(ship.data)) {  # Loop through each row in ship.data
    current.time = ship.data$DT[i]

    ## Candidate rows: row i plus up to `lookahead` rows after it, clamped to
    ## the end of the table. Starting at i (not at row 1) means rows that were
    ## already averaged are never removed, which would shift the index of row i.
    candidates = i:min(i + lookahead, nrow(ship.data))
    elapsed = as.numeric(difftime(ship.data$DT[candidates], current.time, units='mins'))

    ## Determine which rows are within N minutes of the current row
    ## (which() discards comparisons that are NA, e.g. missing time stamps)
    in.range = candidates[which(elapsed >= 0 & elapsed < avg.length)]

    ## Average the column values together ignoring the time column.
    ## in.range is empty only when the current time stamp is NA; such a row is
    ## left untouched. drop=FALSE keeps a single value column two-dimensional.
    if (length(in.range) > 0) {
        ship.data[i, value.cols] = apply(ship.data[in.range, value.cols, drop=FALSE], 2,
                                         function(x) {mean(x, na.rm = TRUE)})
    }

    ## Remove all rows used to make average except for row i.
    ## Guarded because negative indexing with an empty vector selects zero
    ## rows, which would wipe the whole table whenever row i has no neighbours
    ## inside its window.
    merged = in.range[in.range != i]
    if (length(merged) > 0) {
        ship.data = ship.data[-merged,]
    }
    i = i + 1
}

print(paste('The number of rows before was', before, 'and now there are', nrow(ship.data)))

## Save Results

In [None]:
## Save point after loading ship data and averaging.
## The averaged ship data is written twice: as an R data file for later
## sections of the notebook and as a spreadsheet.
rdata.file = './RStates/ship.avg.rdata'
xlsx.file = './Input Data/Shipdata - Averaged.xlsx'

## Make sure the output folders exist; otherwise the writes below fail on a
## fresh checkout. Existing folders are left as they are.
for (out.dir in unique(dirname(c(rdata.file, xlsx.file)))) {
    dir.create(out.dir, showWarnings=FALSE, recursive=TRUE)
}

save(ship.data, file=rdata.file)
write.xlsx(ship.data, file=xlsx.file)

gc() # Free up memory