# Prepare a CSV file for the OWG

This notebook contains functions, and a walkthrough of how to use them, for creating a CSV file that can be used in optical wave gauging (OWG).

In [1]:
# import packages and libs

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import datetime
import calendar
from scipy import interpolate
import os
import shutil
import csv


## Read data

Data comes from two sources:  
1. NDBC standard meteorological observation archives, found [here](https://www.ndbc.noaa.gov/)
2. Archived ReCON imagery; partial records can be scraped from the [website](https://www.glerl.noaa.gov/metdata/) or accessed from GLERL's network


These data must be joined so that each webcam image is associated with the meteorological observation taken within 30 minutes (before or after) of the time the image was taken.

In [36]:
# prepare the buoy data for joining

def readwavetxt(fn, target):
    '''Read a whitespace-delimited buoy .txt file and return it as a dataframe.

    The first row of the file supplies the column names and the second row
    (units) is skipped. The first five columns are read as year, month, day,
    hour and minute and combined into a UTC timestamp.

    variables:
    fn: filename and location of .txt file
    target: attribute that the owg will be predicting ("WVHT", "DPD" or
        "MWD"); rows whose target value is a missing-data placeholder are
        dropped. Any other value leaves the rows unfiltered.

    returns:
    A dataframe with columns WVHT, DPD, MWD, datetime (tz-aware, UTC) and
    epoch (whole seconds since 1970-01-01 UTC), indexed 0..n-1.
    '''
    # sep=r'\s+' replaces the deprecated delim_whitespace=True; the timestamp is
    # assembled below instead of through the deprecated dict form of parse_dates.
    raw = pd.read_csv(fn, sep=r'\s+', skiprows=[1])

    # Name the five leading columns the way pd.to_datetime expects for assembly
    stamp_parts = raw.iloc[:, :5].copy()
    stamp_parts.columns = ['year', 'month', 'day', 'hour', 'minute']

    df = raw[['WVHT', 'DPD', 'MWD']].copy()
    df['datetime'] = pd.to_datetime(stamp_parts, utc=True)

    # calculate unix datetime
    df['epoch'] = (df['datetime'] - pd.Timestamp("1970-01-01", tz='utc')) // pd.Timedelta('1s')

    # Drop rows where the target holds a placeholder instead of a measurement
    # (values at or above these thresholds are treated as missing).
    missing_threshold = {"WVHT": 99.0, "DPD": 99.0, "MWD": 999}
    if target in missing_threshold:
        df = df[df[target] < missing_threshold[target]]

    # Filtering leaves gaps in the index; renumber so that label-based and
    # positional lookups agree for downstream code.
    df = df.reset_index(drop=True)

    print(df.head())

    # The original ended with a bare `return`, handing None back to the caller.
    return df

In [25]:
# Complete csv

def waveframetocsv(csvfile, directory, waves=None, max_gap_seconds=30 * 60):
    '''
    Associate each image in a directory with the nearest-in-time wave
    observation and write the matches to a csv file.

    csvfile is the name of the csvfile being created (overwritten if present)
    directory is the directory of images that have been prepped for OWG
        filtering; each file name, minus its extension, must be a UTC
        timestamp in the form YYYYmmddHHMM
    waves is the prepared waves dataframe, with columns 'epoch' (unix
        seconds), 'WVHT', 'DPD' and 'MWD'. When omitted, the notebook-level
        variable named `waves` is used, as earlier versions of this function did.
    max_gap_seconds is the largest allowed difference, in seconds, between an
        image time and its matched observation (default 30 minutes)

    Returns a 3-tuple: (number of images written to the csv, number of images
    skipped, "images without met. data"). Skipped images are those whose name
    is not a timestamp plus those with no observation close enough in time.
    '''
    if waves is None:
        waves = globals().get('waves')
    if waves is None:
        raise ValueError("no wave dataframe available: pass waves=<dataframe with an 'epoch' column>")

    successcounter = 0
    failcounter = 0
    timecounter = 0

    if os.path.exists(csvfile):
        print ("Overwriting csv file")
    else:
        print("couldn't find file, making new one")

    # Pull the columns out once as arrays so every lookup below is positional,
    # whatever index the dataframe carries.
    epochs = waves['epoch'].to_numpy()
    heights = waves['WVHT'].to_numpy()
    periods = waves['DPD'].to_numpy()
    directions = waves['MWD'].to_numpy()

    # Mode "w" truncates an existing file, so no separate delete is needed, and
    # the file is opened once instead of once per row.
    with open(csvfile, "w") as text_file:
        text_file.write("id, H, T, MWDIR\n")

        # sorted() makes the row order reproducible; os.listdir order is arbitrary
        for filename in sorted(os.listdir(directory)):
            stem = os.path.splitext(filename)[0]

            # get time from filename
            try:
                utime = calendar.timegm(datetime.datetime.strptime(stem, "%Y%m%d%H%M").timetuple())
            except ValueError:
                # not a timestamp-named file
                failcounter += 1
                continue

            if len(epochs) == 0:
                timecounter += 1
                continue

            nearest = int(np.abs(epochs - utime).argmin())
            if abs(int(epochs[nearest]) - utime) <= max_gap_seconds:
                text_file.write("{0:s},{1:0.2f},{2:0.2f},{3:0.2f}\n".format(
                    filename, float(heights[nearest]), float(periods[nearest]), float(directions[nearest])))
                successcounter += 1
            else:
                timecounter += 1

    # The original return expression dropped this message; report it here and
    # keep the returned tuple's shape unchanged.
    print("{} images added to {}".format(successcounter, csvfile))
    return successcounter, failcounter + timecounter, "images without met. data"
                   

In [37]:
# Location of the buoy .txt file, and the attribute the OWG will be predicting
fn = "C:/njc/src/GLERL_contract/buoy_data/mcy2021.txt"
target = "WVHT"
waveframe = readwavetxt(fn=fn, target=target)


   WVHT   DPD  MWD                  datetime       epoch
0  0.41  2.68   24 2021-05-08 22:00:00+00:00  1620511200
1  0.39  2.78   63 2021-05-08 22:10:00+00:00  1620511800
2  0.39  2.45   65 2021-05-08 22:20:00+00:00  1620512400
3  0.36  2.62   84 2021-05-08 22:30:00+00:00  1620513000
4  0.34  2.73   86 2021-05-08 22:40:00+00:00  1620513600


In [None]:
csvfile = "C:/njc/src/GLERL_contract/buoy_data/mcy2021_oroto.csv"
directory = "D:/ReCON_imgs/mcy_total/2021"
# This call relies on the buoy observations being available under the
# notebook-level name `waves`, which no cell defines on a fresh kernel.
# Bind it to the dataframe prepared above.
waves = waveframe
waveframetocsv(csvfile, directory)