In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import datetime
import calendar
from scipy import interpolate
import os
import shutil
import csv

In [2]:
def pcoord(x, y):
    """
    Rectangular -> polar conversion using the geographic convention:
    azimuth is measured from the +y axis towards the +x axis and is
    returned in degrees within [0, 360).

    r, az = pcoord(x, y)
    """
    radius = np.sqrt(x**2 + y**2)
    # arctan2 is deliberately called as (x, y), not (y, x), so that 0 deg lies along +y
    azimuth = np.degrees(np.arctan2(x, y))
    # fold the (-180, 180] result of arctan2 onto [0, 360)
    azimuth = np.mod(azimuth + 360., 360.)
    return radius, azimuth

def xycoord(r, az):
    """
    Polar -> rectangular conversion; az is in degrees, geographic convention
    (0 deg along +y, 90 deg along +x).

    x, y = xycoord(r, az)
    """
    theta = np.radians(az)
    return r * np.sin(theta), r * np.cos(theta)


In [3]:
fn = 'C:/njc/src/SSF/buoydata/micity2019.txt'

# Read the whitespace-delimited buoy file. sep=r'\s+' is the supported spelling
# of the deprecated delim_whitespace=True; skiprows=[1] drops the line directly
# below the header row.
raw = pd.read_csv(fn, skiprows=[1], sep=r'\s+')

# The first five columns hold year, month, day, hour and minute. Assemble them
# explicitly instead of relying on read_csv's deprecated parse_dates={...}
# column-combining feature.
stamp = raw.iloc[:, :5].copy()
stamp.columns = ['year', 'month', 'day', 'hour', 'minute']

# Keep only the wave columns (selected by name rather than by position) and
# store the observation time as a timezone-aware (UTC) datetime column.
df = raw[['WVHT', 'DPD', 'MWD']].copy()
df['datetime'] = pd.to_datetime(stamp, utc=True)
df

Unnamed: 0,WVHT,DPD,MWD,datetime
0,0.00,99.00,999,2019-04-02 19:00:00+00:00
1,0.00,5.45,67,2019-04-02 19:10:00+00:00
2,0.00,6.83,235,2019-04-02 19:20:00+00:00
3,0.46,2.41,191,2019-04-17 23:00:00+00:00
4,0.45,2.39,206,2019-04-17 23:10:00+00:00
...,...,...,...,...
28512,0.48,2.21,41,2019-11-06 15:20:00+00:00
28513,0.49,2.21,77,2019-11-06 15:30:00+00:00
28514,0.51,2.21,118,2019-11-06 15:40:00+00:00
28515,0.51,2.26,212,2019-11-06 15:50:00+00:00


In [4]:
# unix time: whole seconds between each observation and 1970-01-01 UTC
df['epoch'] = (
    (df['datetime'] - pd.Timestamp("1970-01-01", tz='utc'))
    // pd.Timedelta('1s')
)
df

Unnamed: 0,WVHT,DPD,MWD,datetime,epoch
0,0.00,99.00,999,2019-04-02 19:00:00+00:00,1554231600
1,0.00,5.45,67,2019-04-02 19:10:00+00:00,1554232200
2,0.00,6.83,235,2019-04-02 19:20:00+00:00,1554232800
3,0.46,2.41,191,2019-04-17 23:00:00+00:00,1555542000
4,0.45,2.39,206,2019-04-17 23:10:00+00:00,1555542600
...,...,...,...,...,...
28512,0.48,2.21,41,2019-11-06 15:20:00+00:00,1573053600
28513,0.49,2.21,77,2019-11-06 15:30:00+00:00,1573054200
28514,0.51,2.21,118,2019-11-06 15:40:00+00:00,1573054800
28515,0.51,2.26,212,2019-11-06 15:50:00+00:00,1573055400


In [5]:
# keep only rows whose WVHT / DPD are below 99.0 and whose MWD is below 999
# (rows carrying those placeholder values, e.g. row 0 above, are discarded)
df = df[(df['WVHT'] < 99.0) & (df['DPD'] < 99.0) & (df['MWD'] < 999)]
df

Unnamed: 0,WVHT,DPD,MWD,datetime,epoch
1,0.00,5.45,67,2019-04-02 19:10:00+00:00,1554232200
2,0.00,6.83,235,2019-04-02 19:20:00+00:00,1554232800
3,0.46,2.41,191,2019-04-17 23:00:00+00:00,1555542000
4,0.45,2.39,206,2019-04-17 23:10:00+00:00,1555542600
5,0.43,2.38,189,2019-04-17 23:20:00+00:00,1555543200
...,...,...,...,...,...
28512,0.48,2.21,41,2019-11-06 15:20:00+00:00,1573053600
28513,0.49,2.21,77,2019-11-06 15:30:00+00:00,1573054200
28514,0.51,2.21,118,2019-11-06 15:40:00+00:00,1573054800
28515,0.51,2.26,212,2019-11-06 15:50:00+00:00,1573055400


In [6]:
# numpy view of the frame, columns reordered to [epoch, WVHT, DPD, MWD]
waves = df.iloc[:, [4, 0, 1, 2]].to_numpy()
print('array with shape', waves.shape)
print(waves[0])

array with shape (18020, 4)
[1.5542322e+09 0.0000000e+00 5.4500000e+00 6.7000000e+01]


In [7]:
# first time in the record (2019-04-02 19:10:00) as whole seconds since 1970-01-01
firsttime = (
    pd.Timestamp("2019-04-02 19:10:00") - pd.Timestamp('1970-01-01')
) // pd.Timedelta('1s')

In [8]:
# interpolators over the epoch column (waves[:, 0])

# linear interpolation of the three wave columns (waves[:, 1:4])
linf = interpolate.interp1d(x=waves[:, 0], y=waves[:, 1:4], kind='linear', axis=0)
# nearest-neighbour lookup of the full row, so element 0 is the matched epoch
nnf = interpolate.interp1d(x=waves[:, 0], y=waves[:, :4], kind='nearest', axis=0)

In [10]:
# set generic names
# name of interpolated csv
csvfile = "C:/njc/src/SSF/OWG/mcyv22019(test).csv"
# name of folder for training imgs
owgtrainimgs = "C:/njc/src/mcyimgs/2019_imgpreptest/fall"+"owgtrainimgs"
# folder of prep images
directory = "C:/njc/src/mcyimgs/2019_imgpreptest/view2fall"
# filename of unfiltered images
source = "C:/njc/src/mcyimgs/2019_imgpreptest/view2fall"+"/{}"
# filename of filtered image
destination = owgtrainimgs+"/{}"

# rejection thresholds used by the loop below
MAX_TIME_GAP_S = 1800   # max seconds between an image and its nearest wave record
MIN_INTENSITY = 40      # mean pixel value below this is treated as a night image
MIN_SHARPNESS = 3.5     # images with a sharpness score below this are treated as blurry

# start from an empty training-image folder
if os.path.isdir(owgtrainimgs):
    shutil.rmtree(owgtrainimgs)
else:
    print("couldn't find folder, make new one")
os.mkdir(owgtrainimgs)

counter = 0
removedcounter = 0
failcounter = 0
skippedcounter = 0

# NOTE(review): this cell needs firsttime, nnf, linf and estimate_sharpness to
# exist already. estimate_sharpness is not defined in any cell visible here --
# define/import it before running, otherwise the loop raises NameError (the old
# bare "except:" silently counted that as "outside of interpolation range").

# Opening with "w" truncates any previous csv, so the header is written exactly
# once and the rows follow it (previously the file was deleted right after the
# header had been written, leaving a csv without a header).
with open(csvfile, "w") as text_file:
    text_file.write("id, H, T, MWDIR\n")

    # sorted() makes the csv row order reproducible; os.listdir order is arbitrary
    for filename in sorted(os.listdir(directory)):
        # the image name without its extension is a YYYYmmddHHMM timestamp
        stem = os.path.splitext(filename)[0]
        try:
            ti = calendar.timegm(datetime.datetime.strptime(stem, "%Y%m%d%H%M").timetuple())
        except ValueError:
            # not named like a timestamped image (stray file) -- skip it
            skippedcounter = skippedcounter + 1
            continue

        # only use images from when the buoy was in the water; skipping here
        # guarantees zi/wi from a previous image are never reused
        if ti < firsttime:
            failcounter = failcounter + 1
            continue
        try:
            zi = nnf(ti)
            wi = linf(ti)
        except ValueError:
            # interp1d raises ValueError for times outside the wave record
            failcounter = failcounter + 1
            continue

        # seconds elapsed between the image and the nearest wave record
        timedif = abs(int(ti)-int(zi[0]))

        # if image was taken more than 30min from the wave data toss out image
        if timedif >= MAX_TIME_GAP_S:
            removedcounter = removedcounter + 1
            continue

        # read the image once and reuse it for both quality checks
        try:
            img = plt.imread(source.format(filename))
        except OSError:
            skippedcounter = skippedcounter + 1
            continue

        # if avg image intensity is below the threshold (taken at night) toss out image
        avgintensity = np.mean(img)
        if avgintensity < MIN_INTENSITY:
            removedcounter = removedcounter + 1
            continue

        # if image sharpness is below the threshold (too blurry) toss out image
        sharpness, contrast = estimate_sharpness(img)
        if sharpness < MIN_SHARPNESS:
            removedcounter = removedcounter + 1
            continue

        # H and T come from the linear interpolator, MWDIR from the nearest record
        text_file.write("{0:s},{1:0.2f},{2:0.2f},{3:0.2f}\n".format(filename,wi[0],wi[1],zi[3]))

        # copy the image from the prep folder into the training folder
        shutil.copy(source.format(filename), destination.format(filename))

        # document that a file has been copied to the training folder
        counter = counter + 1

print (failcounter, "files outside of interpolation range")
print (counter, "files moved")
print(removedcounter, "files removed from training dataset")
if skippedcounter:
    print(skippedcounter, "files skipped (name is not a timestamp or image could not be read)")

0 files outside of interpolation range
213 files moved
1161 files removed from training dataset


In [14]:
# Scratch check of the csv row format.
# NOTE(review): filename, wi and zi are not defined in this cell; it relies on
# the values left in the kernel by an earlier cell. The loop variable is unused,
# so the same row is written once per element of x.
x = [1,2,3,4]
with open("trial.csv", "w") as text_file:
    text_file.write("id, H, T, MWDIR\n")
    for value in x:
        text_file.write("{0:s}.jpg,{1:0.2f},{2:0.2f},{3:0.2f}\n".format(filename,wi[0],wi[1],zi[3]))

## Make code into functions and optimize

In [2]:
def readwavetxt(fn):
    '''
    Read a whitespace-delimited buoy text file and return the wave data as a
    numpy array.

    fn is the path of the text file. Its first line is the header, the line
    directly below the header is skipped, the first five columns are read as
    year, month, day, hour and minute, and it must contain columns named
    WVHT, DPD and MWD.

    Rows with WVHT >= 99.0, DPD >= 99.0 or MWD >= 999 are discarded.

    Returns a 2-D array with one row per remaining record and the columns
    [epoch, WVHT, DPD, MWD], where epoch is whole seconds since
    1970-01-01 UTC. The head of the filtered frame and the array shape are
    printed as a side effect.
    '''
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
    raw = pd.read_csv(fn, skiprows=[1], sep=r'\s+')

    # Assemble timestamps from the five leading columns explicitly rather than
    # through read_csv's deprecated parse_dates={...} column-combining feature.
    stamp = raw.iloc[:, :5].copy()
    stamp.columns = ['year', 'month', 'day', 'hour', 'minute']

    # Select the wave columns by name (not by position) and add the time columns
    df = raw[['WVHT', 'DPD', 'MWD']].copy()
    df['datetime'] = pd.to_datetime(stamp, utc=True)

    # calculate unix datetime
    df['epoch'] = (df['datetime'] - pd.Timestamp("1970-01-01", tz='utc')) // pd.Timedelta('1s')

    # drop rows holding the 99.0 / 999 placeholder values
    keep = (df['WVHT'] < 99.0) & (df['DPD'] < 99.0) & (df['MWD'] < 999)
    df = df[keep]

    print (df.head())

    # put data frame into a numpy array
    waves = df[['epoch', 'WVHT', 'DPD', 'MWD']].to_numpy()
    print('array with shape',np.shape(waves))

    return waves

In [12]:
def waveframetocsv (nnf, linf, firsttime, csvfile, directory):
    '''
    Write a csv of wave conditions for the timestamped images in a directory,
    for OWG training. No image files are moved or copied; only the csv is written.

    nnf is a nearest neighbor interpolator of waves; called with a unix time it
        must return a row whose element 0 is the matched epoch and element 3 the
        wave direction
    linf is a linear interpolator of waves; called with a unix time it must
        return a row whose elements 0 and 1 are wave height and period
    firsttime is the pandas datetime string of the first entry from waves
        (treated as UTC, like the image file names)
    csvfile is the name of the csvfile being created (overwritten if it exists)
    directory is the directory of images that have been prepped for OWG
        filtering; each file name, minus extension, must be a YYYYmmddHHMM time

    An image is written to the csv only if its time is not before firsttime,
    lies inside the range of both interpolators, and is less than 30 minutes
    from the nearest wave record. Files whose name is not a timestamp are
    skipped. Counts for each outcome are printed. Returns None.
    '''

    # opening with "w" below truncates an existing file, so no delete is needed
    if os.path.exists(csvfile):
        print ("Overwriting csv file")
    else:
        print("couldn't find file, making new one")

    counter = 0
    failcounter = 0
    timecounter = 0
    skippedcounter = 0

    #check first time in record as a unix timestamp
    firsttime = (pd.Timestamp(firsttime)-pd.Timestamp('1970-01-01')) // pd.Timedelta('1s')
    print (firsttime)

    # keep the csv open for the whole loop instead of reopening it per row
    with open(csvfile, "w") as text_file:
        text_file.write("id, H, T, MWDIR\n")

        # sorted() makes the row order reproducible; os.listdir order is arbitrary
        for filename in sorted(os.listdir(directory)):
            # strip the extension, whatever its length
            fn = os.path.splitext(filename)[0]

            # get time from filename
            try:
                ti = calendar.timegm(datetime.datetime.strptime(fn, "%Y%m%d%H%M").timetuple())
            except ValueError:
                # stray file that is not named like a timestamped image
                skippedcounter = skippedcounter + 1
                continue

            # only use images from when buoy was in water; skipping here means
            # zi/wi from a previous image can never be reused for this one
            if ti < firsttime:
                failcounter = failcounter + 1
                continue

            try:
                zi = nnf(ti)
                wi = linf(ti)
            except ValueError:
                # scipy's interp1d raises ValueError outside its data range
                failcounter = failcounter + 1
                continue

            # find how many seconds elapsed between the image and the nearest wave record
            timedif = abs(int(ti)-int(zi[0]))

            # if image was taken more than 30min from the wave data toss out image
            if timedif >= 1800:
                timecounter = timecounter + 1
                continue

            # add image to the csv file: H and T interpolated linearly, MWDIR from nearest record
            text_file.write("{0:s},{1:0.2f},{2:0.2f},{3:0.2f}\n".format(filename,wi[0],wi[1],zi[3]))
            counter = counter + 1

    print (failcounter, "images outside of interpolation range")
    print(timecounter, "images not included from training dataset due to no availablae wave data withing 30 minutes")
    print (timecounter+failcounter, "total images not included in training dataset")
    print (counter, "images added to {}".format(csvfile))
    if skippedcounter:
        print (skippedcounter, "files skipped because the name is not a YYYYmmddHHMM timestamp")
    return

In [13]:
# load the 2017 buoy record through the helper and confirm the returned type
fn = 'C:/njc/src/SSF/buoydata/micity2017.txt'
waves = readwavetxt(fn)
print(type(waves))

   WVHT    DPD  MWD                  datetime       epoch
2  0.01  12.15   45 2017-04-19 18:30:00+00:00  1492626600
3  0.01  10.41   40 2017-04-19 18:40:00+00:00  1492627200
4  0.01   6.58  227 2017-04-19 18:50:00+00:00  1492627800
5  0.01   7.47   36 2017-04-19 19:00:00+00:00  1492628400
6  0.01   7.63  269 2017-04-19 19:10:00+00:00  1492629000
array with shape (16116, 4)
<class 'numpy.ndarray'>


In [14]:
# interpolators over the epoch column (waves[:, 0])

# linear interpolation of the three wave columns (waves[:, 1:4])
linf = interpolate.interp1d(x=waves[:, 0], y=waves[:, 1:4], kind='linear', axis=0)
# nearest-neighbour lookup of the full row, so element 0 is the matched epoch
nnf = interpolate.interp1d(x=waves[:, 0], y=waves[:, :4], kind='nearest', axis=0)

In [16]:
# inputs for the 2017 run: first wave record, output csv, prepped image folder
firsttime = "2017-04-19 18:30:00"
csvfile = "C:/njc/src/SSF/OWG/mcyv22017test.csv"
directory = "C:/njc/src/mcyimgs/2017_imgpreptest/view2"
waveframetocsv(
    nnf=nnf,
    linf=linf,
    firsttime=firsttime,
    csvfile=csvfile,
    directory=directory,
)

Overwriting csv file
1492626600
214 images outside of interpolation range
162 images not included from training dataset due to no availablae wave data withing 30 minutes
376 total images not included in training dataset
600 images added to C:/njc/src/SSF/OWG/mcyv22017test.csv
