In [2]:
import netCDF4 as nc
# import matplotlib.pyplot as plt
import numpy.ma as ma
import numpy as np
import os

import pandas as pd

In [3]:
datapath = r'D:\Project_data\2023-eag249\FMI_data'

In [4]:
# folder has data from three buoys separated by years
files = os.listdir(datapath)
files[0].split('_')[1][0:4]

'2016'

In [58]:
# just for fun challenge to do this in OOP

class FmiDataLoad(object):
    datapath = r'D:\Project_data\2023-eag249\FMI_data'
    start_f_ind = 0  # 0.025 Hz
    end_f_ind = 80  # 0.6 Hz

    def __init__(self, station: str, ds_params: list):
        if self.validate_station_name(station):
            self.stationname = station
        if self.validate_param_names(ds_params):
            self.ds_params = ds_params

        self.available_files = self.get_available_files()
        self.F = None
        self.S = None
        self.SPR = None
        self.D = None

    def load_vars_into_df(self):
        for param in self.ds_params:
            if param == 'S':
                self.S = self.load_param(param)
            if param == 'spr':
                self.SPR = self.load_param(param)
            if param == 'D':
                self.D = self.load_param(param)

    def load_param(self, param) -> pd.DataFrame:
        df = pd.DataFrame()
        for file in self.available_files:
            dset = nc.Dataset(os.path.join(FmiDataLoad.datapath, file))
            time = dset['time']

            data = ma.getdata(dset[param][:])
            df_i = pd.DataFrame(data[FmiDataLoad.start_f_ind: FmiDataLoad.end_f_ind, :].T, index=pd.to_datetime(time[:], unit='s'))
            # df['Date'] = pd.to_datetime(time[:], unit='s')
            df = pd.concat([df, df_i], axis=0)

        # assumption that all the Frequency values between files are the same.. should check
        f = dset['F']  # takes F from last file
        self.F = ma.getdata(f[:][FmiDataLoad.start_f_ind:FmiDataLoad.end_f_ind, 0])
        f_str = [str(x) for x in f[:][FmiDataLoad.start_f_ind:FmiDataLoad.end_f_ind, 0]]
        df.columns = f_str
        return df

    def get_available_files(self) -> list:
        files = os.listdir(FmiDataLoad.datapath)
        available_files = []
        for file in files:
            if file.startswith(self.stationname):
                available_files.append(file)
        return available_files

    @staticmethod
    def validate_param_names(params):
        valid = []
        for param in params:
            valid.append(FmiDataLoad.valid_params(param))
            if np.all(valid):
                return True
            else:
                raise ValueError(f'Not valid parameter names. Expected S, spr; got {param}')

    @staticmethod
    def validate_station_name(station):
        if station in ['BS', 'NBP', 'GOF']:
            return True
        else:
            raise ValueError(f'Not valid stationname, expected BS, NBP, GOF. Got {station}')

    @staticmethod
    def valid_params(param):
        if param in ['S', 'spr', 'D']:
            return True
        else:
            raise ValueError(f'Not valid parameter name! Expected S, or spr; got {param}.')


In [60]:
test = FmiDataLoad(station='GOF', ds_params=['S', 'spr', 'D'])
test.load_vars_into_df()
test.S

UnboundLocalError: local variable 'dset' referenced before assignment

In [15]:
valid = [False, True, False]
[i for i, x in enumerate(valid) if not x]

[0, 2]

In [49]:
df_spec = test.D

In [50]:
df_spec.index = pd.to_datetime(df_spec.index)
df_spec

Unnamed: 0,0.025,0.03,0.035,0.04,0.045,0.05,0.055,0.06,0.065,0.07,...,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.6
2016-01-01 00:25:00,160.00,155.00,159.00,149.00,157.00,157.00,163.00,156.00,149.00,159.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 00:55:00,202.00,149.00,181.00,191.00,180.00,174.00,186.00,157.00,176.00,146.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 01:25:00,169.00,187.00,172.00,156.00,162.00,183.00,139.00,165.00,152.00,156.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 01:55:00,159.00,157.00,139.00,148.00,156.00,150.00,159.00,142.00,148.00,163.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 02:25:00,177.00,190.00,166.00,188.00,159.00,160.00,152.00,159.00,163.00,148.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 21:30:00,186.64,189.80,204.92,217.14,214.68,225.58,233.05,230.07,221.19,221.19,...,259.52,243.08,244.66,268.13,257.05,261.10,256.88,267.78,247.30,258.99
2022-12-31 22:00:00,198.51,197.36,207.38,198.68,195.60,213.10,237.89,226.29,223.12,229.36,...,275.87,253.01,251.52,256.88,271.82,229.45,243.87,264.09,280.97,252.40
2022-12-31 22:30:00,204.04,242.99,231.56,240.70,237.89,220.92,241.05,224.18,222.86,238.07,...,233.67,263.74,256.62,259.96,263.30,251.96,250.20,265.93,294.95,261.19
2022-12-31 23:00:00,216.44,217.76,214.33,230.77,246.33,263.38,215.65,230.07,238.51,233.85,...,275.60,266.37,260.75,234.55,266.81,280.53,285.27,271.30,273.93,253.27


In [51]:
df_spec_filt = df_spec[(df_spec.index.minute < 25 ) | (df_spec.index.minute > 30) ] 
df_spec_filt

Unnamed: 0,0.025,0.03,0.035,0.04,0.045,0.05,0.055,0.06,0.065,0.07,...,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.6
2016-01-01 00:55:00,202.00,149.00,181.00,191.00,180.00,174.00,186.00,157.00,176.00,146.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 01:55:00,159.00,157.00,139.00,148.00,156.00,150.00,159.00,142.00,148.00,163.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 02:55:00,139.00,156.00,125.00,124.00,149.00,135.00,139.00,124.00,145.00,152.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 03:55:00,148.00,141.00,173.00,155.00,163.00,156.00,153.00,156.00,150.00,159.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 04:55:00,160.00,169.00,186.00,190.00,188.00,170.00,187.00,187.00,165.00,172.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 19:00:00,201.76,223.91,230.33,228.92,232.26,216.62,212.48,217.32,213.63,238.68,...,249.85,202.20,220.75,205.54,242.29,207.12,239.12,225.23,239.38,240.88
2022-12-31 20:00:00,217.23,221.01,245.80,234.46,232.26,242.46,244.31,237.89,227.60,234.02,...,240.53,241.58,238.59,256.53,231.30,237.89,241.58,271.21,259.60,230.15
2022-12-31 21:00:00,187.96,202.64,213.63,236.13,220.57,214.07,214.86,217.05,243.25,210.46,...,255.65,249.93,267.69,239.12,279.74,239.30,266.46,250.55,270.77,257.58
2022-12-31 22:00:00,198.51,197.36,207.38,198.68,195.60,213.10,237.89,226.29,223.12,229.36,...,275.87,253.01,251.52,256.88,271.82,229.45,243.87,264.09,280.97,252.40


In [52]:
df_spec_filt.to_csv('data/FMI_nbp_dir.csv')

In [64]:
[float(x) for x in df_spec_filt.columns.to_list()]

[0.025,
 0.03,
 0.035,
 0.04,
 0.045,
 0.05,
 0.055,
 0.06,
 0.065,
 0.07,
 0.075,
 0.08,
 0.085,
 0.09,
 0.095,
 0.1,
 0.105,
 0.11,
 0.115,
 0.12,
 0.125,
 0.13,
 0.135,
 0.14,
 0.145,
 0.15,
 0.155,
 0.16,
 0.165,
 0.17,
 0.175,
 0.18,
 0.185,
 0.19,
 0.195,
 0.2,
 0.205,
 0.21,
 0.215,
 0.22,
 0.225,
 0.23,
 0.235,
 0.24,
 0.245,
 0.25,
 0.26,
 0.27,
 0.28,
 0.29,
 0.3,
 0.31,
 0.32,
 0.33,
 0.34,
 0.35,
 0.36,
 0.37,
 0.38,
 0.39,
 0.4,
 0.41,
 0.42,
 0.43,
 0.44,
 0.45,
 0.46,
 0.47,
 0.48,
 0.49,
 0.5,
 0.51,
 0.52,
 0.53,
 0.54,
 0.55,
 0.56,
 0.57,
 0.58,
 0.6]