In [51]:
import netCDF4 as nc
import matplotlib.pyplot as plt
import numpy.ma as ma
import numpy as np
import os

import pandas as pd

In [5]:
# Folder holding the FMI buoy data files. Set the FMI_DATA_DIR environment
# variable to point somewhere else; the original local path stays the fallback.
datapath = os.environ.get('FMI_DATA_DIR', r'D:\Project_data\2023-eag249\FMI_data')

In [7]:
# The data folder holds files from three buoys, split up by year.
files = os.listdir(datapath)
# Peek at the first file name: first four characters of its second '_'-separated part.
files[0].split('_')[1][:4]

'2016'

In [52]:
# just for fun challenge to do this in OOP

class FmiDataLoad(object):
    """Load variables from per-station netCDF files into pandas DataFrames.

    Files are looked up in ``datapath`` and selected by station-name prefix.
    For every requested variable the files are read one after another, the
    first axis is cut to ``start_f_ind:end_f_ind`` and the result is
    transposed so rows are indexed by time and columns by frequency.

    Attributes:
        stationname: Validated station identifier ('BS', 'NBP' or 'GOF').
        ds_params: Validated list of variable names to load.
        available_files: File names in ``datapath`` starting with the station name.
        F: Frequency values taken from the last file read (set by ``load_param``).
        S, SPR, D: DataFrames for the 'S', 'spr' and 'D' variables, or None
            until ``load_vars_into_df`` has loaded them.
    """

    # Default data folder; a per-instance override can be passed to __init__.
    datapath = r'D:\Project_data\2023-eag249\FMI_data'
    start_f_ind = 0  # 0.025 Hz
    end_f_ind = 80  # 0.6 Hz

    _VALID_STATIONS = ('BS', 'NBP', 'GOF')
    _VALID_PARAMS = ('S', 'spr', 'D')
    # netCDF variable name -> instance attribute that stores its DataFrame.
    _PARAM_ATTRS = {'S': 'S', 'spr': 'SPR', 'D': 'D'}

    def __init__(self, station: str, ds_params: list, datapath: str = None):
        """Validate the arguments and list the files available for the station.

        Args:
            station: Station identifier; must be 'BS', 'NBP' or 'GOF'.
            ds_params: Variable names to load; each must be 'S', 'spr' or 'D'.
                An empty list is accepted and simply loads nothing.
            datapath: Optional folder to read from instead of the class default.

        Raises:
            ValueError: If the station or any parameter name is not valid.
        """
        # Both validators raise on bad input, so reaching the assignment
        # means the value is valid.
        self.validate_station_name(station)
        self.stationname = station
        self.validate_param_names(ds_params)
        self.ds_params = ds_params

        if datapath is not None:
            # Instance attribute shadows the class-level default.
            self.datapath = datapath

        self.available_files = self.get_available_files()
        self.F = None
        self.S = None
        self.SPR = None
        self.D = None

    def load_vars_into_df(self):
        """Load every variable in ``ds_params`` into its attribute (S, SPR, D)."""
        for param in self.ds_params:
            attr = self._PARAM_ATTRS.get(param)
            if attr is not None:
                setattr(self, attr, self.load_param(param))

    def load_param(self, param) -> pd.DataFrame:
        """Read one variable from all available files into a single DataFrame.

        Also stores the frequency values of the last file read in ``self.F``
        and uses their string form as the column labels.

        Args:
            param: Name of the netCDF variable to read.

        Returns:
            DataFrame with a datetime index (from the 'time' variable, read as
            seconds) and one column per selected frequency.

        Raises:
            FileNotFoundError: If no file for the station was found.
        """
        import warnings

        if not self.available_files:
            raise FileNotFoundError(
                f'No files for station {self.stationname} found in {self.datapath}'
            )

        lo, hi = self.start_f_ind, self.end_f_ind
        frames = []
        freqs = None
        for file in self.available_files:
            # Context manager closes the file handle even if reading fails.
            with nc.Dataset(os.path.join(self.datapath, file)) as dset:
                times = pd.to_datetime(dset['time'][:], unit='s')
                data = ma.getdata(dset[param][:])
                file_freqs = dset['F'][:][lo:hi, 0]

            # Column labels come from the last file only, so warn when the
            # frequency axis is not the same in every file.
            if freqs is not None and not np.array_equal(
                ma.getdata(freqs), ma.getdata(file_freqs)
            ):
                warnings.warn(
                    f'Frequency values in {file} differ from the previous file; '
                    'columns are labelled with the frequencies of the last file.'
                )
            freqs = file_freqs
            frames.append(pd.DataFrame(data[lo:hi, :].T, index=times))

        # Concatenate once instead of growing the frame inside the loop.
        df = pd.concat(frames, axis=0)
        self.F = ma.getdata(freqs)
        df.columns = [str(x) for x in freqs]
        return df

    def get_available_files(self) -> list:
        """Return the file names in ``datapath`` that start with the station name.

        The names are sorted because ``os.listdir`` returns them in arbitrary
        order, which would otherwise make the row order of the loaded frames
        depend on the file system.
        """
        return sorted(
            name for name in os.listdir(self.datapath)
            if name.startswith(self.stationname)
        )

    @staticmethod
    def validate_param_names(params):
        """Return True if every name in ``params`` is valid.

        Raises:
            ValueError: On the first invalid name (raised by ``valid_params``).
        """
        # Check all names before returning; returning inside the loop would
        # only ever validate the first one.
        for param in params:
            FmiDataLoad.valid_params(param)
        return True

    @staticmethod
    def validate_station_name(station):
        """Return True for a known station name, otherwise raise ValueError."""
        if station in FmiDataLoad._VALID_STATIONS:
            return True
        raise ValueError(f'Not valid stationname, expected BS, NBP, GOF. Got {station}')

    @staticmethod
    def valid_params(param):
        """Return True for a known parameter name, otherwise raise ValueError."""
        if param in FmiDataLoad._VALID_PARAMS:
            return True
        raise ValueError(f'Not valid parameter name! Expected S, spr or D; got {param}.')


In [59]:
# Load all three variables for the BS station, then display the D frame.
test = FmiDataLoad(
    station='BS',
    ds_params=['S', 'spr', 'D'],
)
test.load_vars_into_df()
test.D

Unnamed: 0,0.025,0.03,0.035,0.04,0.045,0.05,0.055,0.06,0.065,0.07,...,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.6
2016-01-01 00:25:00,162.00,157.00,172.00,152.00,174.00,179.00,180.00,162.00,170.00,165.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 00:55:00,163.00,153.00,167.00,166.00,159.00,165.00,165.00,166.00,183.00,165.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 01:25:00,153.00,166.00,143.00,184.00,165.00,169.00,169.00,149.00,157.00,138.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 01:55:00,180.00,181.00,160.00,134.00,162.00,194.00,155.00,157.00,187.00,165.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
2016-01-01 02:25:00,200.00,184.00,150.00,181.00,152.00,180.00,169.00,169.00,148.00,188.00,...,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00,-9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 21:30:00,228.25,230.71,225.18,236.87,259.11,250.58,253.48,259.64,251.64,261.40,...,274.93,283.46,274.14,303.07,270.71,282.85,294.10,286.98,305.70,288.03
2022-12-31 22:00:00,218.14,224.12,235.20,231.86,239.07,245.40,242.93,234.41,250.76,247.24,...,304.65,281.97,281.35,278.10,267.20,267.20,282.58,282.67,270.63,296.91
2022-12-31 22:30:00,274.67,256.38,238.36,250.85,255.77,265.09,270.10,268.43,275.20,264.56,...,296.21,287.51,284.16,291.46,261.31,253.40,295.59,290.93,302.10,270.80
2022-12-31 23:00:00,246.10,261.31,253.84,252.34,243.99,245.31,255.42,258.67,261.92,270.19,...,299.37,307.46,297.70,311.68,291.90,289.26,282.32,296.65,283.02,276.08


In [15]:
# Scratch check: positions of the falsy entries in a list of flags.
valid = [False, True, False]
[idx for idx, flag in enumerate(valid) if not flag]

[0, 2]

In [14]:
# NOTE(review): leftover scratch cell; `x` is not defined at notebook scope, so this raises the NameError shown in the output below.
x

NameError: name 'x' is not defined