In [1]:
import pandas as pd
import numpy as np

In [2]:
class CleanData:
    """Cleaning helpers for a time-indexed weather DataFrame.

    The instance keeps a private copy of the frame passed to the
    constructor. Every method accepts an optional ``df`` argument; when it
    is omitted (``None``) the stored copy is used instead.
    """

    def __init__(self, df: pd.DataFrame):
        # Copy so later operations never touch the caller's frame.
        self.df = df.copy()

    def clean(self, df=None) -> pd.DataFrame:
        """Drop the CO2 column and every row holding an invalid reading.

        A row is removed when any of its columns equals ``-9999.0`` or
        ``-9999.99``, or when ``'wv (m/s)'`` is not ``>= 0`` (which also
        removes rows where that column is NaN).

        Args:
            df: Frame to clean; defaults to the stored frame.

        Returns:
            A filtered frame without the ``'CO2 (ppm)'`` column.

        Raises:
            KeyError: If ``'wv (m/s)'`` is not a column of the frame.
        """
        # Side effect kept from the original: the stored frame's index is
        # named "datetime" regardless of which frame is being cleaned.
        self.df.index.name = "datetime"

        # Resolve the fallback BEFORE touching ``df``; the previous order
        # dereferenced ``None`` whenever the method was called without args.
        df = self.df if df is None else df
        df = df.drop(columns=["CO2 (ppm)"], errors="ignore")

        # One vectorised pass instead of two boolean-index passes per column.
        has_sentinel = (df.eq(-9999.0) | df.eq(-9999.99)).any(axis=1)
        df = df[~has_sentinel]

        return df[df['wv (m/s)'] >= 0]

    def interpolate(self, df=None, freq='10min') -> pd.DataFrame:
        """Resample to a regular grid and fill the resulting gaps.

        Rows are grouped into ``freq``-sized bins and averaged; missing
        values are then interpolated (pandas default method), rounded to two
        decimals, and any remaining NaNs are back-filled then forward-filled.
        The number of rows with missing values is printed before filling.

        Args:
            df: Frame to resample; defaults to the stored frame.
            freq: Bin width as a pandas offset alias.

        Returns:
            The resampled, gap-filled frame.
        """
        df = self.df if df is None else df
        resampled = df.groupby(pd.Grouper(freq=freq)).mean()

        missing_before = resampled.isna().any(axis=1).sum()
        print(f"==> {missing_before} rows contain missing values and will be interpolated <==")

        filled = resampled.interpolate().round(2)
        # ``fillna(method=...)`` is deprecated; the dedicated methods are
        # equivalent. Back-fill first to cover leading NaNs, then forward-fill.
        return filled.bfill().ffill()

    def feature_selection(self, df=None) -> pd.DataFrame:
        """Return only the temperature, humidity, pressure and wind columns.

        Args:
            df: Frame to select from; defaults to the stored frame.

        Returns:
            A frame with the columns ``'T (degC)'``, ``'rh (%)'``,
            ``'p (mbar)'`` and ``'wv (m/s)'``, in that order.

        Raises:
            KeyError: If any of those columns is missing.
        """
        usecols = ['T (degC)', 'rh (%)', 'p (mbar)', 'wv (m/s)']
        # Honour the ``df`` argument (it was previously ignored).
        df = self.df if df is None else df
        return df[usecols]