# Smoothing humidity measurements

There is a periodic noise component in the humidity measurements in the GH, caused by a humidifying system that is activated roughly every $5$ minutes. In order to accurately assess the effect of other variables on the indoor humidity, we would like to filter out the effect of the humidifying system.

### Get dependencies and data

In [1]:
# import dependencies
import os
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np

In [4]:
# define relevant paths
# the project root is taken to be the parent of the current working directory
# paths are built with os.path.join so the separators match the host OS; the
# trailing '' component keeps a trailing separator, so string concatenation
# such as `data_path + 'data_merged.csv'` keeps working
home_path = os.path.dirname(os.getcwd())
data_path = os.path.join(home_path, 'data', '')
plot_path = os.path.join(home_path, 'plotting', 'plots', '')

In [20]:
# get merged data
# the file is read with a two-row column header and four index columns, so
# both the columns and the rows come back as a MultiIndex
data = pd.read_csv(
    data_path + 'data_merged.csv',
    header=[0, 1],
    index_col=[0, 1, 2, 3]
)

# convert index.date col (index level 3) to datetime
# set_levels replaces the *unique* level values and leaves the per-row codes
# untouched, so the replacement has to be built from index.levels[3] (one
# entry per unique value); get_level_values(3) has one entry per row and would
# mis-align the codes with the new labels
data.index = data.index.set_levels(
    levels=pd.to_datetime(data.index.levels[3]),
    level=3
)

In [21]:
# define X vars - we are only concerned with the humidity sensors
# each entry is a (category, sensor_ID) column key under the 'humidity' category
x_vars = [
    ('humidity', sensor_id)
    for sensor_id in (
        'DC_GT101_GM101',
        'DC_GT102_GM102',
        'DC_GT103_GM103',
        'DC_GT104_GM104',
        'TA01_GT10X_GM10X',
        'TA01_GT401_GM401',
        'TA02_GT401_GM401',
    )
]

In [22]:
# keep only the x_vars columns (copy so the selection is an independent frame)
data = data[x_vars].copy()

# report the per-column NA counts, framed by a heading and trailing blank lines
print('Number of NAs\n', data.isna().sum(), '\n\n', sep='\n')

# drop every row with a NaN in any x_vars column, then strip the top column
# level ("humidity") - all remaining columns belong to it, so only the
# sensor IDs are left as column labels
data = (
    data
    .dropna(how='any', subset=x_vars)
    .droplevel(level=0, axis=1)
)

Number of NAs

category  sensor_ID       
humidity  DC_GT101_GM101      0
          DC_GT102_GM102      0
          DC_GT103_GM103      0
          DC_GT104_GM104      0
          TA01_GT10X_GM10X    0
          TA01_GT401_GM401    0
          TA02_GT401_GM401    0
dtype: int64



Index(['DC_GT101_GM101', 'DC_GT102_GM102', 'DC_GT103_GM103', 'DC_GT104_GM104',
       'TA01_GT10X_GM10X', 'TA01_GT401_GM401', 'TA02_GT401_GM401'],
      dtype='object', name='sensor_ID')


### Plot the problem

In [None]:
# set seaborn style/theme
sns.set_theme(style='white')

# plot settings used below
# NOTE(review): `params` was referenced in this cell without being defined in
# any earlier cell (NameError on a fresh kernel); the values here are
# placeholders - adjust palette, alpha and labels (incl. the actual humidity
# unit) as needed
params = {
    'palette': 'tab10',
    'alpha': 0.8,
    'units': 'Humidity',
    'title': 'Humidity measurements'
}

# the "humidity" column level was dropped when the columns were filtered, so
# `data.humidity` no longer exists - the frame itself holds one column per sensor.
# seaborn uses the index of a wide-form frame as the x variable, so only the
# datetime level (level 3) of the row MultiIndex is kept for plotting
# NOTE(review): assumes level 3 is the timestamp that should go on the x axis - confirm
plot_data = data.droplevel(level=[0, 1, 2], axis=0)

# relplot is a figure-level function: the returned object is a FacetGrid
# (the original variable name `ax` is kept)
ax = sns.relplot(
    data=plot_data,
    kind='line',
    palette=params['palette'],
    alpha=params['alpha'],
    dashes=False,
    markers=False,
    legend='full',
    linewidth=1.5
)

# keep only every second x tick label to reduce clutter
ax.set_xticklabels(step=2)

plt.yticks(rotation=45)
plt.xlabel('Timestamp')
plt.ylabel(params['units'], loc='center', rotation=0, fontsize=12, labelpad=30)
plt.title(params['title']);