In [1]:
%load_ext autoreload
%autoreload 2

In [15]:
import glob
import tempfile
from os import path

import bigtree
import matplotlib.pyplot as plt
import pandas as pd
import tstore

from meteostations_vector_cube import ts_utils

In [16]:
ts_df_filepath = "../data/interim/netatmo-lausanne-aug-21/ts-df.csv"

In [17]:
ts_df = pd.read_csv(ts_df_filepath)
ts_df["time"] = pd.to_datetime(ts_df["time"])
ts_df

Unnamed: 0,variable,id,time,value
0,humidity,70:ee:50:00:10:f0,2021-08-12 13:00:06,50.0
1,humidity,70:ee:50:00:10:f0,2021-08-12 14:00:11,49.0
2,humidity,70:ee:50:00:10:f0,2021-08-12 15:00:16,50.0
3,humidity,70:ee:50:00:10:f0,2021-08-12 16:00:12,49.0
4,humidity,70:ee:50:00:10:f0,2021-08-12 17:00:10,53.0
...,...,...,...,...
381071,wind_strength,70:ee:50:7f:98:de,2021-08-23 22:00:06,2.0
381072,wind_strength,70:ee:50:7f:98:de,2021-08-25 01:00:04,3.0
381073,wind_strength,70:ee:50:7f:98:de,2021-08-25 02:00:05,3.0
381074,wind_strength,70:ee:50:7f:98:de,2021-08-25 03:00:06,2.0


There are three types of Netatmo modules: one for temperature and humidity, another for wind and a last one for rain. Note that even for the same station, each of this modules can have its own time indexing.

In [5]:
module1_ts_df = ts_utils.to_module_ts_df(ts_df, ["temperature", "humidity"])
module2_ts_df = ts_utils.to_module_ts_df(ts_df, ["rain_live"])
module3_ts_df = ts_utils.to_module_ts_df(ts_df, ["wind_angle", "wind_strength"])

This is what a module time-series data frame looks like:

In [10]:
module1_ts_df
TSLong(module1_ts_df, id_).to_tsdf

Unnamed: 0_level_0,variable,humidity,temperature
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1
70:ee:50:00:10:f0,2021-08-12 13:00:06,50.0,28.1
70:ee:50:00:10:f0,2021-08-12 14:00:11,49.0,29.5
70:ee:50:00:10:f0,2021-08-12 15:00:16,50.0,29.4
70:ee:50:00:10:f0,2021-08-12 16:00:12,49.0,29.2
70:ee:50:00:10:f0,2021-08-12 17:00:10,53.0,27.9
...,...,...,...
70:ee:50:7f:98:de,2021-08-23 22:00:06,64.0,19.0
70:ee:50:7f:98:de,2021-08-25 01:00:04,69.0,17.5
70:ee:50:7f:98:de,2021-08-25 02:00:05,70.0,17.2
70:ee:50:7f:98:de,2021-08-25 03:00:06,71.0,16.8


Let us now transform this data into a series of `TS` objects. Note that a `TS` can have more than one column **as long as they share the index, i.e., time series**:

In [20]:
module1_ts_ser = ts_utils.ts_ser_from_module_ts_df(module1_ts_df)
module1_ts_ser

70:ee:50:00:10:f0    <tstore.ts.TS object at 0x7d4fe54d9e10>
70:ee:50:00:17:8e    <tstore.ts.TS object at 0x7d4fe5520dd0>
70:ee:50:00:20:88    <tstore.ts.TS object at 0x7d4fe56d0ad0>
70:ee:50:00:30:2a    <tstore.ts.TS object at 0x7d4fe56d0c10>
70:ee:50:00:36:d0    <tstore.ts.TS object at 0x7d4fe539cd10>
                                      ...                   
70:ee:50:7a:c4:fc    <tstore.ts.TS object at 0x7d4fe5ad5d50>
70:ee:50:7a:d2:c2    <tstore.ts.TS object at 0x7d4fe596a090>
70:ee:50:7a:dc:ce    <tstore.ts.TS object at 0x7d4fe59b7990>
70:ee:50:7a:e3:4a    <tstore.ts.TS object at 0x7d4fe59b78d0>
70:ee:50:7f:98:de    <tstore.ts.TS object at 0x7d4fe57ed290>
Length: 748, dtype: TS[dask.Series]

The above is a series of `TS` objects, each with two columns ("humidity" and "temperature"):

In [23]:
module1_ts_ser.iloc[0].data

variable,humidity,temperature
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-08-12 13:00:06,50.0,28.1
2021-08-12 14:00:11,49.0,29.5
2021-08-12 15:00:16,50.0,29.4
2021-08-12 16:00:12,49.0,29.2
2021-08-12 17:00:10,53.0,27.9
...,...,...
2021-08-25 01:00:04,80.0,11.4
2021-08-25 02:00:05,81.0,10.9
2021-08-25 05:00:07,83.0,11.4
2021-08-25 06:00:09,83.0,13.0


On the other hand, for `module2_ts_df` we would only have one column:

In [21]:
module2_ts_ser = ts_utils.ts_ser_from_module_ts_df(module2_ts_df)
module2_ts_ser

70:ee:50:00:20:88    <tstore.ts.TS object at 0x7d4fe53b8190>
70:ee:50:00:36:d0    <tstore.ts.TS object at 0x7d4fe5506910>
70:ee:50:00:3b:86    <tstore.ts.TS object at 0x7d4fe59ad2d0>
70:ee:50:00:53:26    <tstore.ts.TS object at 0x7d4fe5927e90>
70:ee:50:00:58:ac    <tstore.ts.TS object at 0x7d4fe5793cd0>
                                      ...                   
70:ee:50:7a:6f:5c    <tstore.ts.TS object at 0x7d4fe55af6d0>
70:ee:50:7a:91:c8    <tstore.ts.TS object at 0x7d4fe494dc10>
70:ee:50:7a:d2:c2    <tstore.ts.TS object at 0x7d4fe5435c90>
70:ee:50:7a:e3:4a    <tstore.ts.TS object at 0x7d4fe556d1d0>
70:ee:50:7f:98:de    <tstore.ts.TS object at 0x7d4fe595ef10>
Length: 229, dtype: TS[dask.Series]

In [22]:
module2_ts_ser.iloc[0].data

variable,rain_live
time,Unnamed: 1_level_1
2021-08-12 12:00:08,0.0
2021-08-12 13:00:06,0.0
2021-08-12 14:00:11,0.0
2021-08-12 15:00:16,0.0
2021-08-12 16:00:12,0.0
...,...
2021-08-25 05:00:07,0.0
2021-08-25 06:00:09,0.0
2021-08-25 07:00:08,0.0
2021-08-25 08:00:09,0.0
