# Access InfluxDB from Python
Inspired by https://www.influxdata.com/blog/getting-started-python-influxdb/

In [12]:
from influxdb import InfluxDBClient
from ruamel.yaml import YAML
import pandas as pd
import datetime as dt

## Make the client and connect to a database

In [5]:
# Connection settings are read from a local YAML file instead of being written into the notebook.
with open("secrets.yaml", "r") as secrets_file:
    secrets = YAML().load(secrets_file)

# Every connection parameter is taken from the "influx" section of that file.
influx_cfg = secrets["influx"]
client = InfluxDBClient(
    host=influx_cfg["host"],
    port=influx_cfg["port"],
    username=influx_cfg["username"],
    password=influx_cfg["password"],
)

# Cell output: the list of databases reported by the server.
client.get_list_database()

[{'name': '_internal'},
 {'name': 'influx_db_1'},
 {'name': 'influx_db_2'},
 {'name': 'influx_db_3'},
 {'name': 'influx_db_4'}]

In [6]:
# Select the database that the following queries run against.
database_name = 'influx_db_4'
client.switch_database(database_name)

## show series

In [7]:
# "show series" yields one row per series; only its "key" field is kept.
series_result = client.query("show series")
points = [row["key"] for row in series_result.get_points()]

# Print every series key with its position in the list.
for index, series_key in enumerate(points):
    print(index, series_key)

0 %,domain=sensor,entity_id=benni_battery
1 %,domain=sensor,entity_id=benni_moisture
2 %,domain=sensor,entity_id=cooper_se_charging_target
3 %,domain=sensor,entity_id=cooper_se_remaining_battery_percent
4 %,domain=sensor,entity_id=disk_use_percent
5 %,domain=sensor,entity_id=disk_use_percent_config
6 %,domain=sensor,entity_id=ewelink_ds01_battery
7 %,domain=sensor,entity_id=hp_laserjet_pro_m118_m119_black_cartridge_hp_cf294a
8 %,domain=sensor,entity_id=hp_laserjet_pro_m118_m119_imaging_drum_hp_cf232a
9 %,domain=sensor,entity_id=memory_use_percent
10 %,domain=sensor,entity_id=moto_g54_5g_battery_level
11 %,domain=sensor,entity_id=my_sensor
12 %,domain=sensor,entity_id=mz_shutter_ff_bath_illuminance
13 %,domain=sensor,entity_id=mz_shutter_ff_bath_illuminance_9
14 %,domain=sensor,entity_id=mz_shutter_ff_lara_l_illuminance
15 %,domain=sensor,entity_id=mz_shutter_ff_lara_m_illuminance
16 %,domain=sensor,entity_id=mz_shutter_ff_lara_r_illuminance
17 %,domain=sensor,entity_id=mz_shutter_ff_sl

## Simple query

In [8]:
# All fields of the "m³/h" measurement for the last day (relative to the server's now()).
qstr = 'SELECT * FROM "m³/h" WHERE TIME >= now() -1d'
print(f"{qstr=}")

# Each returned point becomes one row of the frame.
recent_points = client.query(qstr).get_points()
df = pd.DataFrame.from_records(recent_points)
df.tail(3)

qstr='SELECT * FROM "m³/h" WHERE TIME >= now() -1d'


Unnamed: 0,time,domain,entity_id,mean_value
18,2024-04-14T07:00:00Z,sensor,watermeter_rate_per_time_unit,0.0153
19,2024-04-14T08:00:00Z,sensor,watermeter_rate_per_time_unit,0.09
20,2024-04-14T09:00:00Z,sensor,watermeter_rate_per_time_unit,0.0102


In [9]:
# Parse the "time" column into datetimes and make it the index of the frame.
df = df.assign(time=pd.to_datetime(df.time)).set_index("time")
df.head()

Unnamed: 0_level_0,domain,entity_id,mean_value
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-04-13 11:00:00+00:00,sensor,watermeter_rate_per_time_unit,0.03825
2024-04-13 12:00:00+00:00,sensor,watermeter_rate_per_time_unit,0.1071
2024-04-13 13:00:00+00:00,sensor,watermeter_rate_per_time_unit,0.0018
2024-04-13 14:00:00+00:00,sensor,watermeter_rate_per_time_unit,0.0458
2024-04-13 15:00:00+00:00,sensor,watermeter_rate_per_time_unit,0.00345


In [10]:
# Number of rows per entity in the queried window.
entity_counts = df.entity_id.value_counts()
entity_counts

entity_id
watermeter_rate_per_time_unit    19
gasmeter2_rate_per_time_unit      2
Name: count, dtype: int64

## Query with datetime filter

In [17]:
unit = "m³"                      # measurement name, double-quoted as an identifier in the query
entity_id = "watermeter_value"   # value of the entity_id tag to filter on
selection = "mean_value"         # field to select

# The string literal is quoted explicitly instead of relying on the f-string
# "{entity_id=}" shortcut. That shortcut formats the value with repr(), which
# switches to double quotes as soon as the value contains a single quote, and
# InfluxQL reads double-quoted text as an identifier rather than a string.
entity_literal = entity_id.replace("\\", "\\\\").replace("'", "\\'")
qstr = f"""SELECT {selection} FROM "{unit}" WHERE entity_id='{entity_literal}'"""
qstr

'SELECT mean_value FROM "m³" WHERE entity_id=\'watermeter_value\''

A simple query string without a datetime filter returns all data:

In [18]:
# Run the unfiltered query and show the returned points as a frame.
all_points = client.query(qstr).get_points()
pd.DataFrame.from_records(all_points)

Unnamed: 0,time,mean_value
0,2023-10-29T14:00:00Z,579.340333
1,2023-10-29T15:00:00Z,579.342650
2,2023-10-29T16:00:00Z,579.360750
3,2023-10-29T17:00:00Z,579.378600
4,2023-10-29T18:00:00Z,579.287033
...,...,...
2164,2024-04-14T05:00:00Z,609.290233
2165,2024-04-14T06:00:00Z,609.307880
2166,2024-04-14T07:00:00Z,609.321267
2167,2024-04-14T08:00:00Z,609.390420


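## Adding a datetime filter
InfluxDB is strict about quotes: identifiers such as the measurement name take double quotes (`"`), while string and timestamp literals take single quotes (`'`).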

In [23]:
# Build the lower bound of the query window as a timestamp string.
# The fractional seconds and the trailing "Z" are literals in the format
# string; the naive datetime is not converted to any time zone.
rfc3339_format = '%Y-%m-%dT%H:%M:%S.00000000Z'
start_date = dt.date(2024, 4, 1)
start_time = dt.time(hour=12, minute=0, second=0)
start_moment = dt.datetime.combine(start_date, start_time)
start_string = start_moment.strftime(rfc3339_format)
start_string

'2024-04-01T12:00:00.00000000Z'

In [24]:
# Quoting matters here: the measurement is an identifier (double quotes), the
# tag value and the timestamp are literals (single quotes).
# The tag value is quoted explicitly instead of using the f-string
# "{entity_id=}" shortcut. That shortcut formats the value with repr(), which
# switches to double quotes when the value contains a single quote and would
# then turn the literal into an identifier.
entity_literal = entity_id.replace("\\", "\\\\").replace("'", "\\'")
qstr = (
    f'SELECT {selection} FROM "{unit}" '
    f"WHERE entity_id='{entity_literal}' AND time >= '{start_string}'"
)
qstr

'SELECT mean_value FROM "m³" WHERE entity_id=\'watermeter_value\' AND time >= \'2024-04-01T12:00:00.00000000Z\''

In [25]:
# Run the time-filtered query and index the resulting frame by its "time" column.
filtered_points = client.query(qstr).get_points()
pd.DataFrame.from_records(filtered_points).set_index("time")

Unnamed: 0_level_0,mean_value
time,Unnamed: 1_level_1
2024-04-01T12:00:00Z,607.470350
2024-04-01T14:00:00Z,607.470350
2024-04-01T15:00:00Z,607.470350
2024-04-01T16:00:00Z,607.479475
2024-04-01T18:00:00Z,607.492200
...,...
2024-04-14T05:00:00Z,609.290233
2024-04-14T06:00:00Z,609.307880
2024-04-14T07:00:00Z,609.321267
2024-04-14T08:00:00Z,609.390420
