In [1]:
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
import pytz
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import elasticsearch
import os
pd.options.plotting.backend = "plotly"

In [2]:
# Width of each averaging bucket, in minutes. Used below for the resample
# frequency and for the averaging parameters sent to the remote APIs.
avg_over_min = 1
# How many days of history, counted back from "now", to pull from each source.
days_look_back = 2

In [3]:
# Read the clock exactly once so that every start/end value below describes the
# same instant. Calling datetime.now() separately for each variable can straddle
# a second boundary and leave the string, tz-aware and UTC variants out of step.
iso_fmt = '%Y-%m-%dT%H:%M:%S'
now_local = datetime.now()  # naive, in the kernel's local time zone
start_local = now_local - timedelta(days=days_look_back)

# Naive local-time strings.
en_date = now_local.strftime(iso_fmt)
st_date = start_local.strftime(iso_fmt)

# tz-aware bounds used to slice the resampled frames.
# astimezone() on a naive datetime interprets it as local time before converting.
new_york = pytz.timezone('America/New_York')
en_date_dt = now_local.astimezone(new_york)
st_date_dt = start_local.astimezone(new_york)

# UTC strings with a trailing 'Z', used in the remote API query strings.
st_date_utc = start_local.astimezone(pytz.UTC).strftime(iso_fmt + 'Z')
en_date_utc = now_local.astimezone(pytz.UTC).strftime(iso_fmt + 'Z')

In [4]:
# Show the UTC query window: start first, then end, one timestamp per line.
print(st_date_utc, en_date_utc, sep='\n')

2022-02-26T15:20:35Z
2022-02-28T15:20:35Z


In [5]:
# Import and format Praxis data
# Pulls the particulates topic for the UTC window, follows the API's `next`
# links until they run out, then averages into avg_over_min-minute buckets.
uri = 'https://aws.southcoastscience.com/topicMessages?topic=nyu/brooklyn/loc/3/particulates&' \
'startTime=%s&endTime=%s&checkpoint=**:/%i:00' \
% (st_date_utc, en_date_utc, avg_over_min)
print(uri)

# NOTE(review): the API key is committed in this notebook. PRAXIS_API_KEY lets it
# be supplied from the environment; drop the literal fallback once the key is rotated.
header = {"authorization": "api-key %s" % os.environ.get('PRAXIS_API_KEY', 'nyu-brooklyn')}

# Collect one frame per page and concatenate once at the end; growing a frame
# with concat inside the loop re-copies all earlier rows on every page.
praxis_pages = []
while uri:
    response = requests.get(uri, headers=header, timeout=30)
    response.raise_for_status()  # fail loudly on HTTP errors instead of on a later KeyError
    # Named `payload` rather than `json` so the cell does not rebind the name of
    # the standard-library json module for the rest of the kernel session.
    payload = response.json()
    items = payload.get('Items', [])

    # An empty page would produce a tz-naive empty index that tz_convert rejects.
    if items:
        praxis_pages.append(pd.DataFrame({
            'ts': pd.to_datetime([ele['rec'] for ele in items]).tz_convert(tz='US/Eastern'),

            'praxis_pm1_vals': [ele['val']['pm1'] for ele in items],
            'praxis_pm2p5_vals': [ele['val']['pm2p5'] for ele in items],
            'praxis_pm10_vals': [ele['val']['pm10'] for ele in items],

            # Values read from the 'exg' -> 'rn20' branch of each record.
            'praxis_pm1_vals_adj': [ele['exg']['rn20']['pm1'] for ele in items],
            'praxis_pm2p5_vals_adj': [ele['exg']['rn20']['pm2p5'] for ele in items],
            'praxis_pm10_vals_adj': [ele['exg']['rn20']['pm10'] for ele in items],
        }))

    # The response carries a 'next' URL while more pages remain.
    uri = payload.get('next') or ''
    if uri:
        time.sleep(0.5)  # pause between page requests

if not praxis_pages:
    raise ValueError('Praxis API returned no records between %s and %s' % (st_date_utc, en_date_utc))

# 'min' is the minute alias; the older 'T' spelling is deprecated in recent pandas.
praxis_df = pd.concat(praxis_pages).set_index('ts').resample('%imin' % avg_over_min).mean()
praxis_df = praxis_df.loc[st_date_dt:en_date_dt]

https://aws.southcoastscience.com/topicMessages?topic=nyu/brooklyn/loc/3/particulates&startTime=2022-02-26T15:20:35Z&endTime=2022-02-28T15:20:35Z&checkpoint=**:/1:00


In [6]:
# Display the resampled, window-sliced Praxis frame as the cell output.
praxis_df

Unnamed: 0_level_0,praxis_pm1_vals,praxis_pm2p5_vals,praxis_pm10_vals,praxis_pm1_vals_adj,praxis_pm2p5_vals_adj,praxis_pm10_vals_adj
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-02-26 10:21:00-05:00,0.8,1.3,6.7,1.2,2.4,8.8
2022-02-26 10:22:00-05:00,0.9,1.3,3.9,1.6,2.2,5.3
2022-02-26 10:23:00-05:00,0.7,1.0,4.1,1.5,2.3,6.2
2022-02-26 10:24:00-05:00,0.8,1.4,4.4,1.5,2.4,9.5
2022-02-26 10:25:00-05:00,0.8,1.4,5.5,1.3,2.3,11.5
...,...,...,...,...,...,...
2022-02-27 06:52:00-05:00,2.2,2.9,5.9,5.9,6.1,14.9
2022-02-27 06:53:00-05:00,2.2,3.1,4.1,6.2,6.1,12.1
2022-02-27 06:54:00-05:00,2.3,3.1,4.0,6.8,6.5,14.2
2022-02-27 06:55:00-05:00,2.2,3.0,7.0,7.3,6.9,16.7


In [7]:
# Import and format Piera Canary data
# NOTE(review): the original cell carried a bare "# 168" comment here — possibly a
# previously used device id (the URL below uses 318); confirm and remove.
max_canary_pages = 1000  # hard stop so the paging loop always terminates
canary_window_start = pd.Timestamp(st_date_dt)

# Collect one frame per page and concatenate once after the loop.
canary_pages = []
for page_no in range(max_canary_pages):
    uri = 'https://sensei.pierasystems.com/api/get-minute-averages/318?page=%i' % page_no
    response = requests.get(uri, timeout=30)
    response.raise_for_status()
    records = response.json()['data']
    if not records:
        # No more data: stop instead of crashing on an empty index or
        # requesting the remaining pages for nothing.
        break

    page_df = pd.DataFrame({
        'ts': pd.to_datetime([ele['time'] for ele in records]).tz_convert(tz='US/Eastern'),
        # Presumably the API's pm10 / pm25 / pm100 fields mean PM1.0 / PM2.5 / PM10
        # — TODO confirm against the vendor documentation.
        'canary_pm1_vals': [ele['pm10'] for ele in records],
        'canary_pm2p5_vals': [ele['pm25'] for ele in records],
        'canary_pm10_vals': [ele['pm100'] for ele in records],
    })
    canary_pages.append(page_df)

    # Stop once the last record of this page predates the window start.
    # NOTE(review): this relies on pages arriving newest-first — confirm.
    if page_df['ts'].iloc[-1] < canary_window_start:
        break
    time.sleep(0.5)  # pause between page requests

if not canary_pages:
    raise ValueError('Piera Canary API returned no records')

# 'min' is the minute alias; the older 'T' spelling is deprecated in recent pandas.
canary_df = pd.concat(canary_pages).set_index('ts').resample('%imin' % avg_over_min).mean()
canary_df = canary_df.loc[st_date_dt:en_date_dt]

In [8]:
# Display the resampled, window-sliced Canary frame as the cell output.
canary_df

Unnamed: 0_level_0,canary_pm1_vals,canary_pm2p5_vals,canary_pm10_vals
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-26 10:21:00-05:00,0.535327,0.770407,0.770407
2022-02-26 10:22:00-05:00,0.314716,0.353896,0.353896
2022-02-26 10:23:00-05:00,0.247698,0.273818,0.273818
2022-02-26 10:24:00-05:00,0.329677,0.460277,0.460277
2022-02-26 10:25:00-05:00,0.293235,0.371595,0.371595
...,...,...,...
2022-02-27 07:10:00-05:00,3.721130,5.758490,5.862890
2022-02-27 07:11:00-05:00,2.974720,4.972900,5.494900
2022-02-27 07:12:00-05:00,2.676880,4.701180,4.805580
2022-02-27 07:13:00-05:00,2.404520,3.240360,3.240360


In [9]:
# Import and format the "purple" sensor data from its ThingSpeak channel.
# NOTE(review): the read key is committed in this notebook. THINGSPEAK_API_KEY lets
# it be supplied from the environment; drop the literal fallback once it is rotated.
thingspeak_key = os.environ.get('THINGSPEAK_API_KEY', 'U4IJYTKQ1ON601M3')
uri = 'https://api.thingspeak.com/channels/1530870/feeds.json?api_key=%s&average=%i&days=%i' \
% (thingspeak_key, avg_over_min, days_look_back)
print(uri.replace(thingspeak_key, '***'))  # keep the key out of the saved cell output

response = requests.get(uri, timeout=30)
response.raise_for_status()  # fail loudly on HTTP errors instead of on a later KeyError
feeds = response.json()['feeds']

purple_value_cols = ['purple_pm1_vals', 'purple_pm2p5_vals', 'purple_pm10_vals']
purple_raw = pd.DataFrame({
    'ts': pd.to_datetime([ele['created_at'] for ele in feeds]).tz_convert(tz='US/Eastern'),
    'purple_pm1_vals': [ele['field1'] for ele in feeds],
    'purple_pm2p5_vals': [ele['field2'] for ele in feeds],
    'purple_pm10_vals': [ele['field3'] for ele in feeds],
})
# ThingSpeak's feed JSON is understood to carry field values as strings (or null);
# coerce them to floats so the mean is numeric. Unparseable entries become NaN.
purple_raw[purple_value_cols] = purple_raw[purple_value_cols].apply(pd.to_numeric, errors='coerce')

# Bucket to avg_over_min minutes, then carry the last reading forward across at
# most two empty buckets. ffill(limit=2) is the supported spelling of the
# deprecated interpolate(method='pad', limit=2).
purple_df = purple_raw.set_index('ts').resample('%imin' % avg_over_min).mean().ffill(limit=2)
purple_df = purple_df.loc[st_date_dt:en_date_dt]

https://api.thingspeak.com/channels/1530870/feeds.json?api_key=U4IJYTKQ1ON601M3&average=1&days=2


In [10]:
# Display the resampled, forward-filled, window-sliced purple frame as the cell output.
purple_df

Unnamed: 0_level_0,purple_pm1_vals,purple_pm2p5_vals,purple_pm10_vals
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-26 10:21:00-05:00,0.00,0.00,0.20
2022-02-26 10:22:00-05:00,0.00,0.00,0.20
2022-02-26 10:23:00-05:00,0.00,0.00,0.11
2022-02-26 10:24:00-05:00,0.00,0.00,0.11
2022-02-26 10:25:00-05:00,0.00,0.00,0.15
...,...,...,...
2022-02-27 06:58:00-05:00,6.75,13.55,15.77
2022-02-27 06:59:00-05:00,6.75,13.55,15.77
2022-02-27 07:00:00-05:00,6.17,12.96,15.46
2022-02-27 07:01:00-05:00,6.17,12.96,15.46


In [11]:
# Start from the Canary frame and attach the Praxis columns, then the purple
# columns, aligning on the shared 'ts' index (DataFrame.join's default join type).
comb_df = canary_df.join(praxis_df).join(purple_df)

In [12]:
# Epoch timestamp taken when this cell runs; it is the default value of the
# `group` parameter of sensor_query below.
GROUP = time.time()

def scroll(es, index, body, scroll='2m', size=1000, timeout=25, **kw):
    """Yield successive non-empty batches of hits from a scrolled search.

    Args:
        es: Client object exposing ``search(...)`` and ``scroll(...)``.
        index: Index name forwarded to ``es.search``.
        body: Query body forwarded to ``es.search``.
        scroll: Scroll keep-alive value sent with every request.
        size: Batch size requested from ``es.search``.
        timeout: Search timeout; an ``int`` is rendered as ``'<n>s'``, any
            other value is passed through unchanged.
        **kw: Extra keyword arguments forwarded to ``es.search``.

    Yields:
        The ``page['hits']['hits']`` list of each response, stopping at the
        first response whose list is empty.
    """
    if isinstance(timeout, int):
        timeout = f'{int(timeout)}s'

    response = es.search(index=index, body=body, scroll=scroll, size=size, timeout=timeout, **kw)
    while True:
        cursor = response['_scroll_id']
        batch = response['hits']['hits']
        if not len(batch):
            return
        yield batch
        # Ask for the next batch using the id returned with the previous one.
        response = es.scroll(scroll_id=cursor, scroll=scroll)
        
def sensor_query(key=None, nodeid=None, start=None, end=None, k_time="time", group=GROUP):
    """Build the search body used to select sensor documents.

    Args:
        key: Field name; combined with ``nodeid`` into a term filter on
            ``'<key>.keyword'``. The filter is added only when both are truthy.
        nodeid: Value the ``'<key>.keyword'`` field must equal.
        start: Lower bound for ``k_time``; omitted from the range when falsy.
        end: Upper bound for ``k_time``; a falsy value is replaced by ``'now'``.
        k_time: Name of the time field the range clause applies to.
        group: Accepted but not used by this function.

    Returns:
        A dict of the form ``{"query": {"bool": {"must": [...]}}}``. Because
        ``end`` always falls back to ``'now'``, a range clause is always present.
    """
    clauses = []
    if key and nodeid:
        clauses.append({"term": {f'{key}.keyword': nodeid}})

    upper = end if end else 'now'
    if start:
        bounds = {"gte": start, "lte": upper}
    else:
        bounds = {"lte": upper}
    clauses.append({"range": {k_time: bounds}})

    return {"query": {"bool": {"must": clauses}}}

def download_sensor_data(table, key=None, nodeid=None, start=None, end=None, save=True, k_time='time', **kw):
    """Fetch documents from an index and return them or write them as JSON lines.

    The query is built with ``sensor_query`` and paged through with ``scroll``
    using the module-level ``es`` client; the query and per-batch progress are
    printed.

    Args:
        table: Index name to search.
        key, nodeid, start, end: Forwarded to ``sensor_query``.
        save: When false, return the documents as a list; when true, write
            them to a file and return its path.
        k_time: Name of the time field, used for the range filter and for the
            per-batch min/max shown in the progress output.
        **kw: Extra keyword arguments forwarded to ``sensor_query``. A ``group``
            entry, if present, also names the output sub-directory.

    Returns:
        ``list`` of ``_source`` dicts when ``save`` is false, otherwise the
        path of the written file (one JSON document per line).
    """
    # Imported locally on purpose: other cells in this notebook rebind the global
    # name `json` to a parsed HTTP response, which would break json.dumps below.
    import json

    query = sensor_query(key, nodeid, start, end, k_time=k_time, **kw)
    print(query)

    def pull():
        # Stream documents batch by batch, logging each batch's size and time span.
        with tqdm(scroll(es, table, query)) as pbar:
            for i, hits in enumerate(pbar):
                hits = [h['_source'] for h in hits]
                times = [h[k_time] for h in hits]
                pbar.write('{}. n hits: {}. {} - {}'.format(i, len(hits), min(times), max(times)))
                yield from hits

    if not save:
        return list(pull())

    # `group` used to be an undefined name here (NameError on every save=True call).
    # Take it from the caller's keywords, else from the module-level run label.
    group = kw.get('group', GROUP)
    fname = 'data/{}/{}.json'.format(group, nodeid or table)
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    print(f'Pulling node={nodeid} for ({start} -> {end}) ... saving to {fname}')
    with open(fname, 'w') as f:
        for h in pull():
            f.write(json.dumps(h) + '\n')
    print('all done!')
    return fname

In [13]:
from importlib import reload
import settings

# Reload the local settings module so that edits made to it after the first
# import are picked up without restarting the kernel.
reload(settings)

# Elasticsearch client used by download_sensor_data; the password is read from
# the settings module rather than written into the notebook.
es = elasticsearch.Elasticsearch(
    'https://es.master1.sonycproject.com',
    http_auth=('elastic', settings.es_password),
)

In [14]:
# Import and format the Piera 7100 readings carried in the node's status documents.
ss = download_sensor_data('status', 'fqdn', 'sonycnode-dca632ceb48d', start='now-%id' % days_look_back, save=False)

piera7100_raw = pd.DataFrame({
    # `aq.dt` is treated as epoch seconds, as the original
    # datetime.fromtimestamp(int(...)) call did. Parsing it as UTC and converting
    # gives the right instants whatever time zone the kernel runs in; the earlier
    # fromtimestamp + tz_localize('US/Eastern') was only correct on an Eastern-time
    # machine and could raise on DST transitions.
    'ts': pd.to_datetime([int(ele['aq']['dt']) for ele in ss], unit='s', utc=True).tz_convert('US/Eastern'),

    'piera7100_pm1_vals': [ele['aq']['PM1.0'] for ele in ss],
    'piera7100_pm2p5_vals': [ele['aq']['PM2.5'] for ele in ss],
    'piera7100_pm10_vals': [ele['aq']['PM10'] for ele in ss],
})

# 'min' is the minute alias; the older 'T' spelling is deprecated in recent pandas.
piera7100_df = piera7100_raw.set_index('ts').resample('%imin' % avg_over_min).mean()
piera7100_df = piera7100_df.loc[st_date_dt:en_date_dt]

{'query': {'bool': {'must': [{'term': {'fqdn.keyword': 'sonycnode-dca632ceb48d'}}, {'range': {'time': {'gte': 'now-2d', 'lte': 'now'}}}]}}}


0it [00:00, ?it/s]

0. n hits: 1000. 2022-02-26T15:21:09.966609 - 2022-02-26T18:29:52.883757
1. n hits: 1000. 2022-02-26T18:35:27.953718 - 2022-02-26T21:01:35.201005
2. n hits: 1000. 2022-02-26T21:01:40.243106 - 2022-02-26T23:09:07.191761
3. n hits: 1000. 2022-02-26T23:09:12.255869 - 2022-02-27T00:43:38.768001
4. n hits: 1000. 2022-02-26T16:15:45.744727 - 2022-02-27T01:42:29.731764
5. n hits: 1000. 2022-02-26T16:50:46.269118 - 2022-02-26T18:57:48.344572
6. n hits: 1000. 2022-02-26T18:57:53.241727 - 2022-02-26T22:40:26.782886
7. n hits: 1000. 2022-02-26T22:40:31.825548 - 2022-02-27T03:12:51.137873
8. n hits: 1000. 2022-02-27T03:12:56.211454 - 2022-02-27T05:35:58.348721
9. n hits: 1000. 2022-02-27T05:36:03.274874 - 2022-02-27T08:38:21.066874
10. n hits: 1000. 2022-02-27T03:05:31.039102 - 2022-02-27T10:44:23.062558
11. n hits: 1000. 2022-02-27T04:15:17.071537 - 2022-02-27T07:02:54.533773
12. n hits: 1000. 2022-02-27T07:02:59.611757 - 2022-02-27T09:46:42.115292
13. n hits: 941. 2022-02-27T02:21:10.258077 - 20

In [15]:
# Display the resampled, window-sliced Piera 7100 frame as the cell output.
piera7100_df

Unnamed: 0_level_0,piera7100_pm1_vals,piera7100_pm2p5_vals,piera7100_pm10_vals
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-26 10:21:00-05:00,0.616786,0.694051,0.756700
2022-02-26 10:22:00-05:00,0.526186,0.570996,0.587874
2022-02-26 10:23:00-05:00,0.616794,0.687736,0.718134
2022-02-26 10:24:00-05:00,0.644936,0.689896,0.709236
2022-02-26 10:25:00-05:00,0.625922,0.718465,0.871742
...,...,...,...
2022-02-27 05:40:00-05:00,2.471711,2.896749,3.172913
2022-02-27 05:41:00-05:00,2.205641,2.620760,2.976486
2022-02-27 05:42:00-05:00,2.182533,2.525513,2.896925
2022-02-27 05:43:00-05:00,4.027320,4.750913,5.294689


In [16]:
# Attach the Piera 7100 columns to the combined frame. Any copies left by an
# earlier run of this cell are dropped first, so the cell can be re-run; a
# second plain join would raise on the overlapping column names.
comb_df = comb_df.drop(columns=piera7100_df.columns, errors='ignore').join(piera7100_df)

In [17]:
# Display the combined frame (Canary, Praxis, purple and Piera 7100 columns) as the cell output.
comb_df

Unnamed: 0_level_0,canary_pm1_vals,canary_pm2p5_vals,canary_pm10_vals,praxis_pm1_vals,praxis_pm2p5_vals,praxis_pm10_vals,praxis_pm1_vals_adj,praxis_pm2p5_vals_adj,praxis_pm10_vals_adj,purple_pm1_vals,purple_pm2p5_vals,purple_pm10_vals,piera7100_pm1_vals,piera7100_pm2p5_vals,piera7100_pm10_vals
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-02-26 10:21:00-05:00,0.535327,0.770407,0.770407,0.8,1.3,6.7,1.2,2.4,8.8,0.0,0.0,0.20,0.616786,0.694051,0.756700
2022-02-26 10:22:00-05:00,0.314716,0.353896,0.353896,0.9,1.3,3.9,1.6,2.2,5.3,0.0,0.0,0.20,0.526186,0.570996,0.587874
2022-02-26 10:23:00-05:00,0.247698,0.273818,0.273818,0.7,1.0,4.1,1.5,2.3,6.2,0.0,0.0,0.11,0.616794,0.687736,0.718134
2022-02-26 10:24:00-05:00,0.329677,0.460277,0.460277,0.8,1.4,4.4,1.5,2.4,9.5,0.0,0.0,0.11,0.644936,0.689896,0.709236
2022-02-26 10:25:00-05:00,0.293235,0.371595,0.371595,0.8,1.4,5.5,1.3,2.3,11.5,0.0,0.0,0.15,0.625922,0.718465,0.871742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-27 07:10:00-05:00,3.721130,5.758490,5.862890,,,,,,,,,,,,
2022-02-27 07:11:00-05:00,2.974720,4.972900,5.494900,,,,,,,,,,,,
2022-02-27 07:12:00-05:00,2.676880,4.701180,4.805580,,,,,,,,,,,,
2022-02-27 07:13:00-05:00,2.404520,3.240360,3.240360,,,,,,,,,,,,


In [19]:
# Compare the PM2.5 series of every source on one figure (the pandas plotting
# backend was set to plotly in the imports cell, so .plot() returns a plotly figure).
pm2p5_columns = [
    'canary_pm2p5_vals',
    'praxis_pm2p5_vals_adj',
    'praxis_pm2p5_vals',
    'purple_pm2p5_vals',
    'piera7100_pm2p5_vals',
]
fig = comb_df[pm2p5_columns].plot()
# Label the figure so it stands on its own when the notebook is skimmed.
# NOTE(review): units are not stated anywhere in the notebook — add them to the
# y-axis label once confirmed.
fig.update_layout(
    title='PM2.5 by sensor (%i-minute means)' % avg_over_min,
    xaxis_title='Time (US/Eastern)',
    yaxis_title='PM2.5',
    legend_title_text='Series',
)
fig.show()