# Testing for:
## Function for loading data from a REST API for a specifiable area and time range

In [1]:
# Imports
import json
from datetime import datetime, timedelta

import pandas as pd
import requests

# Basic parameters
base_url='http://sensordata.gwdg.de/api/'
# Select endpoint (P1 or P2)
endpoint_url_P1='measurements/P1'
endpoint_url_P2='measurements/P2'

# Select geo-coordinates (values are just examples ..)
latrange=[51,52]
longrange=[9,10]

# Select time range (values are just examples ..)
# 1st Jan 2019 - EURO 4
# 1st Jul 2020 - EURO 5
start_date = datetime(2018, 12, 30)
end_date = (start_date + timedelta(hours=1))

# Build the query
# The payload is serialised with json.dumps rather than assembled by string
# concatenation: a backslash continuation inside a string literal copies the
# source indentation into the payload, and str(list) is only valid JSON by
# coincidence for plain numbers.
time_format = "%Y-%m-%dT%H:%M:%SZ"
mydata = json.dumps({
    "timeStart": start_date.strftime(time_format),
    "timeEnd": end_date.strftime(time_format),
    "area": {"coordinates": [latrange, longrange]},
})

# Run the query
print(mydata)

# requests applies no timeout by default, so an unresponsive server would
# block the cell indefinitely; bound the wait explicitly (seconds).
request_timeout_s = 60
response_P1 = requests.post(base_url + endpoint_url_P1, data=mydata, timeout=request_timeout_s)
response_P2 = requests.post(base_url + endpoint_url_P2, data=mydata, timeout=request_timeout_s)

# Stop here on an HTTP error status (4xx/5xx) rather than letting a later
# cell fail while parsing an error body.
response_P1.raise_for_status()
response_P2.raise_for_status()

{"timeStart": "2018-12-30T00:00:00Z","timeEnd": "2018-12-30T01:00:00Z", "area":           {"coordinates":[[51, 52],[9, 10]]}}


In [2]:
# P1: decode the response, discard its second entry, then build the table from
# what remains (entry 0 -> column names, the next entry -> rows).
j_P1 = response_P1.json()
j_P1.pop(1)
df_P1 = (
    pd.DataFrame(data=j_P1[1], columns=j_P1[0])
    .rename(columns={"P1": "measurement"})
    # Constant label columns identifying where the rows came from.
    .assign(Sensor="P1", PM="PM10")
    .reindex(columns=["measurement", "time", "lat", "lon", "Sensor", "PM", "sensor_id"])
)

# P2: same decoding, but the table keeps the columns named by the response.
j_P2 = response_P2.json()
j_P2.pop(1)
df_P2 = pd.DataFrame(data=j_P2[1], columns=j_P2[0])

In [3]:
# Display the P1 frame (value column renamed to "measurement", label columns
# "Sensor" and "PM" added, columns reordered).
df_P1

Unnamed: 0,measurement,time,lat,lon,Sensor,PM,sensor_id
0,,2018-12-30T00:00:01Z,51.978,9.286,P1,PM10,2903
1,,2018-12-30T00:00:01Z,51.546,9.935,P1,PM10,5701
2,7.07,2018-12-30T00:00:06Z,51.270,9.433,P1,PM10,16662
3,,2018-12-30T00:00:06Z,51.450,9.253,P1,PM10,10905
4,,2018-12-30T00:00:07Z,51.270,9.433,P1,PM10,16663
...,...,...,...,...,...,...,...
1034,2.70,2018-12-30T00:59:33Z,51.551,9.874,P1,PM10,13795
1035,,2018-12-30T00:59:34Z,51.551,9.874,P1,PM10,13796
1036,,2018-12-30T00:59:44Z,51.523,9.910,P1,PM10,8949
1037,1.93,2018-12-30T00:59:52Z,51.273,9.543,P1,PM10,16352


In [4]:
# Display the P2 frame; its columns are the names taken from the response,
# so the values sit in a "P2" column.
df_P2

Unnamed: 0,time,lat,lon,P2,sensor_id
0,2018-12-30T00:00:01Z,51.978,9.286,,2903
1,2018-12-30T00:00:01Z,51.546,9.935,,5701
2,2018-12-30T00:00:06Z,51.270,9.433,2.1,16662
3,2018-12-30T00:00:06Z,51.450,9.253,,10905
4,2018-12-30T00:00:07Z,51.270,9.433,,16663
...,...,...,...,...,...
1034,2018-12-30T00:59:33Z,51.551,9.874,1.9,13795
1035,2018-12-30T00:59:34Z,51.551,9.874,,13796
1036,2018-12-30T00:59:44Z,51.523,9.910,,8949
1037,2018-12-30T00:59:52Z,51.273,9.543,0.9,16352


In [5]:
# Stack the P1 rows and the P2 rows into a single frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the equivalent call. The previous column-wise concat was dropped because
# its result was overwritten on the very next line and never used.
# NOTE: df_P2 carries its values in a "P2" column and has no "measurement",
# "Sensor" or "PM" columns, so those are NaN for the P2 rows; both inputs also
# keep their own row labels, so index values repeat in the result.
df_total = pd.concat([df_P1, df_P2], sort=False)
df_total


Unnamed: 0,measurement,time,lat,lon,Sensor,PM,sensor_id,P2
0,,2018-12-30T00:00:01Z,51.978,9.286,P1,PM10,2903,
1,,2018-12-30T00:00:01Z,51.546,9.935,P1,PM10,5701,
2,7.07,2018-12-30T00:00:06Z,51.270,9.433,P1,PM10,16662,
3,,2018-12-30T00:00:06Z,51.450,9.253,P1,PM10,10905,
4,,2018-12-30T00:00:07Z,51.270,9.433,P1,PM10,16663,
...,...,...,...,...,...,...,...,...
1034,,2018-12-30T00:59:33Z,51.551,9.874,,,13795,1.9
1035,,2018-12-30T00:59:34Z,51.551,9.874,,,13796,
1036,,2018-12-30T00:59:44Z,51.523,9.910,,,8949,
1037,,2018-12-30T00:59:52Z,51.273,9.543,,,16352,0.9


## Listing how often each sensor ID occurs (duplicates and their frequency)

In [6]:
# Number of rows per sensor_id in the P1 frame.
df_P1.groupby("sensor_id").size()

sensor_id
10904     1
10905    13
1112     24
1113     24
11998    24
11999    24
12000    24
12304     2
12305    20
12334    20
12335    20
12972    22
12973    24
13795    25
13796    25
16177    23
16178    23
16352    25
16353    25
16362    23
16363    23
16364    23
16662    18
16663    18
16664    18
18150    24
18935    12
18936    12
2574     24
2902     24
2903     25
3819     24
4412     24
4413     24
463       6
4957     24
4974     21
5257     56
5258     56
5608     24
5609     24
5700     24
5701     25
7828     11
8949     16
9870     24
9946     24
dtype: int64