# Function for loading data from a REST API into a pandas DataFrame

In [1]:
# Imports:
import json
from datetime import datetime, timedelta

import pandas as pd
import requests

In [18]:
_BASE_URL = 'http://sensordata.gwdg.de/api/'
_ENDPOINT_P1 = 'measurements/P1'
_ENDPOINT_P2 = 'measurements/P2'
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _build_query(start_date, end_date, latrange, longrange):
    """Serialize the time window and coordinate ranges into the JSON request body."""
    return json.dumps({
        "timeStart": start_date.strftime(_TIMESTAMP_FORMAT),
        "timeEnd": end_date.strftime(_TIMESTAMP_FORMAT),
        "area": {"coordinates": [latrange, longrange]},
    })


def _fetch_measurements(url, query, raw_column, renamed_column, timeout):
    """POST the query to one endpoint and return its rows as a data frame with a measurement_id column."""
    response = requests.post(url, data=query, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error body as data.
    response.raise_for_status()
    payload = response.json()

    # The payload is indexed as a list: element 0 supplies the column names and
    # element 2 the rows. Element 1 is skipped on purpose (the original code
    # deleted it, noting it is a 'sensor' string that causes errors).
    frame = pd.DataFrame(payload[2], columns=payload[0])
    frame = frame.rename(columns={raw_column: renamed_column})

    # sensor_id + timestamp serves as the key used to pair P1 and P2 rows.
    frame["measurement_id"] = [
        str(sensor_id) + "_" + str(timestamp)
        for sensor_id, timestamp in zip(frame["sensor_id"], frame["time"])
    ]
    return frame


def load_data(lat_start: float, lat_end: float, long_start: float, long_end: float,
              start_year: int, start_month: int, start_day: int, delta_hours: float,
              *, timeout=60) -> "pd.DataFrame":
    """Load P1 and P2 measurements from the REST API and merge them into one data frame.

    The query window starts at 00:00:00 on the given date and spans
    ``delta_hours`` hours. Both endpoints receive the same request body.

    Args:
        lat_start: Start of the latitude range.
        lat_end: End of the latitude range.
        long_start: Start of the longitude range.
        long_end: End of the longitude range.
        start_year: Year of the first day to query.
        start_month: Month of the first day to query.
        start_day: Day of month of the first day to query.
        delta_hours: Length of the query window in hours.
        timeout: Timeout in seconds handed to ``requests.post`` for each
            request; pass ``None`` to wait indefinitely.

    Returns:
        A data frame with the columns ``measurement_PM10``,
        ``measurement_PM2.5``, ``time``, ``lat``, ``lon``, ``sensor_id`` and
        ``measurement_id``. Only rows whose ``measurement_id`` occurs in both
        the P1 and the P2 result are kept (default inner merge).

    Raises:
        requests.HTTPError: If either endpoint answers with an error status.
    """
    # Select geo-coordinates
    latrange = [lat_start, lat_end]
    longrange = [long_start, long_end]

    # Select time range
    start_date = datetime(start_year, start_month, start_day)
    end_date = start_date + timedelta(hours=delta_hours)

    query = _build_query(start_date, end_date, latrange, longrange)

    df_P1 = _fetch_measurements(_BASE_URL + _ENDPOINT_P1, query, "P1", "measurement_PM10", timeout)
    df_P1 = df_P1.reindex(columns=["measurement_PM10", "time", "lat", "lon", "sensor_id", "measurement_id"])

    # Only the value and the key are kept from P2; the remaining columns come from P1.
    df_P2 = _fetch_measurements(_BASE_URL + _ENDPOINT_P2, query, "P2", "measurement_PM2.5", timeout)
    df_P2 = df_P2.reindex(columns=["measurement_PM2.5", "measurement_id"])

    # Merge both frames on the shared measurement_id and fix the column order.
    df_total = pd.merge(df_P1, df_P2, on="measurement_id")
    df_total = df_total.reindex(columns=["measurement_PM10", "measurement_PM2.5", "time", "lat", "lon", "sensor_id", "measurement_id"])
    return df_total

In [19]:
# Example: query a one-hour window starting on 2018-04-30
# for latitude range 51 to 52 and longitude range 9 to 10.
df = load_data(
    lat_start=51,
    lat_end=52,
    long_start=9,
    long_end=10,
    start_year=2018,
    start_month=4,
    start_day=30,
    delta_hours=1,
)
df

Unnamed: 0,measurement_PM10,measurement_PM2.5,time,lat,lon,sensor_id,measurement_id
0,18.43,11.90,2018-04-30T00:00:03Z,51.732,9.032,9946,9946_2018-04-30T00:00:03Z
1,9.17,8.27,2018-04-30T00:00:11Z,51.340,9.429,4784,4784_2018-04-30T00:00:11Z
2,,,2018-04-30T00:00:12Z,51.340,9.429,4785,4785_2018-04-30T00:00:12Z
3,12.87,8.23,2018-04-30T00:00:14Z,51.520,9.954,11998,11998_2018-04-30T00:00:14Z
4,,,2018-04-30T00:00:14Z,51.520,9.954,11999,11999_2018-04-30T00:00:14Z
...,...,...,...,...,...,...,...
1082,11.50,9.00,2018-04-30T00:59:50Z,51.290,9.641,4973,4973_2018-04-30T00:59:50Z
1083,,,2018-04-30T00:59:51Z,51.290,9.641,4974,4974_2018-04-30T00:59:51Z
1084,12.47,11.17,2018-04-30T00:59:54Z,51.854,9.672,9870,9870_2018-04-30T00:59:54Z
1085,13.43,12.00,2018-04-30T00:59:57Z,51.561,9.987,12334,12334_2018-04-30T00:59:57Z
