# Function for loading data from a REST API into a pandas DataFrame

In [5]:
# Imports:
import json
from datetime import datetime, timedelta

import pandas as pd
import requests

In [2]:
def _measurements_frame(response, endpoint_column, value_column):
    '''Turn one measurements response into a data frame carrying a measurement_id column.'''
    payload = response.json()
    # The payload is read positionally: element 0 holds the column names and
    # element 2 the rows. Element 1 is skipped (the previous implementation
    # deleted it, describing it as a 'sensor' string that causes errors).
    frame = pd.DataFrame(payload[2], columns=payload[0])
    frame = frame.rename(columns={endpoint_column: value_column})
    # "<sensor_id>_<time>" is the key used to join P1 and P2 readings.
    # Built in a single pass over both columns instead of per-row label lookups.
    frame["measurement_id"] = [
        str(sensor_id) + "_" + str(timestamp)
        for sensor_id, timestamp in zip(frame["sensor_id"], frame["time"])
    ]
    return frame


def load_data(lat_start, lat_end, long_start, long_end, start_year, start_month, start_day, delta_hours, timeout=None):
    '''Load P1 and P2 measurements from the REST API and merge them into one data frame.

    INPUT:
    lat_start, lat_end:                 latitude range start / end, type: int or float
    long_start, long_end:               longitude range start / end, type: int or float
    start_year, start_month, start_day: date at which the queried time span starts (00:00:00), type: int
    delta_hours:                        length of the queried time span in hours, type: int or float
    timeout:                            optional timeout in seconds passed to requests.post;
                                        None (default) waits indefinitely, as before

    OUTPUT:
    Data frame with the columns measurement_PM10 (from endpoint P1), measurement_PM2.5
    (from endpoint P2), time, lat, lon, sensor_id and measurement_id. P1 and P2 are
    inner-joined on measurement_id ("<sensor_id>_<time>"), so only readings present
    in both responses are returned.

    RAISES:
    requests.HTTPError if one of the two requests returns an HTTP error status.
    '''
    # Basic parameters
    base_url = 'http://sensordata.gwdg.de/api/'
    endpoint_url_P1 = 'measurements/P1'          # P1 endpoint
    endpoint_url_P2 = 'measurements/P2'          # P2 endpoint

    # Select time range
    start_date = datetime(start_year, start_month, start_day)
    end_date = start_date + timedelta(hours=delta_hours)
    time_format = "%Y-%m-%dT%H:%M:%SZ"

    # Build the query; json.dumps guarantees a well-formed request body
    query = json.dumps({
        "timeStart": start_date.strftime(time_format),
        "timeEnd": end_date.strftime(time_format),
        "area": {"coordinates": [[lat_start, lat_end], [long_start, long_end]]},
    })

    # Run the query against both endpoints
    response_P1 = requests.post(base_url + endpoint_url_P1, data=query, timeout=timeout)
    response_P2 = requests.post(base_url + endpoint_url_P2, data=query, timeout=timeout)
    # Fail early with a clear HTTP error instead of an obscure parsing error later on
    response_P1.raise_for_status()
    response_P2.raise_for_status()

    # Build the per-endpoint data frames
    df_P1 = _measurements_frame(response_P1, "P1", "measurement_PM10")
    df_P1 = df_P1.reindex(columns=["measurement_PM10", "time", "lat", "lon", "sensor_id", "measurement_id"])

    df_P2 = _measurements_frame(response_P2, "P2", "measurement_PM2.5")
    # Only the value and the join key are kept; the remaining columns come from P1
    df_P2 = df_P2.reindex(columns=["measurement_PM2.5", "measurement_id"])

    # Merge on the unique measurement_id and put the columns into their final order
    df_total = pd.merge(df_P1, df_P2, on="measurement_id")
    df_total = df_total.reindex(columns=["measurement_PM10", "measurement_PM2.5", "time", "lat", "lon", "sensor_id", "measurement_id"])
    return df_total

In [3]:
# Example: one hour of measurements starting 2018-04-30 00:00 for the Stuttgart area
df = load_data(
    lat_start=48,
    lat_end=49,
    long_start=9,
    long_end=10,
    start_year=2018,
    start_month=4,
    start_day=30,
    delta_hours=1,
)
df

Unnamed: 0,measurement_PM10,measurement_PM2.5,time,lat,lon,sensor_id,measurement_id
0,23.10,5.93,2018-04-30T00:00:01Z,48.787,9.011,179,179_2018-04-30T00:00:01Z
1,,,2018-04-30T00:00:01Z,48.600,9.641,3283,3283_2018-04-30T00:00:01Z
2,,,2018-04-30T00:00:01Z,48.509,9.052,516,516_2018-04-30T00:00:01Z
3,8.43,6.60,2018-04-30T00:00:01Z,48.630,9.162,495,495_2018-04-30T00:00:01Z
4,,,2018-04-30T00:00:01Z,48.630,9.162,498,498_2018-04-30T00:00:01Z
...,...,...,...,...,...,...,...
28086,,,2018-04-30T00:59:59Z,48.790,9.846,6172,6172_2018-04-30T00:59:59Z
28087,17.57,2.80,2018-04-30T00:59:59Z,48.487,9.231,3567,3567_2018-04-30T00:59:59Z
28088,,,2018-04-30T00:59:59Z,48.747,9.175,9915,9915_2018-04-30T00:59:59Z
28089,1.20,0.90,2018-04-30T00:59:59Z,48.810,9.173,8938,8938_2018-04-30T00:59:59Z


In [20]:
# Does the merged frame contain any missing value? (.any() already yields a boolean)
df.isnull().values.any()

True