# Identifying relevant sensor boxes

In [1]:
import pandas as pd
import numpy as np

import pickle
import requests
import json

import geopy.distance
import folium

## Find boxes that submitted data around New Year

In [4]:
# New Year windows for 2017/18, 2018/19 and 2019/20, each given as a
# "from,to" timestamp pair running from 30 Dec to 2 Jan.
test_times = {
    f"{year % 100}_{year % 100 + 1}": f"{year}-12-30T00:00:01Z,{year + 1}-01-02T23:00:00Z"
    for year in (2017, 2018, 2019)
}

### get the data

#### get data from API request

In [5]:
# Building blocks of the request URL: the openSenseMap boxes endpoint and
# the query fragment selecting the PM10 phenomenon.
phenom_url = "&phenomenon=PM10"
base_url = "https://api.opensensemap.org/boxes?"

In [6]:
# Request, for every New Year window, the boxes that reported PM10 data and
# cache the raw JSON answer as data/<window>.json.
for window, date_range in test_times.items():
    print(window)
    request_url = base_url + f"date={date_range}" + phenom_url
    # A (generous) timeout keeps a stalled connection from hanging the run.
    response = requests.get(request_url, timeout=300)
    # Fail loudly instead of caching an error body as if it were data.
    response.raise_for_status()
    with open(f"data/{window}.json", 'w', encoding='utf-8') as f:
        json.dump(response.json(), f, ensure_ascii=False, indent=4)

18_19
19_20


#### get data from saved files

In [7]:
# Load the cached API responses: one DataFrame and one list of box ids per
# New Year window.
time_ids = {}
time_dfs = {}
for window in test_times:
    # The cache files are written as UTF-8 with ensure_ascii=False, so they
    # must be read as UTF-8 regardless of the platform default encoding.
    with open(f"data/{window}.json", encoding="utf-8") as f:
        records = json.load(f)
    time_dfs[window] = pd.DataFrame(records)
    time_ids[window] = time_dfs[window]["_id"].tolist()

### inspect the data

#### delete columns not needed

In [9]:
# Metadata columns that play no role in the sensor selection.
unused_columns = [
    "name",
    "createdAt",
    "updatedAt",
    "lastMeasurementAt",
    "model",
    "loc",
    "description",
    "grouptag",
    "weblink",
    "image",
]

years = ["17_18", "18_19", "19_20"]
for year in years:
    time_dfs[year].drop(columns=unused_columns, inplace=True)

#### select box ids for which data exist for all the years

In [10]:
years = ["18_19", "19_20"]

# Box ids present in every selected year. All sets are intersected in one
# call: the former incremental loop restarted from the current year whenever
# the running intersection was empty, which is wrong for three or more years.
id_sets = [set(time_ids[year]) for year in years]
id_set = set.intersection(*id_sets) if id_sets else set()

In [11]:
# ids of the boxes that delivered data in all selected years, as a list
box_ids = [*id_set]

Including the 2017-2018 New Year, we have significantly fewer sensors for the comparison. Therefore, we only look at the two most recent years (2018-2019 and 2019-2020).

In [12]:
# Start from the 19_20 frame and keep the boxes whose id is in box_ids.
in_all_years = time_dfs["19_20"]["_id"].isin(box_ids)
boxes = time_dfs["19_20"].loc[in_all_years].reset_index(drop=True)

In [13]:
# Pull the "coordinates" entry out of each currentLocation record and then
# discard the original nested column.
location_frame = pd.DataFrame(boxes["currentLocation"].tolist(), index=boxes.index)
boxes["coordinates"] = location_frame["coordinates"]
boxes = boxes.drop(columns="currentLocation")

In [14]:
# Restrict to boxes whose exposure is "outdoor"; the column is then redundant.
is_outdoor = boxes["exposure"] == "outdoor"
boxes = boxes[is_outdoor].drop(columns="exposure")

In [15]:
# For each box, store the sensor id of every relevant phenomenon in its own
# column ("-" where the box has no such sensor).
relevant_sensors = ['Luftdruck','rel. Luftfeuchte','Temperatur','PM10','PM2.5']
for title in relevant_sensors:
    boxes[title] = "-"

for ind, box_sensors in boxes["sensors"].items():
    for entry in box_sensors:
        title = entry["title"]
        if title in relevant_sensors:
            boxes.loc[ind, title] = entry["_id"]

# English names for the German sensor titles; the raw sensor list is dropped.
boxes = boxes.rename(columns={"Luftdruck": "air_pressure",
                              "rel. Luftfeuchte": "rel_humidity",
                              "Temperatur": "temperature"}).drop(columns="sensors")

#### select boxes in Germany

In [16]:
import reverse_geocode

In [17]:
# Reverse-geocode every box (coordinate elements are passed swapped, index 1
# first) and append the returned fields as new columns.
boxes = boxes.reset_index(drop=True)
geo_records = [reverse_geocode.get([coord[1], coord[0]])
               for coord in boxes["coordinates"]]
boxes = pd.concat([boxes, pd.DataFrame(geo_records)], axis=1)

##### Extra: find boxes in Sweden

In [31]:
# One sub-frame per Swedish city of interest, keyed by city name.
boxes_sweden = {town: boxes.loc[boxes.city.eq(town)]
                for town in ("Göteborg", "Malmö")}

In [32]:
# Persist the Swedish selection as a pickle.
with open('data/boxes_sweden.pickle', 'wb') as out_file:
    pickle.dump(boxes_sweden, out_file, pickle.HIGHEST_PROTOCOL)

##### only keep boxes for Germany

In [27]:
# Keep the boxes geocoded to country code "DE" and renumber the rows.
in_germany = boxes["country_code"] == "DE"
boxes = boxes[in_germany].reset_index(drop=True)

In [85]:
# The reverse-geocode columns are no longer needed after the country filter.
boxes = boxes.drop(["country_code", "city", "country"], axis=1)

#### plot boxes in Germany

In [201]:
# Overview map with one small red marker per box; the popup shows the box id.
m = folium.Map(location=[51, 7], zoom_start=6)

for _, box in boxes.iterrows():
    marker = folium.CircleMarker(
        [box.coordinates[1], box.coordinates[0]],
        radius=2,
        popup=box._id,
        color="red",
    )
    marker.add_to(m)

m

#### assign location with HERE API

In [91]:
def get_adress_from_hereapi(coordinates, api_key=None):
    """Reverse-geocode one coordinate pair with the HERE Geocoder API (v6.2).

    Args:
        coordinates: Sequence with at least two elements; they are sent as
            ``prox=<coordinates[1]>,<coordinates[0]>``.
        api_key: HERE API key. When omitted, the ``HERE_API_KEY`` environment
            variable is used, falling back to the key that was embedded here.

    Returns:
        dict with the keys ``City``, ``District``, ``Street``,
        ``HouseNumber`` and ``PostalCode``. Fields missing from the response
        (or all fields, when no address was found) are empty strings.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    import os

    if api_key is None:
        # NOTE(review): a credential committed to source should be rotated
        # and supplied through the environment; the literal is kept only so
        # existing calls keep working.
        api_key = os.environ.get("HERE_API_KEY",
                                 "tIsul856Zr3bfyXGuJDZLjeiz-gDKhFdF4aNf7lcRY8")

    base = ("https://reverse.geocoder.ls.hereapi.com/6.2/reversegeocode.json"
            + f"?apiKey={api_key}"
            + "&mode=retrieveAddresses")

    # Query the HERE endpoint built above. The previous code used the
    # module-level ``base_url`` (the openSenseMap endpoint) by mistake.
    response = requests.get(base + f"&prox={coordinates[1]},{coordinates[0]}",
                            timeout=30)
    response.raise_for_status()

    try:
        address = response.json()["Response"]["View"][0]["Result"][0]["Location"]["Address"]
    except (KeyError, IndexError):
        # No address for this position: report every field as blank.
        address = {}

    fields = ("City", "District", "Street", "HouseNumber", "PostalCode")
    return {field: address.get(field, "") for field in fields}
    

In [92]:
# One HERE lookup per box; the resulting address dicts go into a new column.
boxes["address"] = [get_adress_from_hereapi(coord)
                    for coord in boxes["coordinates"]]

In [102]:
# Expand the address dicts into one column per address field.
address_columns = ["City", "District", "Street", "HouseNumber", "PostalCode"]
address_frame = pd.DataFrame(boxes["address"].tolist(), columns=address_columns)
boxes = pd.concat([boxes, address_frame], axis=1)


In [104]:
# The dict column is redundant once its fields have their own columns.
boxes = boxes.drop(["address"], axis=1)

## identify ids for different cities

In [131]:
# Candidate cities; print how many boxes have a City field containing each.
cities = [
    "Berlin",
    "Hamburg",
    "Düsseldorf",
    "München",
    "Dresden",
    "Bremen",
    "Stuttgart",
    "Nürnberg",
    "Frankfurt am Main",
    "Leipzig",
    "Köln",
]

for city in cities:
    n_boxes = boxes["City"].str.contains(city).sum()
    print(f"{city}: {n_boxes}")



Berlin: 51
Hamburg: 22
Düsseldorf: 6
München: 22
Dresden: 14
Bremen: 1
Stuttgart: 16
Nürnberg: 10
Frankfurt am Main: 12
Leipzig: 7
Köln: 19


#### Hamburg

In [3]:
# Manually selected Hamburg boxes.
# The zone box 5b816cce7c51910019888ed6 used to be listed under "regular" as
# well; it is kept in "zone" only, matching the Köln selection, which
# explicitly excludes its zone box from the regular ones.
rel_sensors = {"Hamburg":{"zone":["5b816cce7c51910019888ed6"],
                          "regular":["5bc30498bc400c001baa5f2d",
                                     "595b27e794f0520011603111",
                                     "5c273034919bf8001af1fb22",
                                     "5bafbe1a043f3f001b3fc777",
                                     "5ac158fb850005001bae6beb",
                                     "5b54905185381900195a1968",
                                     "5a89f594bc2d410019c45c95",
                                     "5be9764e56a1550019e35870",
                                     "5b505ce8c5bbe30019b9ea15",
                                     "59a30ebdd67eb50011fcf42d",
                                     "5bcacf30bb15b70019cd00ef",
                                     "5ae4b1e6223bd80019a51b2a",
                                     "58dae1d3f7b1c100119b008c",
                                     "595ea2d494f052001192c244"]
                         }
              }

#### Berlin

In [4]:
# boxes whose City field contains "Berlin" (independent copy)
in_city = boxes["City"].str.contains("Berlin")
b_city = boxes[in_city].copy()


In [5]:
# Coordinates of the box that serves as the city-centre reference.
is_reference = b_city["_id"] == "592ca4b851d3460011ea2635"
ref_coord = b_city.loc[is_reference, "coordinates"].values[0]

In [6]:
# Distance in km from each box to the reference box (coordinate elements are
# passed to geopy swapped, index 1 first).
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]

In [7]:
# A box counts as central when it lies at most 6.7 km from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [9]:
# City map centred on the reference box: central boxes red, the rest blue.
# Only the first two coordinate elements are used for the map centre (as in
# the other city maps), so an extra third element cannot shift the view.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [8]:
# Berlin: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Berlin"] = {"zone": [], "regular": central_ids}

#### Düsseldorf

In [9]:
# boxes whose City field contains "Düsseldorf" (independent copy)
in_city = boxes["City"].str.contains("Düsseldorf")
b_city = boxes[in_city].copy()

In [13]:
# Centre the map on the mean position of the boxes being plotted. At this
# point ref_coord still belongs to the previously processed city, so using it
# would open the map on the wrong place.
city_coords = b_city["coordinates"].tolist()
if city_coords:
    map_center = [float(np.mean([coord[1] for coord in city_coords])),
                  float(np.mean([coord[0] for coord in city_coords]))]
else:
    # Nothing to plot: fall back to the previous reference view.
    map_center = ref_coord[0:2][::-1]

m = folium.Map(location=map_center, zoom_start=11)

# No centre/outer distinction for this city: every box is drawn in red.
for _, box in b_city.iterrows():
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color="red").add_to(m)
m

In [10]:
# Düsseldorf: hand-picked regular boxes, zone list left empty.
duesseldorf_regular = [
    "59863226e3b1fa00108bab78",
    "5a54ddc6fa02ec000f24306d",
    "5af75af5223bd8001993e9b8",
    "5be7115fcdfcd0001c693564",
]
rel_sensors["Duesseldorf"] = {"zone": [], "regular": duesseldorf_regular}

#### München

In [11]:
# Boxes whose City field contains "München".
b_city = boxes.loc[boxes["City"].str.contains("München")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5b4d11485dc1ec001b5452c7", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [29]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [12]:
# München: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["München"] = {"zone": [], "regular": central_ids}

#### Dresden

In [13]:
# Boxes whose City field contains "Dresden".
b_city = boxes.loc[boxes["City"].str.contains("Dresden")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5b76c62683ed9f0019b216fe", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [41]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [14]:
# Dresden: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Dresden"] = {"zone": [], "regular": central_ids}

#### Köln

In [15]:
# Boxes whose City field contains "Köln".
b_city = boxes.loc[boxes["City"].str.contains("Köln")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5996d4fad67eb500112def51", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [50]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [16]:
# Köln: the reference box is the single zone box; the regular boxes are the
# remaining central ones.
zone_id = "5996d4fad67eb500112def51"
is_regular = b_city["center"] & b_city["_id"].ne(zone_id)
rel_sensors["Köln"] = {"zone": [zone_id],
                       "regular": b_city.loc[is_regular, "_id"].tolist()}

#### Frankfurt

In [17]:
# Boxes whose City field contains "Frankfurt am Main".
b_city = boxes.loc[boxes["City"].str.contains("Frankfurt am Main")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5bdce4f455d0ad001a35cee5", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [59]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [18]:
# Frankfurt: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Frankfurt"] = {"zone": [], "regular": central_ids}

#### Stuttgart

In [25]:
# Boxes whose City field contains "Stuttgart".
b_city = boxes.loc[boxes["City"].str.contains("Stuttgart")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5aa517c3396417001b9e2e9c", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [26]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [27]:
# Stuttgart: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Stuttgart"] = {"zone": [], "regular": central_ids}

#### Nürnberg

In [39]:
# Boxes whose City field contains "Nürnberg".
b_city = boxes.loc[boxes["City"].str.contains("Nürnberg")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5be1f1d7087001001aefecbe", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [40]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [41]:
# Nürnberg: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Nuernberg"] = {"zone": [], "regular": central_ids}

#### Leipzig

In [49]:
# Boxes whose City field contains "Leipzig".
b_city = boxes.loc[boxes["City"].str.contains("Leipzig")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "59f492c350a2aa000f78b5dc", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [50]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [51]:
# Leipzig: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Leipzig"] = {"zone": [], "regular": central_ids}

#### Münster

In [63]:
# Boxes whose City field contains "Münster".
b_city = boxes.loc[boxes["City"].str.contains("Münster")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5750220bed08f9680c6b4154", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [64]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [65]:
# Münster: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Muenster"] = {"zone": [], "regular": central_ids}

#### Duisburg 

In [70]:
# Boxes whose City field contains "Duisburg".
b_city = boxes.loc[boxes["City"].str.contains("Duisburg")].copy()
# Coordinates of the box used as city-centre reference.
ref_coord = b_city.loc[b_city["_id"] == "5ad9ee3e223bd80019fd7ea3", "coordinates"].values[0]
# Distance in km from every box to the reference box.
b_city["dist"] = [
    geopy.distance.distance((ref_coord[1], ref_coord[0]), (coord[1], coord[0])).km
    for coord in b_city["coordinates"]
]
# Central means at most 6.7 km away from the reference.
b_city["center"] = b_city["dist"] <= 6.7

In [71]:
# City map centred on the reference box: central boxes red, the rest blue.
m = folium.Map(location=ref_coord[0:2][::-1], zoom_start=11)

for _, box in b_city.iterrows():
    markercolor = "red" if box.center else "blue"
    folium.CircleMarker([box.coordinates[1], box.coordinates[0]],
                        radius=2,
                        popup=box._id,
                        color=markercolor).add_to(m)
m

In [72]:
# Duisburg: zone list left empty; all central boxes are the regular ones.
central_ids = b_city.loc[b_city["center"], "_id"].tolist()
rel_sensors["Duisburg"] = {"zone": [], "regular": central_ids}

## save and load data

In [75]:
# boxes.to_pickle("data/boxes_18-19_19-20.pkl")

# Persist the per-city sensor selection as a pickle.
with open('data/relevant_censors_cities.pickle', 'wb') as out_file:
    pickle.dump(rel_sensors, out_file, pickle.HIGHEST_PROTOCOL)

# To read it back:
# with open('relevant_censors_cities.pickle', 'rb') as handle:
#     b = pickle.load(handle)

In [2]:
# Load the previously pickled boxes DataFrame from disk.
boxes = pd.read_pickle("data/boxes_18-19_19-20.pkl")