In [1]:
from fastai import *
from fastai.vision import *
from torch import topk
import urllib.request
import requests
import json

In [2]:
classes = ['Gut', 'Mittel', 'Schlecht', 'Sehr_Gut']

In [3]:
learn = load_learner("data_dir")

In [4]:
mapbox_key = "pk.eyJ1IjoianBrdW5rbGVyIiwiYSI6ImNqZzB0MjFjNDBiam8ycXFweGlnMThmbG8ifQ.vA1aff3tTCIX_zQsPj0cTg"

In [5]:
test_addr = "Bajuwarenstraße 11, 93053 Regensburg"

In [6]:
def geocode(addr, api_key):
    """Use Mapbox Geocoding Service to retrieve coordinate pair for given address."""
    base_url = "https://api.mapbox.com/geocoding/v5/mapbox.places/{address}.json"\
                "?access_token={API_KEY}&language=de"
    headers = {'Content-Type': 'application/json',
           'Authorization': 'Bearer {0}'.format(api_key)}
    url = base_url.format(address=addr, API_KEY = api_key)
    
    response = requests.get(url.format(address=addr, API_KEY = mapbox_key), 
                            headers = headers)
    
    if response.status_code == 200:
        data = json.loads(response.content.decode('utf-8'))
        most_relevant = data["features"][0]
        if most_relevant["relevance"] < 0.8:
            return (None, None)
        
        lng = most_relevant["center"][0]
        lat = most_relevant["center"][1]
        rel = most_relevant["relevance"]
        return (lat, lng, rel)
    else:
        return (None, None)

In [7]:
def classifyLocation(lat, lng, relevance):
    """Classify location specified by coordinate pair using our trained CNN model."""
    width, height = 300, 500
    img_base_url = "https://dev.virtualearth.net/REST/v1/Imagery/Map/Aerial/"\
                    "{lat},{lon}/17?ms={width},{height}&od=1&c=de-DE"\
                    "&key=AijbFhynMi9YlUoC5sbBKfrfbnkcMJ34sYBEORQwbsviodnw8nTkkgh5se5COtMs"
    url = img_base_url.format(lat = lat, lon = lng, width = width, height = height)
    with urllib.request.urlopen(url) as response:
        f = BytesIO(response.read())
    
    img = open_image(f)
    prediction = learn.predict(img)
    prob, label = topk(prediction[2],len(classes))
    inv_map = {v: k for k, v in learn.data.c2i.items()}
    labels = list(map(inv_map.get,label.data.tolist()))
    prob = [round(float(x),2) for x in prob.data.tolist()]
    pred_dict = dict(zip(labels, prob))
    output = dict({'prediction': pred_dict})
    output.update({'result': str(prediction[0])})
    output.update({'addr': {'coords': [lng, lat], 'relevance': relevance}})
    
    return json.dumps(output)

In [8]:
classifyLocation(*geocode(test_addr, mapbox_key)) # use tuple unpacking using asterisk *

'{"prediction": {"Sehr_Gut": 0.83, "Gut": 0.1, "Mittel": 0.04, "Schlecht": 0.02}, "result": "Sehr_Gut", "addr": {"coords": [12.118396, 48.994655], "relevance": 1}}'

# API Test

In [9]:
my_api = "http://ma.jankunkler.de/api/v1.0/classify/{addr}?access_token={token}"

In [10]:
my_api_key = "4ab88b42-3a5c-4318-83a6-fca9d06efc4c"

In [11]:
adressen = ["ROBERT-BOSCH-STR. 40, 50769 KOELN", 
            "PRETZENER STRASSE 10, 85435 ERDING",
           "Neupfarrplatz 8, Regensburg",
           "SAARSTR. 92,54290 TRIER",
            "Lotte-Branz-Str. 44, Regensburg"
           ]

In [12]:
for addr in adressen:
    url = my_api.format(addr=addr, token=my_api_key)
    response = requests.get(url)
    if response.status_code == 200:
        data = json.loads(response.content)
        print(data["result"])
    else:
        data = json.loads(response.content)
        print("Status Code: ", response.status_code, data["message"])

Sehr_Gut
Sehr_Gut
Schlecht
Schlecht
Status Code:  400 Error: Could not find geolocation.


# Excel/CSV File Upload

In [13]:
import pandas as pd
import json

In [24]:
def classifyExcel(file):
    
    addr_blueprint = "{street}, {PLZ} {City}, Germany"
    cols = ["Adresse", "Latitude", "Longitude", "Relevanz", 
            "Klassifizierung", "Wahrscheinlichkeit"]
    new_df = pd.DataFrame(columns=cols)
    df = pd.read_excel(file)
    # check if all required columns are present
    if not all(elem in df.columns for elem in ["Straße", "PLZ", "Ort"]):
        raise ValueError("Datensatz enthält nicht alle benötigten Spalten in korrektem Format.")
        
    
    for idx, location in df.iterrows():
        str_addr = addr_blueprint.format(street=location["Straße"], 
                                         PLZ=location["PLZ"], 
                                         City=location["Ort"])
        result = classifyLocation(*geocode(str_addr, mapbox_key))
        data = json.loads(result)
        lat,lng = data["addr"]["coords"]
        relevance = data["addr"]["relevance"]
        pred = data["result"]
        prob = data["prediction"][pred]
        new_df.loc[idx] = [str_addr, lat, lng, relevance, pred.replace("_", " "), prob]
    
    return new_df

In [25]:
test = classifyExcel("data/teststandorte_regensburg.xlsx")

In [26]:
test

Unnamed: 0,Adresse,Latitude,Longitude,Relevanz,Klassifizierung,Wahrscheinlichkeit
0,"Johann-Hösl-Straße 1, 93053 Regensburg, Germany",12.111314,48.994558,0.99,Sehr Gut,0.51
1,"Franz-Hartl-Straße 1, 93053 Regensburg, Germany",12.109925,48.994072,0.99,Schlecht,0.54
2,"Merowingerstraße 1, 93053 Regensburg, Germany",12.10555,48.993489,0.99,Gut,0.55
3,"Bajuwarenstraße 11, 93053 Regensburg, Germany",12.118396,48.994655,0.99,Sehr Gut,0.83
4,"Von-Seeckt-Straße 11, 93053 Regensburg, Germany",12.122505,49.000513,0.99,Schlecht,0.43
5,"Erbprinz-Franz-Joseph-Straße 21, 93053 Regensb...",12.118282,49.002104,0.99,Schlecht,0.84
6,"Hornstraße 6, 93053 Regensburg, Germany",12.115376,49.005072,0.99,Schlecht,0.47
7,"Friedenstraße 23, 93053 Regensburg, Germany",12.099463,49.010704,0.99,Gut,0.63
8,"Weiße-Lilien-Straße 13, 93047 Regensburg, Germany",12.0984,49.017035,0.99,Schlecht,0.96
