In [1]:
import pandas as pd
import numpy as np
import copy
import os
import math
import datetime
from helpers import wgs_to_rd
from exceptions import crown_unknown, bgt_unknown
from rootvolume import rootvolume_calc, height_classifier, crown_classifier
from timedependency import *
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

52.3761973 4.8936216
(121389.48637130698, 487705.9987338648)
121389.48637130698 487705.9987338648
(52.376197302366684, 4.893621596667002)


In [2]:
wallengebied_data = pd.read_csv('data/wallengebied_validation.csv')
ijburg_data = pd.read_csv('data/ijburg_validation.csv')
sarphati_data = pd.read_csv('data/sarphati_validation.csv')
display(wallengebied_data.head())
display(sarphati_data.head())

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,OBJECTNUMMER,Soortnaam_NL,Boomnummer,Soortnaam_WTS,Boomtype,Boomhoogte,Plantjaar,Eigenaar,...,LAT,Unnamed: 17,Gemeente height,Predicted height,Predicted crown,Cobra height,Cobra crown,Species,BGT_class,Colors
0,0,186057,186058,Onbekend,0,Onbekend,,Onbekend,0,Gemeente Amsterdam,...,52.369158,,,0.0,0.0,,,0.0,open_ground,y
1,1,187138,187139,Onbekend,0,Onbekend,,Onbekend,0,Gemeente Amsterdam,...,52.367219,,,0.0,0.0,,,0.0,light_load,b
2,2,187168,187169,Onbekend,0,Onbekend,,Onbekend,0,Gemeente Amsterdam,...,52.367094,,,0.0,0.0,,,0.0,light_load,b
3,3,187169,187170,Onbekend,0,Onbekend,,Onbekend,0,Gemeente Amsterdam,...,52.36705,,,0.0,0.0,,,0.0,heavy_load,r
4,4,187567,187568,Onbekend,0,Onbekend,,Onbekend,0,Gemeente Amsterdam,...,52.368733,,,0.0,0.0,,,0.0,open_ground,y


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,OBJECTNUMMER,Soortnaam_NL,Boomnummer,Soortnaam_WTS,Boomtype,Boomhoogte,Plantjaar,Eigenaar,...,LAT,Unnamed: 17,Gemeente height,Predicted height,Predicted crown,Cobra height,Cobra crown,Species,BGT_class,Colors
0,0,11665,11666,Witte paardenkastanje,592133,Aesculus hippocastanum,Boom niet vrij uitgroeiend,Onbekend,1996,Gemeente Amsterdam,...,52.355025,,,0.0,0.0,,,0.0,open_ground,y
1,1,11667,11668,Rode paardenkastanje (cultuurvariëteit),592131,Aesculus carnea `Plantierensis`,Boom niet vrij uitgroeiend,Onbekend,1996,Gemeente Amsterdam,...,52.353562,,,0.0,0.0,,,0.0,open_ground,y
2,2,21793,21794,Veldesdoorn,582473,Acer campestre,Boom vrij uitgroeiend,Onbekend,1990,Gemeente Amsterdam,...,52.354215,,,16.248494,12.047352,,,0.0,open_ground,y
3,3,22942,22943,Witte paardenkastanje (cultuurvariëteit),583439,Aesculus hippocastanum `Pyramidalis`,Boom niet vrij uitgroeiend,Onbekend,1994,Gemeente Amsterdam,...,52.355121,,,0.0,0.0,,,0.0,open_ground,y
4,4,23041,23042,Tamme kastanje,583421,Castanea sativa,Boom niet vrij uitgroeiend,6 tot 9 m.,2003,Gemeente Amsterdam,...,52.354667,,,0.0,0.0,,,0.0,open_ground,y


In [7]:
def count(df): 
    total = 0
    underestimate = 0
    overestimate = 0
    correct = 0
    
    for i, tree in df.iterrows():
        type = tree['Boomtype']
        if type == 'Vormboom' or type == 'Knotboom':
            continue
        if not df.at[i, 'Predicted height']:
            continue        
        if math.isnan(df.at[i, 'Gemeente height']):
            continue
            
        # predicted height class
        height_model = df.at[i, 'Predicted height']
        predicted_height_class = height_classifier(height_model)
        
        # gemeente height class
        height_gemeente = df.at[i, 'Gemeente height']
        gemeente_height_class = height_classifier(height_gemeente)
        
        # 3 is the smallest height class, 1 the largest
        if predicted_height_class > gemeente_height_class:
            underestimate += 1
        elif predicted_height_class < gemeente_height_class:
            overestimate += 1
        elif predicted_height_class == gemeente_height_class:           
            correct += 1
            
        total += 1
    
    return total, underestimate, overestimate, correct

In [8]:
total, underestimate, overestimate, correct = count(wallengebied_data)
print('WALLENGEBIED')
print(total, underestimate, overestimate, correct)
print('correct: ' + str(round(correct/total*100, 1)) + '%')
print('underestimates: ' + str(round(underestimate/total*100, 1)) + '%')
print('overestimates: ' + str(round(overestimate/total*100, 1)) + '%')

WALLENGEBIED
836 26 472 338
correct: 40.4%
underestimates: 3.1%
overestimates: 56.5%


In [9]:
total, underestimate, overestimate, correct = count(ijburg_data)
print('IJBURG')
print(total, underestimate, overestimate, correct)
print('correct: ' + str(round(correct/total*100, 1)) + '%')
print('underestimates: ' + str(round(underestimate/total*100, 1)) + '%')
print('overestimates: ' + str(round(overestimate/total*100, 1)) + '%')

IJBURG
1091 8 712 371
correct: 34.0%
underestimates: 0.7%
overestimates: 65.3%


In [10]:
total, underestimate, overestimate, correct = count(sarphati_data)
print('SARPHATI')
print(total, underestimate, overestimate, correct)
print('correct: ' + str(round(correct/total*100, 1)) + '%')
print('underestimates: ' + str(round(underestimate/total*100, 1)) + '%')
print('overestimates: ' + str(round(overestimate/total*100, 1)) + '%')

SARPHATI
165 0 100 65
correct: 39.4%
underestimates: 0.0%
overestimates: 60.6%


# BGT influence

In [15]:
for i in ['open_ground', 'light_load', 'moderate_load', 'heavy_load']:
    total, underestimate, overestimate, correct = count(wallengebied_data[wallengebied_data['BGT_class'] == i])
    print('WALLENGEBIED ' + i)
    print(total, underestimate, overestimate, correct)
    print('correct: ' + str(round(correct/total*100, 1)) + '%')
    print('underestimates: ' + str(round(underestimate/total*100, 1)) + '%')
    print('overestimates: ' + str(round(overestimate/total*100, 1)) + '%')
    print()

WALLENGEBIED open_ground
181 4 111 66
correct: 36.5%
underestimates: 2.2%
overestimates: 61.3%

WALLENGEBIED light_load
391 12 209 170
correct: 43.5%
underestimates: 3.1%
overestimates: 53.5%

WALLENGEBIED moderate_load
117 5 67 45
correct: 38.5%
underestimates: 4.3%
overestimates: 57.3%

WALLENGEBIED heavy_load
59 4 31 24
correct: 40.7%
underestimates: 6.8%
overestimates: 52.5%



In [20]:
for i in ['open_ground', 'light_load', 'moderate_load', 'heavy_load']:
    total, underestimate, overestimate, correct = count(ijburg_data[ijburg_data['BGT_class'] == i])
    print('IJBURG ' + i)
    print(total, underestimate, overestimate, correct)
    print('correct: ' + str(round(correct/total*100, 1)) + '%')
    print('underestimates: ' + str(round(underestimate/total*100, 1)) + '%')
    print('overestimates: ' + str(round(overestimate/total*100, 1)) + '%')
    print()

IJBURG open_ground
588 8 469 111
correct: 18.9%
underestimates: 1.4%
overestimates: 79.8%

IJBURG light_load
202 0 87 115
correct: 56.9%
underestimates: 0.0%
overestimates: 43.1%

IJBURG moderate_load
260 0 127 133
correct: 51.2%
underestimates: 0.0%
overestimates: 48.8%

IJBURG heavy_load
24 0 15 9
correct: 37.5%
underestimates: 0.0%
overestimates: 62.5%



In [21]:
for i in ['open_ground', 'light_load', 'moderate_load', 'heavy_load']:
    total, underestimate, overestimate, correct = count(sarphati_data[sarphati_data['BGT_class'] == i])
    print('SARPHATI ' + i)
    print(total, underestimate, overestimate, correct)
    print('correct: ' + str(round(correct/total*100, 1)) + '%')
    print('underestimates: ' + str(round(underestimate/total*100, 1)) + '%')
    print('overestimates: ' + str(round(overestimate/total*100, 1)) + '%')
    print()

SARPHATI open_ground
123 0 79 44
correct: 35.8%
underestimates: 0.0%
overestimates: 64.2%

SARPHATI light_load
34 0 18 16
correct: 47.1%
underestimates: 0.0%
overestimates: 52.9%

SARPHATI moderate_load
2 0 2 0
correct: 0.0%
underestimates: 0.0%
overestimates: 100.0%

SARPHATI heavy_load
0 0 0 0


ZeroDivisionError: division by zero