In [1]:
import sys
import geopandas
import pandas as pd
import numpy as np
#from pyproj import CRS
try:
    from osgeo import ogr, osr, gdal
except:
    sys.exit('ERROR: cannot find GDAL/OGR modules')

## change coordinate system (CRS)

In [2]:
wo_geo = geopandas.read_file('/home/philipp/Data/edin_diss/GIS_tax/wo_2016_2020.gdb', layer='Stichtag_20180101')

In [None]:
# define crs of orthophotos
new_crs = CRS.from_user_input('PROJCS["Austria_Lambert",GEOGCS["GCS_BESSEL_AUT",DATUM["D_BESSEL_AUT",SPHEROID["Bessel_1841",6377397.155,299.1528128,AUTHORITY["EPSG","7004"]]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["latitude_of_origin",47.5],PARAMETER["central_meridian",13.333333333],PARAMETER["standard_parallel_1",46],PARAMETER["standard_parallel_2",49],PARAMETER["false_easting",400000],PARAMETER["false_northing",400000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]')
# reproject to new crs
wo_geo = wo_geo.to_crs(new_crs)

In [None]:
# save to file
wo_geo.to_file("/home/philipp/Data/edin_diss/GIS_tax/gis_wo_2018.shp")

## read data - GIS_tax

In [2]:
wo_geo = geopandas.read_file('/home/philipp/Data/edin_diss/GIS_tax/2019/gis_wo_2019_fly.shp')

In [3]:
wo_geo.crs

{'init': 'epsg:31287'}

In [4]:
# filter abteilung -1
#wo_geo = wo_geo.loc[wo_geo['ABTEILUNG'] != -1,:]

In [5]:
# create unique ID WO
wo_geo['WO'] = wo_geo['FORSTBETRI'].astype(str) + \
wo_geo['REVIER_NR'].astype(str).str.zfill(2) + \
wo_geo['ABTEILUNG'].astype(str).str.zfill(3) + \
wo_geo['UNTERABTEI'] + \
wo_geo['TEILFLAECH'].astype(str)

In [6]:
wo_geo['FLUGJAHR'] = wo_geo['FLUGJAHR'].fillna(0)
wo_geo['FLUGJAHR'] = wo_geo['FLUGJAHR'].astype(int)

In [7]:
wo_geo.columns = ['obj_id', 'fb', 'fr', 'abt', 'uabt',
       'teilfl', 'color_code', 'link_id', 'id', 'admin', 'creation',
       'timeliness', 'length', 'area', 'fly_date', 'year_fly', 'geometry', 'WO']

In [8]:
wo_geo = wo_geo.drop(['obj_id', 'color_code', 'link_id', 'id', 'creation', 'timeliness'], axis=1)

In [9]:
wo_geo.head()

Unnamed: 0,fb,fr,abt,uabt,teilfl,admin,length,area,fly_date,year_fly,geometry,WO
0,171,6,-1,0,1,627,727.253895,10833.208608,26.06.2019,2019,"POLYGON ((599204.590 469767.385, 599201.570 46...",171060-101
1,171,6,-1,0,1,627,6502.341493,313967.596697,26.06.2019,2019,"POLYGON ((600106.511 472648.712, 600112.570 47...",171060-101
2,171,6,-1,0,1,627,78.122831,210.1562,26.06.2019,2019,"POLYGON ((600219.830 470593.835, 600239.880 47...",171060-101
3,171,6,-1,0,1,627,726.659414,20317.657233,26.06.2019,2019,"POLYGON ((598732.040 471838.245, 598724.480 47...",171060-101
4,171,6,-1,0,1,627,727.576909,21098.354447,26.06.2019,2019,"POLYGON ((599268.870 471089.755, 599254.750 47...",171060-101


In [11]:
wo_geo.to_file("/home/philipp/Data/edin_diss/GIS_tax/2019/gis_wo_2019.shp")

## load SAP tax data

In [4]:
wo_geo = geopandas.read_file('/home/philipp/Data/edin_diss/GIS_tax/2019/gis_wo_2019.shp')

In [5]:
# get infromation about which Teiloperate were active in a giveb year

In [2]:
# set year
year = 2020

path_sap_info = '/home/philipp/Data/edin_diss/SAP_tax/edin_meta_data.xlsx'
sap_info = pd.read_excel(path_sap_info, engine='openpyxl')

In [3]:
sap_info.head()

Unnamed: 0,FB,FR,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,171,1,1208,1208,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1356,1356
1,171,2,1208,1208,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1356,1356
2,171,3,1208,1208,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1356,1356
3,171,4,1208,1208,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1356,1356
4,171,5,1208,1208,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1356,1356


In [4]:
# array with all TOs in a given year
tos = sap_info.loc[:,year].unique()

In [5]:
tos

array([1356, 1353, 1049, 1053, 1052, 1100, 1050, 1051, 1284, 1042, 1073,
       1092, 1311, 1342, 1151, 1194, 1086, 1043, 1144, 1197, 1055, 1303,
       1085, 1345, 1302, 1140, 1312, 1065, 1066, 1360, 1088, 1044, 1196,
       1313, 1343, 1354, 1093, 1045, 1361, 1082, 1341, 1355, 1156, 1250])

In [20]:
dict_sap_tax = {'Merkmalausprägung': np.uint8, \
        'AuswKatTyp': np.uint8, \
        'Teiloperats-ID': np.uint16, \
        'Forstbetrieb': np.uint8, \
        'Debitor': int, \
        'TO-Bezeichnung': str, \
        'Status': np.uint8, \
        'Beg. Laufzeit': str, \
        'Ende Laufzeit': str, \
        'Operat-ID': np.uint16, \
        'vorgeschl. Hiebssatz': int, \
        'Verantwortlicher': str, \
        'Erfassungsstatus': str, \
        'Migriert?': str, \
        'GUID': str, \
        'Forstbetrieb.1': np.uint8, \
        'Teiloperats-ID.1': np.uint16, \
        'Forstrevier': np.uint8, \
        'Abteilung': np.uint16, \
        'Unterabteil.': str, \
        'Teilfl.': np.uint8, 
        'Debitor.1': int, \
        'Bearbeitungsblock': np.uint8, \
        'WE-Typ': str, \
        'Betriebsklasse': np.uint16, \
        'Umtriebszeit': np.uint8, \
        'Nebengrund Art': np.uint8, \
        'Ertragssituation':str,
        'Bewirtschaftungsform': str, \
        'Schutzwaldkategorie': str, \
        'Fläche in HA': np.float64, \
        'Seehöhe': np.uint16, \
        'Exposition': str, \
        'Neigung': np.uint8, \
        'Standorteinheit': np.uint8, \
        'Vegetationstyp': str,
        'Waldtyp': str, \
        'Wuchsgebiet': str, \
        'Überh. Laubholz': np.uint16, \
        'Überh. Nadelhz.': np.uint16, \
        'fr. Schälschade': str, \
        'Verbissgrad': np.uint8, \
        'SchutzwaldProjNr': str, \
        'Schlussgrad': np.uint8, \
        'Stabilität': np.uint8, \
        'VJ Bedingung': np.uint8, \
        'VJ Situation': np.uint8, \
        'Erreichbark. des BZ': str, \
        'Selektiver Verbiss': np.uint8, \
        'Erfassungsstatus.1': str, \
        'Storno': str, \
        'Angelegt von': str, \
        'Angelegt am': str, \
        'Uhrzeit': str, \
        'Geändert von': str, \
        'Geändert am': str, \
        'Uhrzeit.1': str, \
        'Waldort':str, \
        'GUID.1':str, \
        'GUID.2':str, \
        'Forstbetrieb.2': np.uint8, \
        'Teiloperats-ID.2': np.uint16, \
        'Forstrevier.1': np.uint8, \
        'Abteilung.1': np.uint16, \
        'Unterabteil..1': str, \
        'Teilfl..1': np.uint8, \
        'Best.-Schicht': np.uint8, \
        'Debitor.2': int, \
        'Schichtanteil': np.uint8, \
        'Schichtalter': np.uint16, \
        'S-Best.grad': np.float16, \
        'Erfassungsstatus.2': str, \
        'Storno.1':str, \
        'GUID.3': str, \
        'GUID.4': str, \
        'Forstbetrieb.3': np.uint8, \
        'Teiloperats-ID.3': np.uint16, \
        'Forstrevier.2': np.uint8, \
        'Abteilung.2': np.uint16, \
        'Unterabteil..2': str, \
        'Teilfl..2': np.uint8, \
        'Best.-Schicht.1': np.uint8, \
        'Baumart': str, \
        'Debitor.3': int, \
        'Baumartenanteil': np.uint8, \
        'BaumartenBestockgrad': np.float16, \
        'Schälgrad': np.uint8,
        'Ertragsklasse': np.float16, \
        'Vorrat / ha': np.float32, \
        'Laubholzvorrat / ha': np.float32, \
        'Nadelholzvorrat / ha': np.float32, \
        'Vorrat am Ort': np.float32, \
        'Laubholzvorrat Ort': np.float32, \
        'Nadelholzvorrat Ort': np.float32, \
        'GSOLL / HA': np.float32, \
        'GIST / HA': np.float32, \
        'lauf. Zuwachs / HA': np.float16, \
        'DGZU / HA': np.float16, \
        'HDZ': np.float16, \
        'LGZ': np.float16, \
        'Erfassungsstatus.3': str, \
        'Storno.2': str, \
        'GUID.5': str, \
        'GUID.6': str, \
        'Forstbetrieb.4': np.uint8, \
        'Teiloperats-ID.4': np.uint16, \
        'Forstrevier.3': np.uint8, \
        'Abteilung.3': np.uint16, \
        'Unterabteil..3': str, \
        'Teilfl..3': np.uint8, \
        'Best.-Schicht.2': np.uint8, \
        'Nutzungsnummer': np.uint8, \
        'Maßnahmenart': str, \
        'Massnahme geplant': str, \
        'Massnahmengruppe': str, \
        'Angriffsfläche': np.float32, \
        'Nutzung LH': np.uint16, \
        'Nutzung NH': np.uint16, \
        'Nutzungssumme': np.uint16, \
        'Nutzdringlichkeit': np.uint8, \
        'Bewpfl.': np.uint8, \
        'Zeitpunkt': np.uint8, \
        'Rückungsart': np.uint8, \
        'Schlägerungsart': np.uint8, \
        'Erfassungsstatus.4': str, \
        'Storno.3': str, \
        'Nutztext': str, \
        'Alter der 1. Schicht': np.uint16, \
        'TAX: Altersklasse': str, \
        'Repr. Fläche Schicht': np.float32, \
        'Produktionskategorie': str, \
        'Geschäftsjahr': np.uint8, \
        'Abmaßbeleg': str, \
        'Maßnahme': str, \
        'Geschäftsfeld': str, \
        'Bezeichnung': str, \
        'Pflanzen Ist': str, \
        'Baumarten Ist': str, \
        'Repr. Fläche Baumart': np.float32, \
        'Ertragstafelnummer': np.uint8, \
        'Ertragstafelbezeich': str, \
        'Anmerkung': str, \
        'Zeile1': str, \
        'Zeile2': str, \
        'Zeile3': str, \
        'Zeile4': str, \
        'Zeile5': str, \
        'Zeile6': str, \
        'Zeile7': str, \
        'Zeile8': str, \
        'Bestockungsziel': str, \
        'Flächenanteil': np.float32}

In [24]:
def get_data(tos):
    # set sap tax path directory
    path_sap_tax_dir = '/home/philipp/Data/edin_diss/SAP_tax'

    wo_sap_list = []

    for to in tos:
        # get fb
        fb = sap_info.loc[sap_info[year] == to, 'FB'].unique()[0]
        # create path to file
        path_sap_tax_file = path_sap_tax_dir + '/' + str(fb) + '/TO_' + str(to) + '.XLS'

        print(path_sap_tax_file)

        # read dat from file
        wo_sap_list.append(pd.read_csv(path_sap_tax_file, 
                                       sep='\t',
                                       encoding = "ISO-8859-1", 
                                       decimal=',', 
                                       error_bad_lines=False))
        # create unique ID WO
        #wo_sap['WO'] = wo_sap['Forstbetrieb'].astype(str) + \
        #wo_sap['Forstrevier'].astype(str) + \
        #wo_sap['Abteilung'].astype(str) + \
        #wo_sap['Unterabteil.'] + \
        #wo_sap['Teilfl.'].astype(str)

        #wo_sap_list.append(wo_sap)

    return pd.concat(wo_sap_list, ignore_index=True)

In [25]:
wo_sap = get_data([1284])

/home/philipp/Data/edin_diss/SAP_tax/172/TO_1284.XLS


In [29]:
wo_sap = wo_sap.fillna(0)

In [30]:
for col, dtype in dict_sap_tax.items():
    print(col, dtype)
    wo_sap[col] =  wo_sap[col].astype(dtype)

Merkmalausprägung <class 'numpy.uint8'>
AuswKatTyp <class 'numpy.uint8'>
Teiloperats-ID <class 'numpy.uint16'>
Forstbetrieb <class 'numpy.uint8'>
Debitor <class 'int'>
TO-Bezeichnung <class 'str'>
Status <class 'numpy.uint8'>
Beg. Laufzeit <class 'str'>
Ende Laufzeit <class 'str'>
Operat-ID <class 'numpy.uint16'>
vorgeschl. Hiebssatz <class 'int'>
Verantwortlicher <class 'str'>
Erfassungsstatus <class 'str'>
Migriert? <class 'str'>
GUID <class 'str'>
Forstbetrieb.1 <class 'numpy.uint8'>
Teiloperats-ID.1 <class 'numpy.uint16'>
Forstrevier <class 'numpy.uint8'>
Abteilung <class 'numpy.uint16'>
Unterabteil. <class 'str'>
Teilfl. <class 'numpy.uint8'>
Debitor.1 <class 'int'>
Bearbeitungsblock <class 'numpy.uint8'>
WE-Typ <class 'str'>
Betriebsklasse <class 'numpy.uint16'>
Umtriebszeit <class 'numpy.uint8'>
Nebengrund Art <class 'numpy.uint8'>
Ertragssituation <class 'str'>
Bewirtschaftungsform <class 'str'>
Schutzwaldkategorie <class 'str'>
Fläche in HA <class 'numpy.float64'>
Seehöhe <cla

In [31]:
wo_sap

Unnamed: 0,Merkmalausprägung,AuswKatTyp,Teiloperats-ID,Forstbetrieb,Debitor,TO-Bezeichnung,Status,Beg. Laufzeit,Ende Laufzeit,Operat-ID,...,Zeile1,Zeile2,Zeile3,Zeile4,Zeile5,Zeile6,Zeile7,Zeile8,Bestockungsziel,Flächenanteil
0,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,"ST vergrast, felsig, mördlich der E Trasse ei...","Fi-Verjüngungskegel, Bringung durch E-Trasse ...","BE ein AH, LH grtls am S-Rand, Älteres nördl ...",0,0,0,0,0,3AH 4LA 3FI,0.0
1,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
2,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
3,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
4,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53418,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
53419,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
53420,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0
53421,0,0,1284,172,220442,FE 2019,2,01.01.2020,31.12.2029,293,...,0,0,0,0,0,0,0,0,0,0.0


In [19]:
dict_sap_tax = {'Merkmalausprägung': np.uint8, \
        'AuswKatTyp': np.uint8, \
        'Teiloperats-ID': np.uint16, \
        'Forstbetrieb': np.uint8, \
        'Debitor': int, \
        'TO-Bezeichnung': str, \
        'Status': np.uint8, \
        'Beg. Laufzeit': str, \
        'Ende Laufzeit': str, \
        'Operat-ID': np.uint16, \
        'vorgeschl. Hiebssatz': int, \
        'Verantwortlicher': str, \
        'Erfassungsstatus': str, \
        'Migriert?': str, \
        'GUID': str, \
        'Forstbetrieb.1': np.uint8, \
        'Teiloperats-ID.1': np.uint16, \
        'Forstrevier': np.uint8, \
        'Abteilung': np.uint16, \
        'Unterabteil.': str, \
        'Teilfl.': np.uint8, 
        'Debitor.1': int, \
        'Bearbeitungsblock': np.uint8, \
        'WE-Typ': str, \
        'Betriebsklasse': np.uint16, \
        'Umtriebszeit': np.uint8, \
        'Nebengrund Art': np.uint8, \
        'Ertragssituation':str,
        'Bewirtschaftungsform': str, \
        'Schutzwaldkategorie': str, \
        'Fläche in HA': np.float64, \
        'Seehöhe': np.uint16, \
        'Exposition': str, \
        'Neigung': np.uint8, \
        'Standorteinheit': np.uint8, \
        'Vegetationstyp': str,
        'Waldtyp': str, \
        'Wuchsgebiet': str, \
        'Überh. Laubholz': np.uint16, \
        'Überh. Nadelhz.': np.uint16, \
        'fr. Schälschade': str, \
        'Verbissgrad': np.uint8, \
        'SchutzwaldProjNr': str, \
        'Schlussgrad': np.uint8, \
        'Stabilität': np.uint8, \
        'VJ Bedingung': np.uint8, \
        'VJ Situation': np.uint8, \
        'Erreichbark. des BZ': str, \
        'Selektiver Verbiss': np.uint8, \
        'Erfassungsstatus.1': str, \
        'Storno': str, \
        'Angelegt von': str, \
        'Angelegt am': str, \
        'Uhrzeit': str, \
        'Geändert von': str, \
        'Geändert am': str, \
        'Uhrzeit.1': str, \
        'Waldort':str, \
        'GUID.1':str, \
        'GUID.2':str, \
        'Forstbetrieb.2': np.uint8, \
        'Teiloperats-ID.2': np.uint16, \
        'Forstrevier.1': np.uint8, \
        'Abteilung.1': np.uint16, \
        'Unterabteil..1': str, \
        'Teilfl..1': np.uint8, \
        'Best.-Schicht': np.uint8, \
        'Debitor.2': int, \
        'Schichtanteil': np.uint8, \
        'Schichtalter': np.uint16, \
        'S-Best.grad': np.float16, \
        'Erfassungsstatus.2': str, \
        'Storno.1':str, \
        'GUID.3': str, \
        'GUID.4': str, \
        'Forstbetrieb.3': np.uint8, \
        'Teiloperats-ID.3': np.uint16, \
        'Forstrevier.2': np.uint8, \
        'Abteilung.2': np.uint16, \
        'Unterabteil..2': str, \
        'Teilfl..2': np.uint8, \
        'Best.-Schicht.1': np.uint8, \
        'Baumart': str, \
        'Debitor.3': int, \
        'Baumartenanteil': np.uint8, \
        'BaumartenBestockgrad': np.float16, \
        'Schälgrad': np.uint8,
        'Ertragsklasse': np.float16, \
        'Vorrat / ha': np.float32, \
        'Laubholzvorrat / ha': np.float32, \
        'Nadelholzvorrat / ha': np.float32, \
        'Vorrat am Ort': np.float32, \
        'Laubholzvorrat Ort': np.float32, \
        'Nadelholzvorrat Ort': np.float32, \
        'GSOLL / HA': np.float32, \
        'GIST / HA': np.float32, \
        'lauf. Zuwachs / HA': np.float16, \
        'DGZU / HA': np.float16, \
        'HDZ': np.float16, \
        'LGZ': np.float16, \
        'Erfassungsstatus.3': str, \
        'Storno.2': str, \
        'GUID.5': str, \
        'GUID.6': str, \
        'Forstbetrieb.4': np.uint8, \
        'Teiloperats-ID.4': np.uint16, \
        'Forstrevier.3': np.uint8, \
        'Abteilung.3': np.uint16, \
        'Unterabteil..3': str, \
        'Teilfl..3': np.uint8, \
        'Best.-Schicht.2': np.uint8, \
        'Nutzungsnummer': np.uint8, \
        'Maßnahmenart': str, \
        'Massnahme geplant': str, \
        'Massnahmengruppe': str, \
        'Angriffsfläche': np.float32, \
        'Nutzung LH': np.uint16, \
        'Nutzung NH': np.uint16, \
        'Nutzungssumme': np.uint16, \
        'Nutzdringlichkeit': np.uint8, \
        'Bewpfl.': np.uint8, \
        'Zeitpunkt': np.uint8, \
        'Rückungsart': np.uint8, \
        'Schlägerungsart': np.uint8, \
        'Erfassungsstatus.4': str, \
        'Storno.3': str, \
        'Nutztext': str, \
        'Alter der 1. Schicht': np.uint16, \
        'TAX: Altersklasse': str, \
        'Repr. Fläche Schicht': np.float32, \
        'Produktionskategorie': str, \
        'Geschäftsjahr': np.uint8, \
        'Abmaßbeleg': str, \
        'Maßnahme': str, \
        'Geschäftsfeld': str, \
        'Bezeichnung': str, \
        'Pflanzen Ist': str, \
        'Baumarten Ist': str, \
        'Repr. Fläche Baumart': np.float32, \
        'Ertragstafelnummer': np.uint8, \
        'Ertragstafelbezeich': str, \
        'Anmerkung': str, \
        'Zeile1': str, \
        'Zeile2': str, \
        'Zeile3': str, \
        'Zeile4': str, \
        'Zeile5': str, \
        'Zeile6': str, \
        'Zeile7': str, \
        'Zeile8': str, \
        'Bestockungsziel': str, \
        'Flächenanteil': np.float32}

In [16]:
wo_sap.columns[100:]

Index(['Erfassungsstatus.3', 'Storno.2', 'GUID.5', 'GUID.6', 'Forstbetrieb.4',
       'Teiloperats-ID.4', 'Forstrevier.3', 'Abteilung.3', 'Unterabteil..3',
       'Teilfl..3', 'Best.-Schicht.2', 'Nutzungsnummer', 'Maßnahmenart',
       'Massnahme geplant', 'Massnahmengruppe', 'Angriffsfläche', 'Nutzung LH',
       'Nutzung NH', 'Nutzungssumme', 'Nutzdringlichkeit', 'Bewpfl.',
       'Zeitpunkt', 'Rückungsart', 'Schlägerungsart', 'Erfassungsstatus.4',
       'Storno.3', 'Nutztext', 'Alter der 1. Schicht', 'TAX: Altersklasse',
       'Repr. Fläche Schicht', 'Produktionskategorie', 'Geschäftsjahr',
       'Abmaßbeleg', 'Maßnahme', 'Geschäftsfeld', 'Bezeichnung',
       'Pflanzen Ist', 'Baumarten Ist', 'Repr. Fläche Baumart',
       'Ertragstafelnummer', 'Ertragstafelbezeich', 'Anmerkung', 'Zeile1',
       'Zeile2', 'Zeile3', 'Zeile4', 'Zeile5', 'Zeile6', 'Zeile7', 'Zeile8',
       'Bestockungsziel', 'Flächenanteil'],
      dtype='object')

In [9]:
wo_sap['Unterabteil.'].unique()

array(['M', 'P', 'B', 'F', 'D', 'C', 'A', 'O', 'E', 'T', 'I', 'J', 'H',
       'S', 'N', 'G', 'Q', 'Y', 'R', 'V', 'L', 'K', 'W', 'U', 'X', 'Z',
       '1', '5', '4', '3', '8', '9', '6', '7', '2', 5, 8, 2, 1, 4, 3, 7,
       9], dtype=object)

In [11]:
wo_sap = get_data(tos)

/home/philipp/Data/edin_diss/SAP_tax/171/TO_1356.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/171/TO_1353.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/171/TO_1049.XLS


  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/172/TO_1053.XLS
/home/philipp/Data/edin_diss/SAP_tax/172/TO_1052.XLS
/home/philipp/Data/edin_diss/SAP_tax/172/TO_1100.XLS
/home/philipp/Data/edin_diss/SAP_tax/172/TO_1050.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/172/TO_1051.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/172/TO_1284.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/173/TO_1042.XLS
/home/philipp/Data/edin_diss/SAP_tax/173/TO_1073.XLS
/home/philipp/Data/edin_diss/SAP_tax/173/TO_1092.XLS


  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/173/TO_1311.XLS
/home/philipp/Data/edin_diss/SAP_tax/173/TO_1342.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/174/TO_1151.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/174/TO_1194.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/175/TO_1086.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/175/TO_1043.XLS
/home/philipp/Data/edin_diss/SAP_tax/175/TO_1144.XLS
/home/philipp/Data/edin_diss/SAP_tax/175/TO_1197.XLS
/home/philipp/Data/edin_diss/SAP_tax/176/TO_1055.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/176/TO_1303.XLS
/home/philipp/Data/edin_diss/SAP_tax/176/TO_1085.XLS
/home/philipp/Data/edin_diss/SAP_tax/176/TO_1345.XLS
/home/philipp/Data/edin_diss/SAP_tax/176/TO_1302.XLS
/home/philipp/Data/edin_diss/SAP_tax/177/TO_1140.XLS
/home/philipp/Data/edin_diss/SAP_tax/177/TO_1312.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/178/TO_1065.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/178/TO_1066.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/178/TO_1360.XLS
/home/philipp/Data/edin_diss/SAP_tax/179/TO_1088.XLS
/home/philipp/Data/edin_diss/SAP_tax/179/TO_1044.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/179/TO_1196.XLS
/home/philipp/Data/edin_diss/SAP_tax/179/TO_1313.XLS
/home/philipp/Data/edin_diss/SAP_tax/180/TO_1343.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/180/TO_1354.XLS


  if (await self.run_code(code, result,  async_=asy)):


/home/philipp/Data/edin_diss/SAP_tax/181/TO_1093.XLS
/home/philipp/Data/edin_diss/SAP_tax/181/TO_1045.XLS
/home/philipp/Data/edin_diss/SAP_tax/181/TO_1361.XLS
/home/philipp/Data/edin_diss/SAP_tax/181/TO_1082.XLS
/home/philipp/Data/edin_diss/SAP_tax/182/TO_1341.XLS
/home/philipp/Data/edin_diss/SAP_tax/182/TO_1355.XLS
/home/philipp/Data/edin_diss/SAP_tax/182/TO_1156.XLS
/home/philipp/Data/edin_diss/SAP_tax/182/TO_1250.XLS


In [12]:
wo_sap['Unterabteil.'].unique()

array(['F', '4', '3', '5', '7', '1', '9', '8', '6', '2', 'A', 'B', 'D',
       'C', 'G', 'E', 'S', 'H', 'R', 'P', 'O', 'N', 'L', 'K', 'M', 'I',
       'X', 'J', 'T', 'U', 'Z', 'V', 'W', 'Q', 'Y', 5, 8, 2, 1, 4, 3, 7,
       9], dtype=object)

In [14]:
wo_sap['Unterabteil.'] = wo_sap['Unterabteil.'].astype(str)

In [15]:
# create unique ID WO
wo_sap['WO'] = wo_sap['Forstbetrieb'].astype(str) + \
wo_sap['Forstrevier'].astype(str).str.zfill(2) + \
wo_sap['Abteilung'].astype(str).str.zfill(3) + \
wo_sap['Unterabteil.'] + \
wo_sap['Teilfl.'].astype(str)

In [16]:
wo_sap.head()

Unnamed: 0,Merkmalausprägung,AuswKatTyp,Teiloperats-ID,Forstbetrieb,Debitor,TO-Bezeichnung,Status,Beg. Laufzeit,Ende Laufzeit,Operat-ID,...,Zeile2,Zeile3,Zeile4,Zeile5,Zeile6,Zeile7,Zeile8,Bestockungsziel,Flächenanteil,WO
0,0,,1356,171,220442,FE 2019,2,01.01.2020,31.12.2029,291,...,BE einzelne Aspen LA,,,,,,,7EI 3HB,0.0,17103158F0
1,0,,1356,171,220442,FE 2019,2,01.01.2020,31.12.2029,291,...,,,,,,,,,0.0,17103158F0
2,0,,1356,171,220442,FE 2019,2,01.01.2020,31.12.2029,291,...,,,,,,,,,0.0,17103158F0
3,0,,1356,171,220442,FE 2019,2,01.01.2020,31.12.2029,291,...,,,,,,,,,0.0,17103158F0
4,0,,1356,171,220442,FE 2019,2,01.01.2020,31.12.2029,291,...,,,,,,,,,0.0,17103158F0


In [17]:
# stoe

# filter data
wo_sap_stoe = wo_sap.loc[wo_sap['Best.-Schicht'] == 0, ['WO', 'Forstbetrieb', 'Forstrevier', 'Abteilung', 
                                                        'Unterabteil.', 'Teilfl.', 'Beg. Laufzeit', 'Umtriebszeit', 
                                                        'Nebengrund Art', 'Ertragssituation', 'Bewirtschaftungsform', 
                                                        'Schutzwaldkategorie', 'Seehöhe', 'Exposition', 'Neigung', 
                                                        'Standorteinheit', 'Vegetationstyp', 'Wuchsgebiet']]

wo_sap_stoe.head()

Unnamed: 0,WO,Forstbetrieb,Forstrevier,Abteilung,Unterabteil.,Teilfl.,Beg. Laufzeit,Umtriebszeit,Nebengrund Art,Ertragssituation,Bewirtschaftungsform,Schutzwaldkategorie,Seehöhe,Exposition,Neigung,Standorteinheit,Vegetationstyp,Wuchsgebiet
0,17103158F0,171,3,158,F,0,01.01.2020,120,,I,W,,400,NW,35,76.0,WW,4.2
9,1710466345,171,4,663,4,5,01.01.2020,0,4.0,,,,0,,0,,,
10,1710466344,171,4,663,4,4,01.01.2020,0,4.0,,,,0,,0,,,
11,1710466343,171,4,663,4,3,01.01.2020,0,4.0,,,,0,,0,,,
12,1710433032,171,4,330,3,2,01.01.2020,0,3.0,,,,0,,0,,,


In [18]:
# wood volume

# filter data
wo_sap_v = wo_sap.loc[wo_sap['Best.-Schicht.1'] > 0, ['WO', 'Vorrat / ha', 'Laubholzvorrat / ha', 
                                                      'Nadelholzvorrat / ha', 'Vorrat am Ort', 
                                                      'Laubholzvorrat Ort', 'Nadelholzvorrat Ort',]]

# group by WO (ID) and sum all values
wo_sap_v = wo_sap_v.groupby(['WO']).sum().reset_index()

wo_sap_v.head()

Unnamed: 0,WO,Vorrat / ha,Laubholzvorrat / ha,Nadelholzvorrat / ha,Vorrat am Ort,Laubholzvorrat Ort,Nadelholzvorrat Ort
0,17101506A0,752.94,526.58,226.36,6543.06,4575.98,1967.08
1,17101506B1,562.0,508.8,53.2,455.22,412.14,43.08
2,17101506B2,220.4,220.4,0.0,138.86,138.86,0.0
3,17101506C1,533.2,533.2,0.0,1727.56,1727.56,0.0
4,17101506C2,375.8,330.48,45.32,1101.08,968.3,132.78


In [19]:
# wood cuts

# filter data
wo_sap_ma = wo_sap.loc[wo_sap['Nutzungsnummer'] > 0, ['WO', 'Maßnahmenart', 'Massnahmengruppe', 'Angriffsfläche', 
                                                      'Nutzung LH', 'Nutzung NH', 'Nutzungssumme', 'Nutzdringlichkeit', 
                                                      'Bewpfl.', 'Zeitpunkt', 'Rückungsart', 'Schlägerungsart']]

wo_sap_ma.head()

Unnamed: 0,WO,Maßnahmenart,Massnahmengruppe,Angriffsfläche,Nutzung LH,Nutzung NH,Nutzungssumme,Nutzdringlichkeit,Bewpfl.,Zeitpunkt,Rückungsart,Schlägerungsart
8,17103158F0,DF,VN,0.4,20.0,0.0,20.0,1,1,3,30.0,1.0
667,17103158A1,DF,VN,3.3,200.0,0.0,200.0,2,1,3,30.0,1.0
793,17101508B3,DF,VN,2.0,160.0,0.0,160.0,1,1,3,36.0,6.0
800,17101508A1,LI,EN,1.0,90.0,0.0,90.0,2,1,3,35.0,1.0
811,17101508B2,DF,VN,4.0,440.0,0.0,440.0,1,1,3,35.0,4.0


In [20]:
# filter just VN

In [21]:
wo_sap_maf = wo_sap_ma[wo_sap_ma['Massnahmengruppe'] == 'VN']
wo_sap_maf = wo_sap_maf[wo_sap_maf['Maßnahmenart'] != 'ZV']
wo_sap_maf = wo_sap_maf[wo_sap_maf['Maßnahmenart'] != 'UE']
wo_sap_maf = wo_sap_maf[wo_sap_maf['Maßnahmenart'] != 'LL']

In [22]:
wo_sap_maf['Maßnahmenart'].unique()

array(['DF', 'DE', 'ND'], dtype=object)

In [23]:
# get dublicated records
#wo_d = wo_sap_maf[wo_sap_maf.duplicated(subset = 'WO')]

In [24]:
# group by WO (ID) and sum all values
wo_sap_maf = wo_sap_maf.groupby(['WO']).sum().reset_index()

In [25]:
wo_sap_maf['ma'] = 'DF'

In [26]:
wo_sap_maf

Unnamed: 0,WO,Angriffsfläche,Nutzung LH,Nutzung NH,Nutzungssumme,Nutzdringlichkeit,Bewpfl.,Zeitpunkt,Rückungsart,Schlägerungsart,ma
0,17101506A0,16.0,1060.0,520.0,1580.0,4,2,6,72.0,2.0,DF
1,17101506B2,0.8,60.0,0.0,60.0,2,2,6,70.0,8.0,DF
2,17101506C1,5.2,420.0,20.0,440.0,2,2,6,70.0,8.0,DF
3,17101506C2,5.2,300.0,40.0,340.0,2,2,6,70.0,8.0,DF
4,17101506G1,3.0,100.0,140.0,240.0,2,2,6,70.0,8.0,DF
...,...,...,...,...,...,...,...,...,...,...,...
38237,18209366L1,2.5,0.0,120.0,120.0,1,1,1,35.0,4.0,DF
38238,18209366L2,0.1,0.0,10.0,10.0,2,1,1,35.0,4.0,DF
38239,18209366M2,1.9,0.0,80.0,80.0,1,1,1,35.0,4.0,DF
38240,18209367H2,0.6,0.0,20.0,20.0,2,1,1,30.0,2.0,DF


In [27]:
# species


In [28]:
# filter just necessary columns
wo_sap_ba = wo_sap.loc[wo_sap['Best.-Schicht.1'] > 0, 
                    ['WO','Best.-Schicht.1', 'Schichtanteil', 'Schichtalter', 
                     'S-Best.grad', 'Baumart','Baumartenanteil', 'BaumartenBestockgrad']]

In [29]:
wo_sap_sch = wo_sap.loc[(wo_sap['Best.-Schicht'] > 0) & (wo_sap['Best.-Schicht.1'] == 0), 
                    ['WO','Best.-Schicht.1', 'Schichtanteil', 'Schichtalter', 
                     'S-Best.grad', 'Baumart','Baumartenanteil', 'BaumartenBestockgrad']]

In [30]:
wo_sap_sch.shape

(370584, 8)

In [31]:
wo_sap_ba.shape

(548124, 8)

In [32]:
wos_unique = wo_sap_ba['WO'].unique()

In [33]:
wos_unique

array(['17103158F0', '17103158A1', '17101508B3', ..., '18208020B2',
       '18209324C4', '18209323P4'], dtype=object)

In [34]:
# filter one column
one_wo = wo_sap_ba.loc[(wo_sap_ba['WO'] == wos_unique[80122])]
one_wo

Unnamed: 0,WO,Best.-Schicht.1,Schichtanteil,Schichtalter,S-Best.grad,Baumart,Baumartenanteil,BaumartenBestockgrad
556995,17605304A1,1,100,225,0.7,FI,100,0.7


In [35]:
%%timeit
if one_wo.loc[one_wo['Best.-Schicht.1'] == 1, 'S-Best.grad'].array >= 0.5:
    pass#print('true')
#else:
    #print('false')

201 µs ± 583 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [36]:
#%%timeit
if (one_wo['Best.-Schicht.1'].unique().size == 1):
    dic = extract_species(one_wo)
elif(one_wo.loc[one_wo['Best.-Schicht.1'] == 1, 'S-Best.grad'].iloc[0] >= 0.5):
    print('sec')
    dic = extract_species(one_wo.loc[one_wo['Best.-Schicht.1'] == 1])
else:
    print('third')
    # wo id
    wo_id = one_wo.iloc[0,0]
    
    # age
    bg_max = one_wo['S-Best.grad'].max()
    s_ages = np.sort(one_wo['Schichtalter'].unique())[::-1]
    for s_age in s_ages:
        s_bg = one_wo.loc[one_wo['Schichtalter']==s_age, 'S-Best.grad'].iloc[0]
        if (s_bg >= 0.5) | (s_bg == bg_max):
            age = s_age
            break
            
    # species
    one_wo_gb = one_wo.groupby(by=["Baumart"], as_index=False, sort=False).sum()
    one_wo_gb['Baumartenanteil'] = (one_wo_gb['BaumartenBestockgrad']/one_wo_gb['BaumartenBestockgrad'].sum() * 100).round(0).astype(int)
    one_wo_gb['WO'] = wo_id
    one_wo_gb['Schichtalter'] = age
    dic = extract_species(one_wo_gb)
dic

NameError: name 'extract_species' is not defined

In [37]:
lh_set = {'EI', 'EL', 'ES', 'EA', 'FA', 'FE', 'GB', 'WP', 'GE', 'AV', 'HB', 'HP', \
          'KB', 'LI', 'ME', 'PO', 'RO', 'RK', 'BU', 'RE', 'SW', 'ER', 'JN', 'SP', \
          'LS', 'SL', 'SG', 'SA', 'QR', 'ST', 'QP', 'TK', 'TB', 'UL', 'NU', 'WD', \
          'WO', 'LW', 'EZ', 'AH', 'AS', 'RU', 'BI', 'EE', 'EK', 'GP', 'KA', 'PA'}
nh_set = {'FZ', 'GK', 'AG', 'HT', 'JL', 'CJ', 'KK', 'KO', 'AN', 'FO', 'AB', 'CH', \
          'SF', 'SN', 'PU', 'KW', 'TH', 'TA', 'ZI', 'AZ', 'BK', 'AC', 'EB', 'OF', \
          'PM', 'TA'}
ba_set = {'LA', 'KI', 'SK', 'DG'}

def extract_species(one_wo):
    ba_dict = dict()
    ba_dict['WO'] = one_wo['WO'].iloc[0]
    ba_dict['alter'] = one_wo['Schichtalter'].iloc[0]
    ba_dict['BL'] = 0
    ba_dict['FI'] = 0
    ba_dict['LH'] = 0
    ba_dict['NH'] = 0

    # loop over all species
    for _, data in one_wo[['Baumart', 'Baumartenanteil']].transpose().items():

        if data[0] == 'BL':
            ba_dict['BL'] += data[1]
        elif (data[0] == 'FI') | (data[0] == 'TA'):
            ba_dict['FI'] += data[1]
        elif data[0] in ba_set:
            ba_dict[data[0]] = data[1]
        elif data[0] in lh_set:
            ba_dict['LH'] += data[1]
        elif data[0] in nh_set:
            ba_dict['NH'] += data[1]

    return ba_dict

In [38]:
def get_age_species(one_wo):
    if (one_wo['Best.-Schicht.1'].unique().size == 1):
        dic = extract_species(one_wo)
    elif(one_wo.loc[one_wo['Best.-Schicht.1'] == 1, 'S-Best.grad'].iloc[0] >= 0.5):
        dic = extract_species(one_wo.loc[one_wo['Best.-Schicht.1'] == 1])
    else:
        # wo id
        wo_id = one_wo.iloc[0,0]

        # age
        bg_max = one_wo['S-Best.grad'].max()
        s_ages = np.sort(one_wo['Schichtalter'].unique())[::-1]
        for s_age in s_ages:
            s_bg = one_wo.loc[one_wo['Schichtalter']==s_age, 'S-Best.grad'].iloc[0]
            if (s_bg >= 0.5) | (s_bg == bg_max):
                age = s_age
                break

        # species
        one_wo_gb = one_wo.groupby(by=["Baumart"], as_index=False, sort=False).sum()
        one_wo_gb['Baumartenanteil'] = (one_wo_gb['BaumartenBestockgrad']/one_wo_gb['BaumartenBestockgrad'].sum() * 100).round(0).astype(int)
        one_wo_gb['WO'] = wo_id
        one_wo_gb['Schichtalter'] = age
        dic = extract_species(one_wo_gb)
    return dic

In [39]:
dd = dict()
for wo_unique in wos_unique:
    one_wo = wo_sap_ba.loc[(wo_sap_ba['WO'] == wo_unique)]
    dd[wo_unique] = get_age_species(one_wo)

UnboundLocalError: local variable 'age' referenced before assignment

In [None]:
# transpose dataframe
wo_sap_ba = pd.DataFrame(dd).transpose()
# fill nan values with 0
wo_sap_ba = wo_sap_ba.fillna(0)

In [None]:
wo_sap_ba.head()

### merge all SAP data

In [None]:
# merge SAP stoe & SAP tree species
wo_sap = pd.merge(wo_sap_stoe, wo_sap_ba, how='left', on='WO', sort=False,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

# merge SAP all & SAP volume
wo_sap = pd.merge(wo_sap, wo_sap_v, how='left', on='WO', sort=False,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

# merge SAP all & SAP planned wood cut
wo_sap = pd.merge(wo_sap, wo_sap_maf, how='left', on='WO', sort=False,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

wo_sap.head()

In [None]:
wo_sap.columns

In [None]:
wo_sap.columns = ['WO', 'fb_sap', 'fr_sap', 'abt_sap', 'uabt_sap', 'teilfl_sap', \
                  'start_term', 'uz', 'non_forest_type', 'economy', 'ww_sw', 'sw_type', \
                  'sea_level', 'exp', 'slope', 'site_type', 'veg_type', 'growth_area', \
                  'age', 'BL', 'FI', 'LH', 'NH', 'LA', 'KI', 'DG', 'SK', \
                  'mass_ha', 'mass_ha_lh', 'mass_ha_nh', 'mass_tot', 'mass_tot_lh', \
                  'mass_tot_nh', 'cut_area', 'cut_lh', 'cut_nh', 'cut_sum', \
                  'dr', 'bp', 'zp', 'ru', 'sg', 'ma']

In [None]:
wo_sap['year_fe'] = wo_sap['start_term'].str[-4:].astype(int)

In [None]:
wo_sap.to_csv('/home/philipp/Data/edin_diss/SAP_2019.csv')

In [None]:
wo_sap = pd.read_csv('/home/philipp/Data/edin_diss/SAP_2019.csv', index_col='Unnamed: 0')

## load SAP nutz

In [3]:
def get_data_sap_nutz():
    # set sap tax path directory
    path_sap_nutz_dir = '/home/philipp/Data/edin_diss/SAP_nutz'

    sap_nutz_list = []

    for fb in range(171,183):
        # create path to file
        path_sap_file = path_sap_nutz_dir + '/FB' + str(fb) + '_2007_2019.xlsx'
        print(path_sap_file)
        # read dat from file
        sap_nutz_list.append(pd.read_excel(path_sap_file))

    return pd.concat(sap_nutz_list, ignore_index=True)

In [4]:
sap_nutz = get_data_sap_nutz()

/home/philipp/Data/edin_diss/SAP_nutz/FB171_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB172_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB173_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB174_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB175_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB176_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB177_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB178_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB179_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB180_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB181_2007_2019.xlsx
/home/philipp/Data/edin_diss/SAP_nutz/FB182_2007_2019.xlsx


In [5]:
# fill nan in 'Ergebnis'
sap_nutz = sap_nutz.fillna('Ergebnis')
# filter subtotals
sap_nutz = sap_nutz[sap_nutz['planmäßig'] != 'Ergebnis']
# reset index
sap_nutz = sap_nutz.reset_index(drop=True)

In [6]:
sap_nutz

Unnamed: 0,Abmaßjahr,Waldort,Nutzungsart,planmäßig,Menge Efm
0,2007,#/171/1/710C1,Räumung,planmäßig,175.18
1,2007,#/171/1/740A0,Räumung,planmäßig,314.12
2,2007,#/171/3/146C1,Durchforstung,planmäßig,28.40
3,2007,#/171/4/421B0,Schäden andere,zufällig,25.20
4,2007,#/171/5/294G0,Schäden Wind,zufällig,8.39
...,...,...,...,...,...
340306,2019,1355/182/5/297O5,Schäden Käfer,zufällig,1.74
340307,2019,1355/182/5/297O5,Schäden Wind,zufällig,5.22
340308,2019,1355/182/5/297O6,Schäden Käfer,zufällig,1.74
340309,2019,1355/182/5/297O7,Schäden Käfer,zufällig,1.16


In [7]:
## add Waldort
ids = sap_nutz['Waldort'].str.split('/')
# clear Waldort
sap_nutz['Waldort'] = ids.str[1] + ids.str[2].str.zfill(2) + ids.str[3].str.zfill(5)

In [8]:
sap_nutz['Waldort']

0         17101710C1
1         17101740A0
2         17103146C1
3         17104421B0
4         17105294G0
             ...    
340306    18205297O5
340307    18205297O5
340308    18205297O6
340309    18205297O7
340310    18205521G1
Name: Waldort, Length: 340311, dtype: object

In [9]:
# group by WO (ID) and sum all values
sap_nutz = sap_nutz.groupby(['Abmaßjahr','Waldort']).sum().reset_index()

In [10]:
# rename columns
sap_nutz.columns = ['cut_year', 'WO', 'cut_volume']
# change data type
sap_nutz['cut_year'] = sap_nutz['cut_year'].astype(int)
sap_nutz['cut_volume'] = sap_nutz['cut_volume'].astype(int)

In [11]:
sap_nutz

Unnamed: 0,cut_year,WO,cut_volume
0,2007,17101506C1,7
1,2007,17101507B1,4
2,2007,17101508D1,736
3,2007,17101510A2,8
4,2007,17101510C1,477
...,...,...,...
259961,2019,18209366H1,29
259962,2019,18209366H2,2
259963,2019,18209367F1,27
259964,2019,18209367K1,37


In [None]:
###########################
# prepare data for merge
###########################

## Merge SAP & GIS data

In [50]:
wo_sap.head()

Unnamed: 0,WO,fb_sap,fr_sap,abt_sap,uabt_sap,teilfl_sap,start_term,uz,non_forest_type,economy,...,cut_lh,cut_nh,cut_sum,dr,bp,zp,ru,sg,ma,year_fe
0,17101648B1,171,1,648,B,1,01.01.2009,120,,I,...,,,,,,,,,,2009
1,17101652C1,171,1,652,C,1,01.01.2009,120,,I,...,100.0,0.0,100.0,2.0,1.0,2.0,30.0,1.0,DF,2009
2,17101736B4,171,1,736,B,4,01.01.2009,120,,I,...,,,,,,,,,,2009
3,17101658A1,171,1,658,A,1,01.01.2009,120,,I,...,200.0,0.0,200.0,2.0,1.0,2.0,10.0,1.0,DF,2009
4,17101506D1,171,1,506,D,1,01.01.2009,120,,I,...,,,,,,,,,,2009


In [51]:
wo_geo.head()

Unnamed: 0,fb,fr,abt,uabt,teilfl,admin,length,area,fly_date,year_fly,geometry,WO
0,171,6,-1,0,1,627,727.253895,10833.208608,26.06.2019,2019,"POLYGON ((599204.590 469767.385, 599201.570 46...",171060-101
1,171,6,-1,0,1,627,6502.341493,313967.596697,26.06.2019,2019,"POLYGON ((600106.511 472648.712, 600112.570 47...",171060-101
2,171,6,-1,0,1,627,78.122831,210.1562,26.06.2019,2019,"POLYGON ((600219.830 470593.835, 600239.880 47...",171060-101
3,171,6,-1,0,1,627,726.659414,20317.657233,26.06.2019,2019,"POLYGON ((598732.040 471838.245, 598724.480 47...",171060-101
4,171,6,-1,0,1,627,727.576909,21098.354447,26.06.2019,2019,"POLYGON ((599268.870 471089.755, 599254.750 47...",171060-101


In [52]:
wo_sap = wo_sap.fillna(0)

In [53]:
wo_sap.columns

Index(['WO', 'fb_sap', 'fr_sap', 'abt_sap', 'uabt_sap', 'teilfl_sap',
       'start_term', 'uz', 'non_forest_type', 'economy', 'ww_sw', 'sw_type',
       'sea_level', 'exp', 'slope', 'site_type', 'veg_type', 'growth_area',
       'age', 'BL', 'FI', 'LH', 'NH', 'LA', 'KI', 'DG', 'SK', 'mass_ha',
       'mass_ha_lh', 'mass_ha_nh', 'mass_tot', 'mass_tot_lh', 'mass_tot_nh',
       'cut_area', 'cut_lh', 'cut_nh', 'cut_sum', 'dr', 'bp', 'zp', 'ru', 'sg',
       'ma', 'year_fe'],
      dtype='object')

In [54]:
wo_sap.iloc[:,:20].head()

Unnamed: 0,WO,fb_sap,fr_sap,abt_sap,uabt_sap,teilfl_sap,start_term,uz,non_forest_type,economy,ww_sw,sw_type,sea_level,exp,slope,site_type,veg_type,growth_area,age,BL
0,17101648B1,171,1,648,B,1,01.01.2009,120,0.0,I,W,0,400,SO,27,87.0,BW,5.1,140.0,0.0
1,17101652C1,171,1,652,C,1,01.01.2009,120,0.0,I,W,0,400,SW,27,87.0,WW,5.1,100.0,0.0
2,17101736B4,171,1,736,B,4,01.01.2009,120,0.0,I,W,0,300,S,18,87.0,WW,5.1,5.0,0.0
3,17101658A1,171,1,658,A,1,01.01.2009,120,0.0,I,W,0,300,SW,18,87.0,WW,5.1,60.0,0.0
4,17101506D1,171,1,506,D,1,01.01.2009,120,0.0,I,W,0,300,NO,27,88.0,WW,5.1,135.0,0.0


In [55]:
wo_sap['non_forest_type'] = wo_sap['non_forest_type'].astype(int)
wo_sap['sea_level'] = wo_sap['sea_level'].astype(int)
wo_sap['slope'] = wo_sap['slope'].astype(int)
wo_sap['site_type'] = wo_sap['site_type'].astype(int)
wo_sap['growth_area'] = wo_sap['growth_area'].astype(str)

wo_sap['age'] = wo_sap['age'].astype(int)
wo_sap['BL'] = wo_sap['BL'].astype(int)
wo_sap['FI'] = wo_sap['FI'].astype(int)
wo_sap['LH'] = wo_sap['LH'].astype(int)
wo_sap['NH'] = wo_sap['NH'].astype(int)
wo_sap['LA'] = wo_sap['LA'].astype(int)
wo_sap['KI'] = wo_sap['KI'].astype(int)
wo_sap['DG'] = wo_sap['DG'].astype(int)
wo_sap['SK'] = wo_sap['SK'].astype(int)

wo_sap['cut_lh'] = wo_sap['cut_lh'].astype(int)
wo_sap['cut_nh'] = wo_sap['cut_nh'].astype(int)
wo_sap['cut_sum'] = wo_sap['cut_sum'].astype(int)
wo_sap['dr'] = wo_sap['dr'].astype(int)
wo_sap['bp'] = wo_sap['bp'].astype(int)
wo_sap['zp'] = wo_sap['zp'].astype(int)
wo_sap['ru'] = wo_sap['ru'].astype(int)
wo_sap['sg'] = wo_sap['sg'].astype(int)
wo_sap['year_fe'] = wo_sap['year_fe'].astype(int)

In [59]:
# merge GIS & SAP
wo = wo_geo.merge(wo_sap, on='WO', how="outer")

In [60]:
wo.to_file('/home/philipp/Data/edin_diss/GIS_wo/wo_2019.shp')

In [58]:
wo_geo.columns

Index(['fb', 'fr', 'abt', 'uabt', 'teilfl', 'admin', 'length', 'area',
       'fly_date', 'year_fly', 'geometry', 'WO'],
      dtype='object')

In [None]:
#####################################################################

## calculating cut in between year_fe and year_fly

In [12]:
wo = geopandas.read_file('/home/philipp/Data/edin_diss/GIS_wo/wo_2018.shp')

In [13]:
wo.columns

Index(['fb', 'fr', 'abt', 'uabt', 'teilfl', 'admin', 'length', 'area',
       'year_fly', 'WO', 'fb_sap', 'fr_sap', 'abt_sap', 'uabt_sap',
       'teilfl_sap', 'start_term', 'uz', 'non_forest', 'economy', 'ww_sw',
       'sw_type', 'sea_level', 'exp', 'slope', 'site_type', 'veg_type',
       'Wuchsgebie', 'mass_ha', 'mass_ha_lh', 'mass_ha_nh', 'mass_tot',
       'mass_tot_l', 'mass_tot_n', 'cut_area', 'cut_lh', 'cut_nh', 'cut_sum',
       'dr', 'bp', 'zp', 'ru', 'sg', 'ma', 'year_fe', 'geometry'],
      dtype='object')

In [14]:
wo[['year_fe', 'year_fly']]

Unnamed: 0,year_fe,year_fly
0,2009,2018
1,2009,2018
2,2009,2018
3,2009,2018
4,2009,2018
...,...,...
361702,2015,2013
361703,2015,2013
361704,2015,2013
361705,2015,2013


In [15]:
wo_data = wo.copy()

In [16]:
wo_data

Unnamed: 0,fb,fr,abt,uabt,teilfl,admin,length,area,year_fly,WO,...,cut_nh,cut_sum,dr,bp,zp,ru,sg,ma,year_fe,geometry
0,171,1,506,3,1,529,2402.546849,11624.655500,2018,1710150631,...,0,0,0,0,0,0,0,0,2009,"POLYGON ((617139.640 493799.045, 617151.260 49..."
1,171,1,506,9,2,529,381.617564,3111.435375,2018,1710150692,...,0,0,0,0,0,0,0,0,2009,"POLYGON ((616697.800 493322.945, 616686.870 49..."
2,171,1,506,A,0,529,1367.653182,93101.347400,2018,17101506A0,...,150,500,2,1,2,30,1,DF,2009,"POLYGON ((617710.960 493854.385, 617742.710 49..."
3,171,1,506,B,1,529,241.038183,611.014350,2018,17101506B1,...,0,0,0,0,0,0,0,0,2009,"POLYGON ((617347.760 493783.145, 617356.300 49..."
4,171,1,506,B,1,529,723.923359,11906.891550,2018,17101506B1,...,0,0,0,0,0,0,0,0,2009,"POLYGON ((617454.430 493486.115, 617421.800 49..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361702,177,2,150,K,3,586,870.510273,15996.235168,2013,17702150K3,...,0,0,0,0,0,0,0,0,2015,"POLYGON ((391074.570 334679.455, 391058.580 33..."
361703,177,2,150,K,4,586,368.791677,3216.480581,2013,17702150K4,...,0,0,0,0,0,0,0,0,2015,"POLYGON ((391281.580 334929.315, 391284.149 33..."
361704,177,2,150,M,0,586,420.411756,8861.101587,2013,17702150M0,...,0,0,0,0,0,0,0,0,2015,"POLYGON ((391203.790 334595.855, 391193.606 33..."
361705,177,2,150,M,0,586,567.951391,15139.044672,2013,17702150M0,...,0,0,0,0,0,0,0,0,2015,"POLYGON ((391038.461 334407.163, 391030.150 33..."


In [17]:
np_nutz = sap_nutz.values

In [18]:
np_wo = wo_data[['WO','year_fe', 'year_fly', 'area', 'admin']].values

In [19]:
np_wo

array([['1710150631', 2009, 2018, 11624.6554996582, 529],
       ['1710150692', 2009, 2018, 3111.4353746856077, 529],
       ['17101506A0', 2009, 2018, 93101.34739962808, 529],
       ...,
       ['17702150M0', 2015, 2013, 8861.101586502582, 586],
       ['17702150M0', 2015, 2013, 15139.0446723904, 586],
       ['1770703574', 2017, 2013, 10201.78817499057, 610]], dtype=object)

In [20]:
np_nutz

array([[2007, '17101506C1', 7],
       [2007, '17101507B1', 4],
       [2007, '17101508D1', 736],
       ...,
       [2019, '18209367F1', 27],
       [2019, '18209367K1', 37],
       [2019, '18209368A1', 224]], dtype=object)

In [22]:
## filtering and taking the sum of cuts 
## between year_fe and year_photo
## for every wo

#%%time

tenth = np_wo.shape[0] // 20
state = np_wo.shape[0] // 20
state_per = 5

for i in range(np_wo.shape[0]):
    if i == state:
        print('{}%'.format(state_per))
        state_per += 5
        state += tenth
        
    # mask WO
    mask_wo = np_nutz[:,1] == np_wo[i, 0]
    # mask earlier than fe year
    mask_fe = np_nutz[:,0] >= np_wo[i, 1]
    # mask later than photo year
    mask_photo = np_nutz[:,0] < np_wo[i, 2]
    # create final mask
    mask = mask_wo * mask_fe * mask_photo
    # filter array with mask
    cut = np_nutz[mask,-1].sum()
    np_wo[i, -1] = cut

5%
10%
15%
20%
25%
30%
35%
40%
45%
50%
55%
60%
65%
70%
75%
80%
85%
90%
95%
100%


In [23]:
wo_cut = pd.DataFrame(np_wo, columns=['WO', 'year_fe', 'year_photo', 'area', 'cut'])

In [24]:
wo_cut

Unnamed: 0,WO,year_fe,year_photo,area,cut
0,1710150631,2009,2018,11624.7,0
1,1710150692,2009,2018,3111.44,0
2,17101506A0,2009,2018,93101.3,712
3,17101506B1,2009,2018,611.014,0
4,17101506B1,2009,2018,11906.9,0
...,...,...,...,...,...
361702,17702150K3,2015,2013,15996.2,0
361703,17702150K4,2015,2013,3216.48,0
361704,17702150M0,2015,2013,8861.1,0
361705,17702150M0,2015,2013,15139,0


In [25]:
wo_cut.to_csv('/home/philipp/Data/edin_diss/SAP_nutz/cut_till_2018.csv')

## merge wo_2018 & cut_2018

In [159]:
wo_cut = pd.read_csv('/home/philipp/Data/edin_diss/SAP_nutz/cut_till_2018.csv')

In [28]:
wo = wo.merge(wo_cut, on='WO')

In [40]:
wo = wo.drop(['year_fe_y', 'year_photo', 'area_y'], axis=1)

In [42]:
wo['cut'] = wo['cut'].astype(int)

In [45]:
wo.to_file('/home/philipp/Data/edin_diss/GIS_wo/wo_2018.shp')

In [None]:
##############################################################################

In [155]:
wo['Vorrat am Ort'] = wo['Vorrat am Ort'].fillna(0)

In [171]:
wo.loc[(wo['Vorrat am Ort'] == 0) & (wo['cut'] != 0), ['WO', 'Nutzdringlichkeit', 'Vorrat am Ort', 'cut']]

Unnamed: 0,WO,Nutzdringlichkeit,Vorrat am Ort,cut
23,17101506D2,2.0,0.0,55.4
24,17101506D2,2.0,0.0,55.4
25,17101506D2,2.0,0.0,55.4
26,17101506D2,2.0,0.0,55.4
477,17101533E1,,0.0,320.36
...,...,...,...,...
907637,18207532B2,,0.0,62.9
907638,18207532B2,,0.0,62.9
907639,18207532B2,,0.0,62.9
907640,18207532B2,,0.0,62.9


In [174]:
wo.columns = ['OBJECTID', 'fb', 'fr', 'abt', 'uabt',
       'TEILFLAECH', 'FARBCODE', 'LINKID', 'ID', 'VERWALTUNG', 'Erstellung',
       'Datenaktua', 'SHAPE_Leng', 'SHAPE_Area', 'FLUGJAHR', 'geometry', 'WO',
       'Unnamed', 'Forstbetrieb', 'Forstrevier', 'Abteilung',
       'Unterabteil', 'Teilfl', 'Beg_Laufzeit', 'Umtriebszeit',
       'Nebengrund_Art', 'Ertragssituation', 'Bewirtschaftungsform',
       'Schutzwaldkategorie', 'Seehoehe', 'Exposition', 'Neigung',
       'Standorteinheit', 'Vegetationstyp', 'Wuchsgebiet', 'Vorrat_ha',
       'Laubholzvorrat_ha', 'Nadelholzvorrat_ha', 'Vorrat_Ort',
       'Laubholzvorrat_Ort', 'Nadelholzvorrat_Ort', 'Angriffsflaeche',
       'Nutzung LH', 'Nutzung NH', 'Nutzungssumme', 'Nutzdringlichkeit',
       'Bewpfl', 'Zeitpunkt', 'Rueckungsart', 'Schlaegerungsart', 'FE_year',
       'year_fe', 'year_photo', 'area', 'cut']

In [175]:
wo.to_file('/home/philipp/Data/edin_diss/test_wo.shp')

In [158]:
wo['v_out'] = wo['cut'] / wo['Vorrat am Ort']

ZeroDivisionError: float division by zero

In [157]:
wo[['WO','Vorrat am Ort', 'cut']]

Unnamed: 0,WO,Vorrat am Ort,cut
0,1710150631,0.00,0
1,1710150692,0.00,0
2,17101506A0,3083.47,712.67
3,17101506B1,295.88,0
4,17101506B1,295.88,0
...,...,...,...
927706,17702150M0,1181.57,0
927707,17702150M0,1181.57,0
927708,17702150M0,1181.57,0
927709,17702150M0,1181.57,0


In [131]:
mask1 = np_nutz[:,1] == '17101506C1'

In [132]:
mask2 = np_nutz[:,0] >= 2009

In [133]:
mask3 = np_nutz[:,0] < 2018

In [134]:
mask = mask1*mask2*mask3

In [135]:
mask

array([False, False, False, ..., False, False, False])

In [139]:
np_nutz[mask,-1].sum()

24.05

In [87]:
wo_data

Unnamed: 0,OBJECTID,FORSTBETRI,REVIER_NR,ABTEILUNG,UNTERABTEI,TEILFLAECH,FARBCODE,LINKID,ID,VERWALTUNG,...,Angriffsfläche,Nutzung LH,Nutzung NH,Nutzungssumme,Nutzdringlichkeit,Bewpfl.,Zeitpunkt,Rückungsart,Schlägerungsart,FE_year
0,5.0,171,1,506,3,1,A,{E77BB096-11C9-4939-9331-B4B305C4096A},1450402,529,...,,,,,,,,,,2009
1,6.0,171,1,506,9,2,A,{99BC61C0-CE00-48BB-9140-A0C6D3EB5EA1},1450449,529,...,,,,,,,,,,2009
2,7.0,171,1,506,A,0,A,{D5C7009D-5ED1-44F3-9A10-2EA9AC58094E},1450404,529,...,9.3,350.0,150.0,500.0,2.0,1.0,2.0,30.0,1.0,2009
3,8.0,171,1,506,B,1,A,{E3993FFC-AC51-468D-9B23-A733AF0079C9},1450511,529,...,,,,,,,,,,2009
4,9.0,171,1,506,B,1,A,{EA311B6D-6915-49CE-B17A-43499CA9077D},1450405,529,...,,,,,,,,,,2009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370528,191316.0,177,2,150,K,3,11000000000,{8AFA0E7C-F3B3-4A29-9D07-0256BEF8009B},1618552,586,...,,,,,,,,,,2015
370529,191317.0,177,2,150,K,4,11000000000,{E4CF5974-C830-4C9A-8314-AE15DA5A8F49},1618557,586,...,,,,,,,,,,2015
370530,191318.0,177,2,150,M,0,48010030300,{C0791AFB-6FD1-419F-B5C9-B937A816768C},1618564,586,...,,,,,,,,,,2015
370531,191319.0,177,2,150,M,0,48010030300,{AF31F1D8-4666-4DC0-958B-86E49406698F},1618574,586,...,,,,,,,,,,2015


## create code 'typ' for ground truth

In [42]:
wo.head()

Unnamed: 0,fb,fr,abt,uabt,teilfl,admin,length,area,year_fly,geometry,...,cut_lh,cut_nh,cut_sum,dr,bp,zp,ru,sg,ma,year_fe
0,171,1,506,3,1,529,2402.546849,11624.6555,2018,"POLYGON ((617139.640 493799.045, 617151.260 49...",...,0,0,0,0,0,0,0,0,0,2009
1,171,1,506,9,2,529,381.617564,3111.435375,2018,"POLYGON ((616697.800 493322.945, 616686.870 49...",...,0,0,0,0,0,0,0,0,0,2009
2,171,1,506,A,0,529,1367.653182,93101.3474,2018,"POLYGON ((617710.960 493854.385, 617742.710 49...",...,350,150,500,2,1,2,30,1,DF,2009
3,171,1,506,B,1,529,241.038183,611.01435,2018,"POLYGON ((617347.760 493783.145, 617356.300 49...",...,0,0,0,0,0,0,0,0,0,2009
4,171,1,506,B,1,529,723.923359,11906.89155,2018,"POLYGON ((617454.430 493486.115, 617421.800 49...",...,0,0,0,0,0,0,0,0,0,2009


In [53]:
wo.columns

Index(['fb', 'fr', 'abt', 'uabt', 'teilfl', 'admin', 'length', 'area',
       'year_fly', 'geometry', 'WO', 'fb_sap', 'fr_sap', 'abt_sap', 'uabt_sap',
       'teilfl_sap', 'start_term', 'uz', 'non_forest_type', 'economy', 'ww_sw',
       'sw_type', 'sea_level', 'exp', 'slope', 'site_type', 'veg_type',
       'Wuchsgebiet', 'mass_ha', 'mass_ha_lh', 'mass_ha_nh', 'mass_tot',
       'mass_tot_lh', 'mass_tot_nh', 'cut_area', 'cut_lh', 'cut_nh', 'cut_sum',
       'dr', 'bp', 'zp', 'ru', 'sg', 'ma', 'year_fe', 'typ'],
      dtype='object')

In [58]:
conditions = [
    (wo['non_forest_type'] == 1) | (wo['non_forest_type'] == 4) | (wo['non_forest_type'] == 5) | (wo['non_forest_type'] == 6),
    (wo['non_forest_type'] == 2),
    (wo['non_forest_type'] == 3),
    (wo['non_forest_type'] == 7),
    (wo['non_forest_type'] == 8) | (wo['non_forest_type'] == 9),
    (wo['mass_ha_nh'] == 0) & (wo['mass_ha_lh'] == 0),
    (wo['mass_ha_nh'] >= wo['mass_ha_lh']), 
    (wo['mass_ha_nh'] < wo['mass_ha_lh'])]

choices = [3, 4, 5, 6, 7, 0, 1, 2]

wo['typ'] = np.select(conditions, choices, default=np.nan)

In [59]:
wo['typ'].unique()

array([5., 7., 2., 0., 1., 6., 3., 4.])

In [62]:
wo[wo['non_forest_type']==8].shape

(25838, 46)

In [61]:
wo.shape

(361707, 46)

In [66]:
wo[wo['typ']==4].shape

(4188, 46)

In [68]:
wo['typ'] = wo['typ'].astype(int)

In [69]:
# create geo-dataframe
wo_typ = wo[['typ', 'geometry']]

In [70]:
wo_typ

Unnamed: 0,typ,geometry
0,5,"POLYGON ((617139.640 493799.045, 617151.260 49..."
1,7,"POLYGON ((616697.800 493322.945, 616686.870 49..."
2,2,"POLYGON ((617710.960 493854.385, 617742.710 49..."
3,2,"POLYGON ((617347.760 493783.145, 617356.300 49..."
4,2,"POLYGON ((617454.430 493486.115, 617421.800 49..."
...,...,...
361702,2,"POLYGON ((391074.570 334679.455, 391058.580 33..."
361703,0,"POLYGON ((391281.580 334929.315, 391284.149 33..."
361704,1,"POLYGON ((391203.790 334595.855, 391193.606 33..."
361705,1,"POLYGON ((391038.461 334407.163, 391030.150 33..."


In [71]:
# save to file
wo_typ.to_file("/home/philipp/Data/edin_diss/GIS_ground_truth/typ_2018/gis_typ_wwie_2018.shp")