In [8]:
import pandas as pd
import numpy as np

In [9]:
path = '/home/philipp/Data/obf_autman/xlsx/'
data = pd.read_excel(path + 'Manuale_Dross_Abt.53_110.xlsx')
data['WO'] = data['WO'].str.replace(' ', '')

In [10]:
# logging of training

from datetime import datetime

class Log(object):

    def __init__(self, name):
        self.exp_name = name

    def start_log(self, typ, mode='a'):
        '''
            start log
        '''
        with open("log/{}.txt".format(self.exp_name), mode=mode, encoding='utf-8') as f:
            f.write('#' * 80 + '\n')
            f.write('#' * 29 + '   ' + typ + '   ' + '#' * 29 + '\n')
            f.write('#' * 80 + '\n')
            f.write('\n')


    def add_log(self, text):
        '''
            save log
        '''
        with open("log/{}.txt".format(self.exp_name), mode='a', encoding='utf-8') as f:
            f.write(text + '\n')


In [14]:
class Check(object):

    def __init__(self, data):
        
        self.data = data
        
        self.logging = Log('log')
        self.logging.start_log('CHECK', mode='w')

        self.chk_type = ['#U', '#W', '#STOE', '#VTYP', '#VB', '#VG', '#UENH', '#UELH', '#BZ', '#PZ', '#ST',\
                         '#BE', '#MA', '#SWE', '#LRV', '#WO', '#N']

        # wo
        self.chk_u = [80, 100, 120, 140, 160, 200]

        self.chk_w = ['WI', 'WA', 'SSI', 'SSA', 'OSI', 'OSA', 'BSI', 'BSA']

        self.chk_stoe = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 21, 22, 23,\
         25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 41, 42, 43, 44, 51, 52, 53, 54,\
         55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 71, 72, 73, 74, 75, 76, 81, 82, 83,\
         84, 85, 86, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98]

        self.chk_vtyp = []

        self.chk_vg = [0, 1, 2, 3, 4, 5]
        self.chk_swe = ['grün', 'gelb', 'rot', 'Grün', 'Gelb', 'Rot']

        # bz
        self.chk_ba = ["AS", "AZ", "AH", "PM", "BI", "FB", "BL", "AC", "DG", "EE",\
         "EK", "EB", "EI", "EL", "ES", "EA", "FA", "FI", "FE", "FZ", "GK", "GB", "WP",\
         "GE", "AG", "AV", "HB", "HT", "HP", "JL", "CJ", "KK", "KB", "KO", "LA", "LI",\
         "ME", "AN", "FO", "PO", "AB", "RO", "RK", "BU", "RE", "SW", "CH", "ER", "SK",\
         "JN", "SP", "SF", "LS", "SL", "SN", "SG", "PU", "SA", "QR", "ST", "KW", "TH",\
         "QP", "TK", "TB", "UL", "NU", "WD", "KI", "TA", "WO", "LW", "EZ", "ZI", "SL",\
         "SN"]

        # nutz
        self.chk_nutz_ma = ['AD', 'AE', 'AF', 'AG', 'AS', 'BU', 'DE', 'DF', 'DP',\
         'EG', 'FM', 'JF', 'JP', 'KE', 'KF', 'KH', 'LI', 'LL', 'NB', 'ND', 'PA',\
         'PL', 'RM', 'SB', 'TR', 'UE', 'ZE', 'ZN', 'ZV']

        self.chk_nutz_dring = [1, 2, 3, 4, 5, 6]
        self.chk_nutz_bewil = [1, 2, 3]
        self.chk_nutz_zeitp = [1, 2, 3]
        self.chk_nutz_schlg = [0, 1, 2, 3, 4, 5, 6]
        self.chk_nutz_rueck = [0, 10, 23, 26, 29, 30, 31, 35, 36, 38, 40, 90]
    
    
    # check Nutzungen
    def check_bz(self):
        
        self.logging.start_log('BZ')
        # filter dataset
        data_temp = data.loc[~pd.isna(data['BZ']),['WO', 'BZ']].copy()
        
        for idx, inst in data_temp.iterrows():
            self.check_bz_instance(inst)
            

    # check Bestockungsziele
    def check_bz_instance(self, inst):
        '''
        Checks Bestockungsziel (bz) for validity and returns the splited data.
        '''
        nums = []
        bz_split = inst['BZ'][2:].split()

        for bz_inst in bz_split:
            try:
                # -2: -> because BZ is included in BZ-instance
                if bz_inst[-2:] in self.chk_ba:
                    nums.append(int(bz_inst[:-2]))
                else:
                    self.logging.add_log('Not a valid tree species - {} - {}'.format(bz_inst[-2:], inst['WO']))

            except:
                self.logging.add_log('Input not valid - {} - {}'.format(bz_inst[-2:], inst['WO']))

        if sum(nums) != 10:
            self.logging.add_log('BZ does not add up to 10 - {} - {}'.format(bz_inst[-2:], inst['WO']))
    
    
    # check Nutzungen
    def check_nutz(self):
        
        self.logging.start_log('Nutzung')
        
        # get number of max measures (Nutzungen) per unit (Bestand)
        loops = int(self.data.columns[-2][5:])
        
        for i in range(loops):
            # create columns
            col = ['WO', 'S_{}'.format(i), 'MA_{}'.format(i), 'Fl_{}'.format(i), 'LH_{}'.format(i),\
                   'NH_{}'.format(i), 'Dring_{}'.format(i), 'Zeit_{}'.format(i), 'Bew_{}'.format(i),\
                   'Sch_{}'.format(i), 'Rueck_{}'.format(i), 'Text_{}'.format(i)]
            
            # filter dataset
            data_temp = data.loc[~pd.isna(data['MA_{}'.format(i)]),col].copy()
            # remove suffix in column names 
            data_temp.columns = ['WO', 'S', 'MA', 'Fl', 'LH', 'NH', 'Dring', 'Zeit', 'Bew', 'Sch',\
                                 'Rueck', 'Text']
            
            for idx, inst in data_temp.iterrows():
                self.check_nutz_instance(inst)

            
    # check instance in Nutzungen
    def check_nutz_instance(self, inst):
        
        # MA
        if inst['MA'] not in self.chk_nutz_ma:
            self.logging.add_log('MA not valid{} - {}'.format(inst['MA'], inst['WO']))

        # Area
        try:
            if not isinstance(inst['Fl'], float):
                if isinstance(inst['Fl'], int):
                    float(inst['Fl'])
                elif inst['Fl'].find(',') >= 0:
                    float(inst['Fl'].replace(',','.'))
                else:
                    float(inst['Fl'])
        except:
            add_log(self, text)       
            self.logging.add_log('Fl not valid {} - {}'.format(inst['Fl'], inst['WO']))

        # LH
        try:
            int(inst['LH'])
        except:
            self.logging.add_log('LH not valid - {}'.format(inst['WO']))

        # NH
        try:
            int(inst['NH'])
        except:
            self.logging.add_log('NH not valid - {}'.format(inst['WO']))

        # Dring
        try:
            if int(inst['Dring']) not in self.chk_nutz_dring:
                self.logging.add_log('Dring not valid - {} - {}'.format(int(inst['Dring']), inst['WO']))
        except:
            self.logging.add_log('Dring not valid - {}'.format(inst['WO']))

        # Zeit
        try:
            if int(inst['Zeit']) not in self.chk_nutz_zeitp:
                self.logging.add_log('Zeit not valid - {} - {}'.format(int(inst['Zeit']), inst['WO']))
        except:
            self.logging.add_log('Zeit not valid - {}'.format(inst['WO']))

        # Bew
        try:
            if int(inst['Bew']) not in self.chk_nutz_bewil:
                self.logging.add_log('Bew not valid - {} - {}'.format(int(inst['Bew']), inst['WO']))
        except:
            self.logging.add_log('Bew not valid - {}'.format(inst['WO']))

        # Sch
        try:
            if int(inst['Sch']) not in self.chk_nutz_schlg:
                self.logging.add_log('Sch not valid - {} - {}'.format(int(inst['Sch']), inst['WO']))
        except:
            self.logging.add_log('Sch not valid - {}'.format(inst['WO']))

        # Rueck
        try:
            if int(inst['Rueck']) not in self.chk_nutz_rueck:
                self.logging.add_log('Rueck not valid')
        except:
            self.logging.add_log('Rueck not valid')

In [15]:
check = Check(data)

In [16]:
check.check_bz()

WO         054K1
BZ    BZ 6SL 4KI
Name: 77, dtype: object
WO             054K2
BZ    BZ 2LA 5KI 3SL
Name: 78, dtype: object
WO             054L2
BZ    BZ 2FI 5SL 3KI
Name: 80, dtype: object
WO             054L3
BZ    BZ 3KI 5QP 2SL
Name: 81, dtype: object
WO             054X0
BZ    BZ 5QP 2HB 3KI
Name: 95, dtype: object
WO         055A3
BZ    BZ 8QP 2SL
Name: 98, dtype: object
WO             055B1
BZ    BZ 4KI 4QP 2SL
Name: 99, dtype: object
WO         055C1
BZ    BZ 7QP 3SL
Name: 101, dtype: object
WO         057A0
BZ    BZ 7DG 3SL
Name: 103, dtype: object
WO         057D0
BZ    BZ 7EI 3SL
Name: 108, dtype: object
WO             058A1
BZ    BZ 6FI 2TA 2SL
Name: 114, dtype: object
WO             058A2
BZ    BZ 5FI 3TA 2SL
Name: 115, dtype: object
WO         058A4
BZ    BZ 7DG 3SL
Name: 117, dtype: object
WO             058B3
BZ    BZ 7DG 1SN 2SL
Name: 122, dtype: object
WO             058C1
BZ    BZ 6DG 2SL 2TA
Name: 123, dtype: object
WO         058C2
BZ    BZ 8DG 2SL
Name: 124, dtype

In [7]:
check.check_nutz()

In [19]:
data

Unnamed: 0,Seite,Check,WO,UZ,W,STOE,VTYP,VB,UENH,UELH,...,Fl_2,LH_2,NH_2,Dring_2,Zeit_2,Bew_2,Sch_2,Rueck_2,Text_2,End
0,0,pass,053A1,,,,,,,,...,,,,,,,,,,
1,1,pass,053A2,,,,,,,,...,,,,,,,,,,
2,2,pass,053A3,,,,,,,,...,,,,,,,,,,
3,3,pass,053A4,,,,,,,,...,,,,,,,,,,
4,4,pass,053B1,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,877,pass,110C0,,,,,,,,...,,,,,,,,,,
878,878,pass,110D0,,,,,,,,...,,,,,,,,,,
879,879,pass,110E0,,,,,,,,...,,,,,,,,,,
880,880,pass,110F0,,,,,,,,...,,,,,,,,,,


In [92]:
int(data.columns[-2][5:])

2

In [74]:
data_temp = data.loc[~pd.isna(data['MA_0']),['WO', 'S_0', 'MA_0', 'Fl_0', 'LH_0', 'NH_0', 'Dring_0',
 'Zeit_0', 'Bew_0', 'Sch_0', 'Rueck_0', 'Text_0']].copy()

data_temp.columns = ['WO', 'S', 'MA', 'Fl', 'LH', 'NH', 'Dring', 'Zeit', 'Bew', 'Sch', 'Rueck', 'Text']

In [87]:
for idx, inst in data_temp.iterrows():
    
    # MA
    if inst['MA'] not in check.chk_nutz_ma:
        print('log: MA not valid')
    
    # Area
    try:
        if not isinstance(inst['Fl'], float):
            if isinstance(inst['Fl'], int):
                float(inst['Fl'])
            elif inst['Fl'].find(',') >= 0:
                float(inst['Fl'].replace(',','.'))
            else:
                float(inst['Fl'])
    except:
        print('log: Fl not valid {} - {}'.format(inst['Fl'], inst['WO']))
    
    # LH
    try:
        int(inst['LH'])
    except:
        print('log: LH not valid - {}'.format(inst['WO']))
    
    # NH
    try:
        int(inst['NH'])
    except:
        print('log: NH not valid - {}'.format(inst['WO']))
        
    # Dring
    try:
        if int(inst['Dring']) not in check.chk_nutz_dring:
            print('log: Dring not valid - {} - {}'.format(int(inst['Dring']), inst['WO']))
    except:
        print('log: Dring not valid - {}'.format(inst['WO']))
        
    # Zeit
    try:
        if int(inst['Zeit']) not in check.chk_nutz_zeitp:
            print('log: Zeit not valid - {} - {}'.format(int(inst['Zeit']), inst['WO']))
    except:
        print('log: Zeit not valid - {}'.format(inst['WO']))
        
    # Bew
    try:
        if int(inst['Bew']) not in check.chk_nutz_bewil:
            print('log: Bew not valid - {} - {}'.format(int(inst['Bew']), inst['WO']))
    except:
        print('log: Bew not valid - {}'.format(inst['WO']))
        
    # Sch
    try:
        if int(inst['Sch']) not in check.chk_nutz_schlg:
            print('log: Sch not valid - {} - {}'.format(int(inst['Sch']), inst['WO']))
    except:
        print('log: Sch not valid - {}'.format(inst['WO']))
        
    # Rueck
    try:
        if int(inst['Rueck']) not in check.chk_nutz_rueck:
            print('log: Rueck not valid')
    except:
        print('log: Rueck not valid')

log: Sch not valid - 31 - 055C1
log: Sch not valid - 31 - 068J0
log: Bew not valid - 23 - 105A1
log: Sch not valid - 23 - 105A1


In [82]:
x = data_temp.loc[data_temp['WO'] == '109M0', 'Fl'].values[0]

In [86]:
if not isinstance(x, float):
    if x.find(',') >= 0:
        print(1)
    else:
        print(type(x))

AttributeError: 'int' object has no attribute 'find'

In [63]:
i = '3,3'
if i.find(',') >= 0:
    print(float(i.replace(',','.')))
else:
    print(float(i))

3.3


In [44]:
check.chk_nutz_schlg

[1, 2, 3, 4, 5, 6]

In [76]:
data_temp[data_temp['WO'] == '109M0']

Unnamed: 0,WO,S,MA,Fl,LH,NH,Dring,Zeit,Bew,Sch,Rueck,Text
870,109M0,1.0,SB,3,0.0,150.0,2.0,1.0,2.0,1.0,26.0,


In [43]:
data_temp.loc[:,'Sch_0'].astype(int)

0      4
2      4
4      4
5      4
6      4
      ..
863    4
866    2
869    1
870    1
874    1
Name: Sch_0, Length: 596, dtype: int64

In [None]:
# MA
S_0                                                      1.0
MA_0       str - check_ma
Fl_0       float - is_float
LH_0       int - is_int
NH_0       int - is_int
Dring_0    int - [1, 2, 3]
Zeit_0     int - [1, 2, 3]
Bew_0      int - [1, 2, 3]
Sch_0      int - [1, 2, 3, 4, 5, 6]
Rueck_0    int - [1, 2, 3]
Text_0     str

In [7]:
i = 0
['S_{}'.format(i), 'MA_{}'.format(i), 'Fl_{}'.format(i), 'LH_{}'.format(i), 'NH_{}'.format(i), \
'Dring_{}'.format(i), 'Zeit_{}'.format(i), 'Bew_{}'.format(i), 'Sch_{}'.format(i), 'Rueck_{}'.format(i),\
'Text_{}'.format(i)]

['S_0',
 'MA_0',
 'Fl_0',
 'LH_0',
 'NH_0',
 'Dring_0',
 'Zeit_0',
 'Bew_0',
 'Sch_0',
 'Rueck_0',
 'Text_0']

In [5]:
data.columns

Index(['Seite', 'Check', 'WO', 'UZ', 'W', 'STOE', 'VTYP', 'VB', 'UENH', 'UELH',
       'X', 'BZ', 'PZ', 'ST', 'BE', 'MA', 'SWE', 'LRV', 'XX', 'S_0', 'MA_0',
       'Fl_0', 'LH_0', 'NH_0', 'Dring_0', 'Zeit_0', 'Bew_0', 'Sch_0',
       'Rueck_0', 'Text_0', 'S_1', 'MA_1', 'Fl_1', 'LH_1', 'NH_1', 'Dring_1',
       'Zeit_1', 'Bew_1', 'Sch_1', 'Rueck_1', 'Text_1', 'S_2', 'MA_2', 'Fl_2',
       'LH_2', 'NH_2', 'Dring_2', 'Zeit_2', 'Bew_2', 'Sch_2', 'Rueck_2',
       'Text_2', 'End'],
      dtype='object')

In [8]:
index = data['WO'].values
col = ['WO', 'MA', 'Fl', 'LH', 'NH', 'Dring', 'Zeit', 'Bew', 'Sch', 'Rueck', 'Text']

In [9]:
# create dataframe with wo
df = pd.DataFrame(index=index, columns=col)
df = df.fillna(0) # with 0s rather than NaNs

In [10]:
df


Unnamed: 0,WO,MA,Fl,LH,NH,Dring,Zeit,Bew,Sch,Rueck,Text
053A1,0,0,0,0,0,0,0,0,0,0,0
053A2,0,0,0,0,0,0,0,0,0,0,0
053A3,0,0,0,0,0,0,0,0,0,0,0
053A4,0,0,0,0,0,0,0,0,0,0,0
053B1,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
110C0,0,0,0,0,0,0,0,0,0,0,0
110D0,0,0,0,0,0,0,0,0,0,0,0
110E0,0,0,0,0,0,0,0,0,0,0,0
110F0,0,0,0,0,0,0,0,0,0,0,0


In [25]:
nutz = data[['S_0', 'MA_0', 'Fl_0', 'LH_0', 'NH_0', 'Dring_0', 'Zeit_0', 'Bew_0', 'Sch_0', 'Rueck_0', 'Text_0']]
col = ['S', 'MA', 'Fl', 'LH', 'NH', 'Dring', 'Zeit', 'Bew', 'Sch', 'Rueck', 'Text']
nutz.columns = col
nutz = nutz.set_index(index)
nutz = nutz.fillna(0)
nutz[['S', 'LH', 'NH', 'Dring', 'Zeit', 'Bew', 'Sch', 'Rueck']] = nutz[['S', 'LH', 'NH', 'Dring', 'Zeit', 'Bew', 'Sch', 'Rueck']].astype(int)

In [26]:
nutz

Unnamed: 0,S,MA,Fl,LH,NH,Dring,Zeit,Bew,Sch,Rueck,Text
053A1,1,LI,15,0,160,1,2,2,4,35,"TA fördern, FI entnehmen und Kronenpflege bei ..."
053A2,0,0,0,0,0,0,0,0,0,0,0
053A3,1,DF,117,0,70,3,1,2,4,35,0
053A4,0,0,0,0,0,0,0,0,0,0,0
053B1,1,PL,15,0,230,1,2,2,4,35,TA erhalten KI geringe Kronen entnehmen
...,...,...,...,...,...,...,...,...,...,...,...
110C0,0,0,0,0,0,0,0,0,0,0,0
110D0,0,0,0,0,0,0,0,0,0,0,0
110E0,0,0,0,0,0,0,0,0,0,0,0
110F0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
##############   Nutz   ##############

In [None]:
# wo
index = ['506A1', '506A2', '506A3', '506A4', '506B0', '506C0',]

# create dataframe with wo
df = pd.DataFrame(index=index, columns=chk_ba)
df = df.fillna(0) # with 0s rather than NaNs

In [None]:
##########################################

In [7]:

data.loc[~pd.isna(data['UZ']),'UZ']

20     U 140
71     U 120
95     U 140
99     U 200
161    U 160
162    U 160
273    U 140
307    U 120
381    U 140
382    U 140
383    U 140
399    U 160
404    U 120
414    U 140
483    U 200
490    U 200
535    U 120
556    U 120
558    U 120
583    U 200
664    U 140
712    U 140
759    U 140
763    U 160
791    U 200
793    U 160
798    U 160
800    U 140
804    U 140
809    U 200
835    U 140
840    U 120
865    U 200
869    U 140
Name: UZ, dtype: object

In [8]:
def clean(item, dtype, rmv=0):
    if dtype == 'int':
        return int(item[rmv:])
    else:
        return item[rmv:]

In [14]:
def check(typ, item):
    
    if typ == 'UZ':
        if item in check_u:
            return('pass')
        else:
            return('error')
        
    elif typ == 'W':
        if item in check_w:
            return('pass')
        else:
            return('error')
    
    elif typ == 'STOE':
        if item in check_stoe:
            return('pass')
        else:
            return('error')
        
    elif typ == 'VB':
        if item in [0,1,2,3,4,5]:
            return('pass')
        else:
            return('error')
        
    elif typ == 'UENH':
        if item >= 0:
            return('pass')
        else:
            return('error')
        
    elif typ == 'UELH':
        if item >= 0:
            return('pass')
        else:
            return('error')

In [15]:
col_dict = {'UZ':['int', 2], 'W': ['str', 2], 'STOE':['int', 5], 'VTYP':['str', 5], \
            'VB':['int', 2], 'UENH':['int', 5], 'UELH':['int', 5]}

In [16]:
for col, info in col_dict.items():
    print(col, info)
    
    # loop and check
    for ins in data.loc[~pd.isna(data[col]),col]:
        item = clean(ins, info[0], info[1])
        print(check(col, item))

UZ ['int', 2]
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
W ['str', 2]
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
STOE ['int', 5]
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
VTYP ['str', 5]
VB ['int', 2]
pass
pass
UENH ['int', 5]
UELH ['int', 5]
pass


In [22]:
# loop and check
for ins in data.loc[~pd.isna(data['UZ']),'UZ']:
    if clean(ins) in check_u:
        print(ins)
    else:
        print('error')

140
120
140
200
160
160
140
120
140
140
140
160
120
140
200
200
120
120
120
200
140
140
140
160
200
160
160
140
140
200
140
120
200
140


In [27]:
data.loc[~pd.isna(data['STOE']),'STOE']

95     STOE 72
108    STOE 72
117    STOE 72
141    STOE 72
144    STOE 72
256    STOE 72
414    STOE 72
483    STOE 12
501    STOE 04
507    STOE 04
646    STOE 72
763    STOE 61
765    STOE 72
863    STOE 72
Name: STOE, dtype: object

In [26]:
# loop and check
for ins in data.loc[~pd.isna(data['W']),'W']:
    if ins[2:] in check_w:
        print(ins[2:])
    else:
        print('error')

SSA
SSI
SSI
OSA
OSA
OSA
OSA
OSA
OSA
OSA
OSA
SSA
SSA
SSA
SSA
WI
SSI
WA
SSI
OSA
OSI
OSI
SSI
SSA
SSI
SSI
WI
WI
SSA
WI
SSA
SSI


In [29]:
# loop and check
for ins in data.loc[~pd.isna(data['STOE']),'STOE']:
    if int(ins[5:]) in check_stoe:
        print(ins[5:])
    else:
        print('error')

72
72
72
72
72
72
72
12
04
04
72
61
72
72


In [None]:
# WO
UZ         int - check_u
W          str - check_w
STOE       int - check_stoe
VTYP       str - # to do
VB         int - [0, 1, 2, 3, 4, 5]
UENH       int - is_int
UELH       int - is_int
X          NaN
BZ         # to do
PZ         # to do
ST         str
BE         str
MA         str
SWE                                                      NaN
LRV                                                      NaN
XX                                                       NaN

# MA
S_0                                                      1.0
MA_0       str - check_ma
Fl_0       float - is_float
LH_0       int - is_int
NH_0       int - is_int
Dring_0    int - [1, 2, 3]
Zeit_0     int - [1, 2, 3]
Bew_0      int - [1, 2, 3]
Sch_0      int - [1, 2, 3, 4, 5, 6]
Rueck_0    int - [1, 2, 3]
Text_0     str

In [1]:
bz = "8DG 2SL"

In [8]:
babz = x.split()

In [10]:
babz

['8DG', '2SL']

In [4]:
chk_ba = ["AS", "AZ", "AH", "PM", "BI", "FB", "BL", "AC", "DG", "EE",\
 "EK", "EB", "EI", "EL", "ES", "EA", "FA", "FI", "FE", "FZ", "GK", "GB", "WP",\
 "GE", "AG", "AV", "HB", "HT", "HP", "JL", "CJ", "KK", "KB", "KO", "LA", "LI",\
 "ME", "AN", "FO", "PO", "AB", "RO", "RK", "BU", "RE", "SW", "CH", "ER", "SK",\
 "JN", "SP", "SF", "LS", "SL", "SN", "SG", "PU", "SA", "QR", "ST", "KW", "TH",\
 "QP", "TK", "TB", "UL", "NU", "WD", "KI", "TA", "WO", "LW", "EZ", "ZI", "SL",\
 "SN"]

In [11]:
def check_bz(bz_str):
    '''
    Checks Bestockungsziel (bz) for validity and returns the splited data.
    '''
    nums = []
    bas = []
    
    bz_split = bz_str.split()
    
    for bz_inst in bz_split:
        try:
            if bz_inst[-2:] in chk_ba:
                nums.append(int(bz_inst[:-2]))
                bas.append(bz_inst[-2:])
            else:
                print('Not a valid tree species')

        except:
            print('Input not valid')
            
    if sum(nums) != 10:
        print('BZ does not add up to 10')
        
    return nums, bas

In [17]:
check_bz('2BU 6KI 2KB')

([2, 6, 2], ['BU', 'KI', 'KB'])