# Data Cleaning and Selection Patientendaten

## Imports and Read

In [1]:
import pandas as pd
import datetime
import numpy as np
import re

In [2]:
print("Start Clean and Preprocessing data patient")

Start Clean and Preprocessing data patient


In [3]:
df = pd.read_excel(r'../raw_data/Hypophysenpatienten.xlsx',sheet_name='no duplicate PID')

In [4]:
df.columns

Index(['%ID', 'Fall Nr.', 'Datum/Zeit', 'Modalität', 'Exam Code', 'Exam Name',
       'Abteilung', 'Arbeitsplatz.Kürzel', 'Aufnahmeart', 'PID', 'Grösse',
       'Ausfälle prä', 'Prolaktin', 'IGF1', 'Cortisol', 'fT4',
       'weiteres Labor', 'Qualität', 'ED', 'OP Datum', 'Ausfälle post',
       'Diagnose', 'Kategorie', 'Patient Alter', 'Zuweiser',
       'AnforderungDatum', 'ÜberweiserIntern.Bereich',
       'ÜberweiserIntern.Klinik'],
      dtype='object')

## Basic Cleaning, Column Selection, Anomaly Correction and Format definition

In [5]:
# define needed columns
column_list = ['PID',"Datum/Zeit","Arbeitsplatz.Kürzel",'Grösse',
       'Ausfälle prä', 'Qualität', 'ED','OP Datum', 'Ausfälle post',
       'Diagnose', 'Kategorie', 'Patient Alter',
       'Prolaktin',"IGF1", 'Cortisol','fT4','weiteres Labor']


### Data Type Definition


In [6]:
#TODO: check Tristan
# not parseable correct values corrected
df.loc[3,'ED'] = datetime.datetime(2006,1,1,0,0,0,0)
df.loc[12,'ED'] = datetime.datetime(2008,1,1,0,0,0,0)

#TODO: check Tristan
# correct a value which is not datetime parseable
df.loc[df['OP Datum'] == '2006, 2009', 'OP Datum'] = datetime.datetime(2006,1,1,0,0,0,0)

In [7]:
# make datetime values
df["Datum/Zeit"] = pd.to_datetime(df["Datum/Zeit"])
df["ED"] = pd.to_datetime(df["ED"])
df["OP Datum"] = pd.to_datetime(df["OP Datum"])

In [8]:
# TODO: anomaly? check tristan
# rows where Entry Date is after Operationdate?
df[df['OP Datum'] < df['ED']][['ED','OP Datum']]

Unnamed: 0,ED,OP Datum
20,2023-09-02,2009-02-18


### Impute NAN Lab Data

In [9]:
# TODO: set impute value
df.loc[df["Prolaktin"].isna(),'Prolaktin'] = '0'
df.loc[df["IGF1"].isna(),'IGF1'] = '0'
df.loc[df["Cortisol"].isna(),'Cortisol'] = '0'
df.loc[df["fT4"].isna(),'fT4'] = '0'


### Unit Conversion Lab Data

In [10]:
df["Prolaktin"].unique()

array(['0', '173mU/l', '743mU/l', '687mU/l', '7.8 ug/l'], dtype=object)

In [11]:
# get indices which need to be converted
indices_to_divide = df.loc[df["Prolaktin"].str.contains('ug/l'),'Prolaktin'].index 
# remove units and strings
df['Prolaktin'] = df['Prolaktin'].str.rstrip(r'mU/l')
df['Prolaktin'] = df['Prolaktin'].str.rstrip(r'ug/l')
df['Prolaktin'] = df['Prolaktin'].astype(float)
# ug/l -> mU/l (ug/l * 21.2)
df.loc[indices_to_divide,'Prolaktin'] = df.loc[indices_to_divide,'Prolaktin'] * 21.2


In [12]:
df["IGF1"].unique()

array(['0', '6.3nmol/l', '20.2nmol/l', '75.4ng/ml', '208 ng/ml'],
      dtype=object)

In [13]:
# get indices which need to be converted
indices_to_divide = df.loc[df["IGF1"].str.contains('ng/ml'),'IGF1'].index 
# remove units and strings
df['IGF1'] = df['IGF1'].str.rstrip(r'nmol/l')
df['IGF1'] = df['IGF1'].str.rstrip(r'ng/ml')
df['IGF1'] = df['IGF1'].astype(float)
# ng/ml -> nmol/l (ng/ml / 7.65)
df.loc[indices_to_divide,'IGF1'] = df.loc[indices_to_divide,'IGF1'] / 7.65


In [14]:
df["Cortisol"].unique()

array(['0', 329, 271, 110, '311 nmol/l'], dtype=object)

In [15]:
# # get indices which need to be converted
# indices_to_divide = df.loc[df["Cortisol"].str.contains('ng/ml'),'Cortisol'].index 
# # remove units and strings
# df['Cortisol'] = df['Cortisol'].str.rstrip(r'nmol/l')
# df['Cortisol'] = df['IGF1'].str.rstrip(r'ng/ml')
# df['Cortisol'] = df['Cortisol'].astype(float)
# # ng/ml -> nmol/l (ng/ml *  27.59)
# df.loc[indices_to_divide,'Cortisol'] = df.loc[indices_to_divide,'Cortisol'] / 7.65


In [16]:
df["fT4"].unique()

array(['0', 10.1, 8.4, 7.3, '14.6 pmol/l'], dtype=object)

In [17]:
# # get indices which need to be converted
# indices_to_divide = df.loc[df["fT4"].str.contains('ng/ml'),'fT4'].index 
# # remove units and strings
# df['fT4'] = df['fT4'].str.rstrip(r'nmol/l')
# df['fT4'] = df['fT4'].str.rstrip(r'ng/ml')
# df['fT4'] = df['fT4'].astype(float)
# # ng/ml -> nmol/l (ng/ml / 7.65)
# df.loc[indices_to_divide,'fT4'] = df.loc[indices_to_divide,'fT4'] / 7.65


In [18]:
# Patient ID Duplicate Check
assert len(df[df["PID"].duplicated()]) == 0

# Case Nr Duplicate Check
assert len(df[df["Fall Nr."].duplicated()]) == 0

In [19]:
# select and rename columns
df = df[column_list]
df= df.rename(columns={"Fall Nr.": "Case_ID","PID": "Patient_ID",
                       "Datum/Zeit": "Date_MRI","ED": "Entry_date", "OP Datum": "Operation_date",
                       "Arbeitsplatz.Kürzel":"ID_MRI_Machine","Grösse": "Adenoma_size","Qualität": "Label_Quality",
                       "Patient Alter":"Patient_age","Kategorie":"Category","Diagnose":"Diagnosis",
                       "Prolaktin":"Prolactin","weiteres Labor":"Lab_additional"})

In [20]:
# set category data type in pandas, check datatypes
df['ID_MRI_Machine'] = df['ID_MRI_Machine'].astype('category')
df['Adenoma_size'] = df['Adenoma_size'].astype('category')
df['Label_Quality'] = df['Label_Quality'].astype('category')
df['Diagnosis'] = df['Diagnosis'].astype('category')
df['Category'] = df['Category'].astype('category')
df.dtypes

Patient_ID                 int64
Date_MRI          datetime64[ns]
ID_MRI_Machine          category
Adenoma_size            category
Ausfälle prä              object
Label_Quality           category
Entry_date        datetime64[ns]
Operation_date    datetime64[ns]
Ausfälle post             object
Diagnosis               category
Category                category
Patient_age                int64
Prolactin                float64
IGF1                     float64
Cortisol                  object
fT4                       object
Lab_additional            object
dtype: object

In [21]:
# replace and correct wrong namings from labelers
df["Ausfälle prä"]= df["Ausfälle prä"].str.replace("intak","intakt")
df["Ausfälle prä"]= df["Ausfälle prä"].str.replace("goando","gonado")
df["Ausfälle post"]= df["Ausfälle post"].str.replace("goando","gonado")
df["Ausfälle post"]= df["Ausfälle post"].str.replace("adh","ADH")

## One Hot Encode Categorical Values

To use and analyse the categorical data we need to one-hot encode them. This is done by splitting the comma separated strings into single strings and then create a one-hot-encoded column of each individual value. This column is then added to the original dataframe.

In [22]:
# Split the 'Ausfälle prä' column into separate strings
df['Ausfälle prä'] = df['Ausfälle prä'].str.split(', ')
# Create a set to store all unique disfunctions
unique_disfunctions = set()

# Iterate over the 'Ausfälle prä' column to gather unique disfunctions
for value in df['Ausfälle prä']:
    if isinstance(value, list):
        unique_disfunctions.update(value)
    elif isinstance(value, str):
        unique_disfunctions.add(value)

# Iterate over the unique disfunctions and create one-hot encoded columns
for disfunction in unique_disfunctions:
    df["Pre_OP_hormone_"+ disfunction] = df['Ausfälle prä'].apply(lambda x: 1 if (isinstance(x, list) and disfunction in x) or (x == disfunction) else 0)
# drop the original 'Ausfälle prä' column
df = df.drop('Ausfälle prä', axis=1)

In [23]:
# Split the 'Ausfälle post' column into separate strings
df['Ausfälle post'] = df['Ausfälle post'].str.split(', ')

# Create a set to store all unique disfunctions
unique_disfunctions = set()

# Iterate over the 'Ausfälle post' column to gather unique disfunctions
for value in df['Ausfälle post']:
    if isinstance(value, list):
        unique_disfunctions.update(value)
    elif isinstance(value, str):
        unique_disfunctions.add(value)

# Iterate over the unique disfunctions and create one-hot encoded columns
for disfunction in unique_disfunctions:
    df["Post_OP_hormone_"+ disfunction] = df['Ausfälle post'].apply(lambda x: 1 if (isinstance(x, list) and disfunction in x) or (x == disfunction) else 0)

# drop the original 'Ausfälle post' column
df = df.drop('Ausfälle post', axis=1)

In [24]:
df.columns

Index(['Patient_ID', 'Date_MRI', 'ID_MRI_Machine', 'Adenoma_size',
       'Label_Quality', 'Entry_date', 'Operation_date', 'Diagnosis',
       'Category', 'Patient_age', 'Prolactin', 'IGF1', 'Cortisol', 'fT4',
       'Lab_additional', 'Pre_OP_hormone_', 'Pre_OP_hormone_thyreo',
       'Pre_OP_hormone_intakt', 'Pre_OP_hormone_hyperprolaktin',
       'Pre_OP_hormone_hyperprolaktin stressbedingt',
       'Pre_OP_hormone_morbus cushing', 'Pre_OP_hormone_gonado',
       'Pre_OP_hormone_gondao', 'Pre_OP_hormone_coritco',
       'Pre_OP_hormone_keine', 'Pre_OP_hormone_SIADH',
       'Pre_OP_hormone_cortico', 'Pre_OP_hormone_inaktiv',
       'Pre_OP_hormone_hyperprolakin stressinduziert',
       'Pre_OP_hormone_hypogonado', 'Pre_OP_hormone_somato',
       'Pre_OP_hormone_Kompression', 'Pre_OP_hormone_hypothyreo',
       'Post_OP_hormone_thyreo', 'Post_OP_hormone_intakt',
       'Post_OP_hormone_hyperprolaktin', 'Post_OP_hormone_gonado',
       'Post_OP_hormone_LH und FSH immunohistoch. Express

In [25]:
df= df[['Patient_ID', 'Date_MRI', 'ID_MRI_Machine','Entry_date', 'Operation_date', 'Adenoma_size',
         'Diagnosis',
       'Category', 'Patient_age', 'Prolactin',"IGF1", 'Cortisol','fT4','Lab_additional',
       'Pre_OP_hormone_cortico', 'Pre_OP_hormone_gonado',
       'Pre_OP_hormone_somato', 'Pre_OP_hormone_thyreo',
       'Pre_OP_hormone_hyperprolaktin', 'Pre_OP_hormone_keine',
       'Pre_OP_hormone_intakt', 'Post_OP_hormone_cortico',
       'Post_OP_hormone_gonado', 'Post_OP_hormone_somato',
       'Post_OP_hormone_ADH', 'Post_OP_hormone_thyreo',
       'Post_OP_hormone_hyperprolaktin', 'Post_OP_hormone_keine',
       'Post_OP_hormone_intakt', 'Label_Quality',]]

In [26]:
df.to_csv(r'../raw_data/label_data.csv',index=False)

In [27]:
print("End Clean and Preprocessing patient data")

End Clean and Preprocessing patient data


# Data Cleaning and Selection Labor data

In [28]:
print("Start Clean and Preprocessing labor data")

Start Clean and Preprocessing labor data


## Read

In [370]:
labor_data_raw = pd.read_excel("../raw_data/labor_data.xlsx")
labor_data = pd.DataFrame({})

In [371]:
# change float with , to float with .
labor_data[labor_data_raw.columns[0]] = [re.sub('"([^",]+),([^"]+)"',r"\1.\2", re.sub('"([^",]+),([^",]+),([^"]+)"',r"\1.\2.\3", string[0])) for string in labor_data_raw.values]

In [372]:
labor_data= labor_data.iloc[:,0].str.split(",",expand=True)
labor_data.columns = labor_data_raw.columns[0].split(",")

In [373]:
labor_data[labor_data=="NULL"] = np.nan

In [374]:
ids = {'Ã¼': 'ü', 'Ã¤': 'ä', "Ã„":"Ä"}

for column in labor_data.columns:
    for old, new in ids.items():
        labor_data[column] = labor_data[column].str.replace(old, new, regex=False)
clean_result = lambda result: re.sub(r'(?<!\d)\.', '', re.sub(r'[^\d.]', '', str(result))) #clean < zahl / > zahl / 1 A zahl
labor_data["Resultat"] = labor_data["Resultat"].apply(clean_result) 
labor_data = labor_data[labor_data["Resultat"] != ""]
labor_data["Resultat"] = labor_data["Resultat"].astype(float)
labor_data["Normwert"] = labor_data["Normwert"].str.lower().str.replace(" ", "").replace("",np.nan)
labor_data["Warnung"] = labor_data["Warnung"].replace('   ',np.nan)
labor_data = labor_data[labor_data["Datum_Resultat"]!= "01.01.1900"] # delete unknown 01.01.1900 dates
labor_data["Datum_Resultat"] = pd.to_datetime(labor_data["Datum_Resultat"],dayfirst=True)

labor_data["PATIENT_NR"] = labor_data["PATIENT_NR"].astype(int)

In [408]:
labor_data = labor_data.rename(columns={"FALL_NR":"Case_ID"})
labor_data_for_modell = labor_data.drop(columns = ["Warnung","Einheit","Auftraggeber","Normwert","Analyse"])

In [409]:
# Compute minimum date for each patient and analysis
min_dates = labor_data_for_modell.groupby(['PATIENT_NR', "Analyse-ID"])['Datum_Resultat'].min().reset_index()
# Merge with the original DataFrame to filter rows with minimum dates
labor_data_for_modell = pd.merge(labor_data_for_modell, min_dates, on=['PATIENT_NR', 'Analyse-ID', 'Datum_Resultat'])


In [410]:
# mean of results of same date
labor_data_for_modell = labor_data_for_modell.groupby(["PATIENT_NR","Analyse-ID","Datum_Resultat"])["Resultat"].agg([np.mean]).reset_index()

In [411]:
patient_data = pd.read_excel(r'../raw_data/Hypophysenpatienten.xlsx',sheet_name='w duplicates').loc[:,["PID","Datum/Zeit","Fall Nr."]].rename(columns={"PID":"PATIENT_NR"})

In [415]:
patient_data[patient_data["PATIENT_NR"] == 300266681]

Unnamed: 0,PATIENT_NR,Datum/Zeit,Fall Nr.
40,300266681,2023-02-09 10:29:00,41597445
137,300266681,2022-08-09 13:09:00,41597445
189,300266681,2022-03-02 13:22:00,41264589
313,300266681,2021-05-26 15:56:00,41230376


In [412]:
labor_data_for_modell = pd.merge(labor_data_for_modell,patient_data,on="PATIENT_NR",how = "right")
labor_data_for_modell = labor_data_for_modell[labor_data_for_modell["Datum/Zeit"] > labor_data_for_modell["Datum_Resultat"]].drop(columns="Datum/Zeit")

In [413]:
labor_data_for_modell[labor_data_for_modell.loc[:,["PATIENT_NR","Analyse-ID","Fall Nr."]].duplicated()]

Unnamed: 0,PATIENT_NR,Analyse-ID,Datum_Resultat,mean,Fall Nr.
575,300266681,20396,2021-06-02,6.33,0041597445
576,300266681,FSH,2021-05-28,29.70,0041597445
577,300266681,FT4,2021-05-28,14.90,0041597445
578,300266681,LH,2021-05-28,2.10,0041597445
579,300266681,PROL,2021-05-28,5.70,0041597445
...,...,...,...,...,...
3763,300074226,FSH,2018-02-07,25.40,0040333672
3764,300074226,FT4,2018-02-07,12.40,0040333672
3765,300074226,LH,2018-02-07,9.00,0040333672
3766,300074226,PROL,2018-02-07,11.60,0040333672


In [414]:
labor_data_for_modell[labor_data_for_modell["PATIENT_NR"] == 300266681]

Unnamed: 0,PATIENT_NR,Analyse-ID,Datum_Resultat,mean,Fall Nr.
151,300266681,20396,2021-06-02,6.33,41597445
152,300266681,FSH,2021-05-28,29.7,41597445
153,300266681,FT4,2021-05-28,14.9,41597445
154,300266681,LH,2021-05-28,2.1,41597445
155,300266681,PROL,2021-05-28,5.7,41597445
156,300266681,TEST,2021-05-31,14.0,41597445
575,300266681,20396,2021-06-02,6.33,41597445
576,300266681,FSH,2021-05-28,29.7,41597445
577,300266681,FT4,2021-05-28,14.9,41597445
578,300266681,LH,2021-05-28,2.1,41597445


In [None]:
labor_data_for_modell

Unnamed: 0,PATIENT_NR,Analyse-ID,Datum_Resultat,mean,Datum/Zeit_x,Datum/Zeit_y
0,300146159,20396,2021-09-15,12.50,2023-05-11 09:00:00,2023-05-11 09:00:00
1,300146159,FSH,2021-09-13,6.32,2023-05-11 09:00:00,2023-05-11 09:00:00
2,300146159,FT4,2021-09-13,8.36,2023-05-11 09:00:00,2023-05-11 09:00:00
3,300146159,LH,2021-09-13,1.69,2023-05-11 09:00:00,2023-05-11 09:00:00
4,300146159,PROL,2021-09-13,13.90,2023-05-11 09:00:00,2023-05-11 09:00:00
...,...,...,...,...,...,...
62117,300019022,PROL,2018-02-07,22.70,2018-02-07 16:46:00,2016-12-05 11:05:00
62118,300019022,PROL,2018-02-07,22.70,2016-12-05 11:05:00,2016-12-05 11:05:00
62305,23613,FSH,2018-01-10,40.50,2018-01-27 15:12:00,2018-01-27 15:12:00
62307,23613,LH,2018-01-10,11.10,2018-01-27 15:12:00,2018-01-27 15:12:00


In [303]:
labor_data_for_modell[labor_data_for_modell.loc[:,["PATIENT_NR","Analyse-ID"]].duplicated()]

Unnamed: 0,PATIENT_NR,Analyse-ID,Fallart,Datum_Resultat,mean
500,606942,20396,ambulant,2021-10-27,42.7
501,606942,FT4,ambulant,2021-10-26,13.4
502,606942,PROL,ambulant,2021-10-26,35.7
528,300135859,20396,Stationär,2019-01-14,17.6
529,300135859,FSH,Stationär,2019-01-11,3.7
...,...,...,...,...,...
3805,297971,PROL,Stationär,2018-02-09,7.9
3808,300019022,FSH,ambulant,2018-02-07,4.1
3809,300019022,FT4,ambulant,2018-02-07,12.3
3810,300019022,LH,ambulant,2018-02-07,3.1


In [270]:
labor_data_for_modell.pivot(index=["PATIENT_NR"],values = ['mean'], columns = ['Analyse-ID'])

ValueError: Index contains duplicate entries, cannot reshape

In [261]:
labor_data_for_modell = labor_data_for_modell.pivot(index=["PATIENT_NR"],values = ['mean'], columns = ['Analyse-ID'])
labor_data_for_modell.columns = labor_data_for_modell.columns.droplevel()
labor_data_for_modell = labor_data_for_modell.reset_index()

KeyError: 'Analyse-ID'

In [260]:
labor_data_for_modell

Analyse-ID,PATIENT_NR,index,20396,24382,24383,24384,COR60,FSH,FT4,LH,PROL,TEST
0,4858,0,7.18,,,,,,,,,
1,4858,1,,,,,,1.2,,,,
2,4858,2,,,,,,,9.41,,,
3,4858,3,,,,,,,,0.6,,
4,4858,4,,,,,,,,,2.2,
...,...,...,...,...,...,...,...,...,...,...,...,...
3026,580000175,5031,21.00,,,,,,,,,
3027,580000175,5032,,,,,,7.9,,,,
3028,580000175,5033,,,,,,,11.70,,,
3029,580000175,5034,,,,,,,,3.9,,


In [45]:
print(f"Sparsity of labordata: {round(labor_data_for_modell.isna().mean().mean(),3)} %")

Sparsity of labordata: 0.799 %


In [46]:
labor_data_for_modell.to_csv(r'../raw_data/labor_data_preprocessed.csv',index=False)

In [47]:
print("End Clean and Preprocessing labor data")

End Clean and Preprocessing labor data


In [48]:
labor_data[labor_data["Resultat"].isin(["1Â\xa0264",'1Â\xa0021',"1Â\xa0087"])]

Unnamed: 0,Case_ID,PATIENT_NR,Analyse,Analyse-ID,Auftraggeber,Datum_Resultat,Resultat,Einheit,Normwert,Fallart,Warnung
84,41796306,307119,Cortisol-60' Synacthen-Test,COR60,STROKE & NEURO-IMC,2023-01-31,1Â 264,nmol/l,>550,Stationär,
8714,41866385,300217600,Cortisol-30' Synacthen-Test,COR30,711 MED. STATION,2023-03-31,1Â 087,nmol/l,>550,Stationär,
30052,41129181,125362,Cortisol-60' Synacthen-Test,COR60,ENDOKRINOLOGIE,2021-03-09,1Â 021,nmol/l,>550,ambulant,


In [49]:
values = []
for value in labor_data["Resultat"]:
    try:
        float(value)
    except:
        values.append(value)

In [50]:
values

['<0.20',
 '<0.20',
 '<0.20',
 '1Â\xa0264',
 '<0.20',
 '<0.30',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.30',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.20',
 '1Â\xa0019',
 '<0.45',
 '<0.20',
 '<0.45',
 '1Â\xa0416',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.30',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.45',
 '<0.20',
 '1Â\xa0029',
 '<0.20',
 '<0.30',
 '<0.30',
 '<0.45',
 '<0.20',
 '1Â\xa0515',
 '<0.20',
 '<0.45',
 '<0.30',
 '<0.20',
 '<0.20',
 '<0.30',
 '<0.20',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.20',
 '>64.0',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.20',
 '<0.45',
 '<0.45',
 '1Â\xa0304',
 '<0.20',
 '1Â\xa0050',
 '<0.20',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.30',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.30',
 '<0.20',
 '<0.45',
 '<0.45',
 '<0.45',
 '<0.45',
 '1Â\xa0169',
 '<0.45',
 '<0.20',
 '<0.20',
 '<0.45',
 '1Â\xa0384',
 '<2.00',
 '<0.30',
 '<0.20',
 '>64.0',
 '<0.20',
 '<0.30',
 '<0.45',
 '<0.20',
 '>6

In [51]:
np.unique(np.array(values))

array(['1Â\xa0003', '1Â\xa0004', '1Â\xa0005', '1Â\xa0006', '1Â\xa0007',
       '1Â\xa0009', '1Â\xa0011', '1Â\xa0016', '1Â\xa0017', '1Â\xa0018',
       '1Â\xa0019', '1Â\xa0021', '1Â\xa0027', '1Â\xa0028', '1Â\xa0029',
       '1Â\xa0031', '1Â\xa0034', '1Â\xa0035', '1Â\xa0038', '1Â\xa0041',
       '1Â\xa0043', '1Â\xa0045', '1Â\xa0047', '1Â\xa0049', '1Â\xa0050',
       '1Â\xa0051', '1Â\xa0052', '1Â\xa0053', '1Â\xa0055', '1Â\xa0058',
       '1Â\xa0059', '1Â\xa0061', '1Â\xa0066', '1Â\xa0070', '1Â\xa0071',
       '1Â\xa0075', '1Â\xa0076', '1Â\xa0079', '1Â\xa0082', '1Â\xa0083',
       '1Â\xa0084', '1Â\xa0085', '1Â\xa0087', '1Â\xa0088', '1Â\xa0089',
       '1Â\xa0093', '1Â\xa0094', '1Â\xa0096', '1Â\xa0097', '1Â\xa0098',
       '1Â\xa0100', '1Â\xa0104', '1Â\xa0105', '1Â\xa0113', '1Â\xa0116',
       '1Â\xa0118', '1Â\xa0119', '1Â\xa0121', '1Â\xa0125', '1Â\xa0126',
       '1Â\xa0133', '1Â\xa0136', '1Â\xa0145', '1Â\xa0146', '1Â\xa0147',
       '1Â\xa0149', '1Â\xa0152', '1Â\xa0156', '1Â\xa0159', '1Â\x

In [52]:
labor_data[labor_data["Resultat"] == "1Â\xa0896"]

Unnamed: 0,Case_ID,PATIENT_NR,Analyse,Analyse-ID,Auftraggeber,Datum_Resultat,Resultat,Einheit,Normwert,Fallart,Warnung
17541,10308286,261047,Cortisol-30' Synacthen-Test,COR30,SPITAL ZOFINGEN,2021-03-11,1Â 896,nmol/l,>550,no,
