In [3]:
import spacy
from spacy.language import Language
from spacy.tokens import Doc, Span
import re

In [1]:
tests_short = [
    "Dr. Max Mustermann",
    "Max Mustermann, Dr.",
    "Prof. Max Mustermann",
    "Max Mustermann, Prof.",
    "Prof. Dr. Max Mustermann",
    "Max Mustermann, Prof. Dr.",
]
tests_long = [
    "Doktor Max Mustermann",
    "Max Mustermann, Doktor",
    "Professor Max Mustermann",
    "Max Mustermann, Professor",
    "Professor Doktor Max Mustermann",
    "Max Mustermann, Professor Doktor",
]

In [9]:
nlp = spacy.load("de_core_news_lg", disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"])

In [7]:
nlp = spacy.load("en_core_web_lg", disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"])

In [11]:
nlp = spacy.load("xx_ent_wiki_sm", disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"])

In [18]:
for s in tests_short:
    doc = nlp(s)
    print("{:40s} -> {:s}".format(s, str(doc.ents)))
print()
for s in tests_long:
    doc = nlp(s)
    print("{:40s} -> {:s}".format(s, str(doc.ents)))

Dr. Max Mustermann                       -> (Max Mustermann,)
Max Mustermann, Dr.                      -> (Max Mustermann, Dr)
Prof. Max Mustermann                     -> (Max Mustermann,)
Max Mustermann, Prof.                    -> (Max Mustermann,)
Prof. Dr. Max Mustermann                 -> (Max Mustermann,)
Max Mustermann, Prof. Dr.                -> (Max Mustermann,)

Doktor Max Mustermann                    -> (Max Mustermann,)
Max Mustermann, Doktor                   -> (Max Mustermann,)
Professor Max Mustermann                 -> (Max Mustermann,)
Max Mustermann, Professor                -> (Max Mustermann,)
Professor Doktor Max Mustermann          -> (Max Mustermann,)
Max Mustermann, Professor Doktor         -> (Max Mustermann,)


In [88]:
def find_title(doc, entity):
    start, end = entity.start, entity.end
    print (f"start={start} end={end} len={len(doc)}")
    # Find prefix
    if start > 1 and (doc[start-2].text == 'Professor' or doc[start-2].text == 'Prof.') and (doc[start-1].text == 'Doktor' or doc[start-1].text == 'Dr.'):
        return 'Prof. Dr.'
    if start > 0 and (doc[start-1].text == 'Professor' or doc[start-1].text == 'Prof.'):
        return 'Prof.'
    if start > 0 and (doc[start-1].text == 'Doktor' or doc[start-1].text == 'Dr.'):
        return 'Dr.'
    # Find suffix
    if end+2 < len(doc) and (doc[end+1].text == 'Professor' or doc[end+1].text == 'Prof.') and (doc[end+2].text == 'Doktor' or doc[end+2].text == 'Dr.'):
        return "Prof. Dr."
    if end+1 < len(doc) and (doc[end+1].text == 'Professor' or doc[end+1].text == 'Prof.'):
        return "Prof."
    if end+1 < len(doc) and (doc[end+1].text == 'Doktor' or doc[end+1].text == 'Dr.'):
        return "Dr."
    return ''

for s in tests_long:
    doc = nlp(s)
    entity = doc.ents[0]

    print("{:40s} -> {:s} - {:s}".format(s, str(doc.ents), find_title(doc, entity)))

start=1 end=3 len=3
Doktor Max Mustermann                    -> (Max Mustermann,) - Dr.
start=0 end=2 len=4
Max Mustermann, Doktor                   -> (Max Mustermann,) - Dr.
start=1 end=3 len=3
Professor Max Mustermann                 -> (Max Mustermann,) - Prof.
start=0 end=2 len=4
Max Mustermann, Professor                -> (Max Mustermann,) - Prof.
start=2 end=4 len=4
Professor Doktor Max Mustermann          -> (Max Mustermann,) - Prof. Dr.
start=0 end=2 len=5
Max Mustermann, Professor Doktor         -> (Max Mustermann,) - Prof. Dr.


In [92]:
string = "|Arman Arzani\n||Researcher\n||+49-201-183-2427 (out of order)\n||arman.arzani.due@gmail.com\n||SA-125\n|\n|Rasit Eskicioglu\n||Visiting Professor\n||+49-201-183-3211\n||rasit.eskicioglu@umanitoba.ca\n||SA-125\n|\n|Alexander Julian Golkowski\n||Researcher\n||+49-201-183-6362\n||alexander.golkowski@uni-due.de\n||SA-328\n|\n|Marcus Handte\n||Senior Researcher\n||+49 176 63309480\n||marcus.handte@uni-due.de\n||SA-121\n|\n|Simon Janzon\n||Researcher\n||+49-201-183-6361 (out of order)\n||janzon.nes@gmail.com\n||SA-327\n|\n|Pedro José Marrón\n||Professor\n||+49 177 7489768\n||pjmarron@locoslab.com\n||SA-123\n|\n|Sayedsepehr Mosavat\n||Researcher\n||+49-201-183-2427\n||smosavat.nes@gmail.com\n||SA-120\n|\n|Bijan Shahbaz Nejad\n||Researcher\n||+49-201-183-6370\n||bijan.shahbaz-nejad@uni-due.de\n||SA-118\n|\n|Peter Roch\n||Researcher\n||+49-201-183-6370 (out of order)\n||peter.roch@uni-due.de\n||SA-118\n|\n|Elke Schulte-Lippern\n||Office Managerin\n||+49 177 7489768\n||eschuli@web.de\n||SA-124\n|\n|Carlos Medina Sánchez\n||Researcher\n||+49-201-183-6362 (out of order)\n||carlosfmedinasanchez@gmail.com\n||SA-328\n|"

string = string.replace("|", "\n")
for line in string.splitlines():

    # Ignore mostly empty lines (lesser than 5 characters and lesser than 2 words)
    if len(line.strip()) < 5 or len(line.split()) < 2:
        continue

    # Ignore lines with years: These are indicators for publications
    # We look for names that stand alone to identify a single person working at somewhere
    if re.findall(r"\d{4}", line):
        continue

    doc = nlp(string)
    print(doc.ents)

(Arman Arzani, arman.arzani.due@gmail.com


SA-125, Alexander Julian Golkowski, Marcus Handte, 176 63309480, Simon Janzon, Pedro José Marrón, 177, Sayedsepehr Mosavat, smosavat.nes@gmail.com, Bijan Shahbaz Nejad, Peter Roch, Elke Schulte-Lippern, Managerin, Carlos Medina Sánchez)
(Arman Arzani, arman.arzani.due@gmail.com


SA-125, Alexander Julian Golkowski, Marcus Handte, 176 63309480, Simon Janzon, Pedro José Marrón, 177, Sayedsepehr Mosavat, smosavat.nes@gmail.com, Bijan Shahbaz Nejad, Peter Roch, Elke Schulte-Lippern, Managerin, Carlos Medina Sánchez)
(Arman Arzani, arman.arzani.due@gmail.com


SA-125, Alexander Julian Golkowski, Marcus Handte, 176 63309480, Simon Janzon, Pedro José Marrón, 177, Sayedsepehr Mosavat, smosavat.nes@gmail.com, Bijan Shahbaz Nejad, Peter Roch, Elke Schulte-Lippern, Managerin, Carlos Medina Sánchez)
(Arman Arzani, arman.arzani.due@gmail.com


SA-125, Alexander Julian Golkowski, Marcus Handte, 176 63309480, Simon Janzon, Pedro José Marrón, 177, Sayedsepeh

In [18]:
import lxml.html
import requests
from langdetect import detect_langs

req = requests.get("https://www.uni-due.de/")
req = requests.get("https://esaga.uni-due.de/members/")
req = requests.get("https://www.uni-due.de/mechatronik/team/kracht.php")

document = lxml.html.document_fromstring(req.text)
# print("\n".join(document.xpath("//text()")))
texts = [s.strip() for s in document.xpath("//text()")]
texts = filter(lambda x: bool(x.strip()), texts)
text = "\n".join(texts)

print(text)
print(detect_langs(text))
print(detect_langs("Wissenschaftlicher Mitarbeiter: Dr.-Ing. Frédéric Etienne Kracht\n|\n|\nDr.-Ing. Frédéric Etienne Kracht\nRaum: MD-227\nTelefon: +49 (0)203 379-3951\nTelefax: +49 (0)203 379-4143\nE-Mail: Frederic.Kracht@uni-due.de\nForschungsprofil\n- Modellierung von Fahrwerkelastizitäten\n- echtzeitfähige Fahrzeugsimulation\nTitel der Dissertation\n-\nModellbildung und Simulation der Dynamik und Elastokinematik von Radaufhängungen für Echtzeitanwendungen\nPreise\n- Alumni Preisträger der Duisburger Ingenieurwissenschaften 2013/2014: Bester Studienabschluss im Masterstudiengang Maschinenbau\n- 2011 Aufnahme in die Studienstiftung des deutschen Volkes\n- Best Paper Award der LSMS2017 & ICSEE2017 - Vortragender (2017 International Conference on Life System Modeling and Simulation & International Conference on Intelligent Computing for Sustainable Energy and Environment, 22.-24.09.2017, Nanjing, China)\n- Promotion im Bereich Ingenieurwissenschaften mit herausragendem Erfolg – Unsere Besten – Ehrung für herausragende Abschlüsse der Universität Duisburg-Essen im Rahmen des Dies academicus 2021\n- Innovationspreis 2020 der Sparkasse am Niederrhein für hervorragende und anwendungsbezogene Dissertationen auf dem Gebiet der Ingenieurwissenschaften\n- Helmut und Gerlinde Schwarz Preis 2022\nAktuelle Forschungsprojekte\n- Fahrsimulatoren des Lehrstuhls für Mechatronik\n- VeLABi – Versuchs- und Leitungszentrum für autonome Binnenschiffe\n- AutoBin – Autonomes Binnenschiff – Simulation und Demonstration von automatisiertem Fahren in der Binnenschifffahrt\n- FernBin - Ferngesteuertes, koordiniertes Fahren in der Binnenschifffahrt\n- HaFoLa - Versuchszentrum für innovative Hafen- und Umschlagtechnologien\nGutachten und Gremienarbeit\n- Mitglied der Global Young Faculty VII\n- Paper Reviewer der 14th International Symposium on Advanced Vehicle Control (AVEC 2018)\n- Mitglied des PhD-Lenkungsausschusses des Joint Degree Programms mit dem Indian Institute of Technology Madras\nLehrstuhlarbeit\n- Verwaltung der Publikationen des Lehrstuhls\n- Verwaltung der Lehrstuhlbibliothek\n- Studienfachberater im Studienschwerpunkt Mechatronik\n- Faculty Advisor des Formula Student Teams der Universität Duisburg-Essen (E-Team)\n- Mitglied des Prüfungsausschusses im Master-Studiengang Automotive Engineeing & Management (Vertreter)\nAktuelle Veranstaltungen\n-\n2023 SS\nVergangene Veranstaltungen (max. 10)\n-\n2022 WS\n-\n2022 SS\n-\n2021 WS\n-\n2021 SS\nVeröffentlichungen\nResearchGate Profil: LINK\nGoogleScholar: LINK\nScopus ID: 57190935028\nORCID: 0000-0002-3167-5217\nSciProfiles: LINK\n-\nAutomatisiert und umweltfreundlich fahren zu Lande und auf dem Wasser : Forschungsstrategie für den Ausbau der Automatisierung innerhalb des straßengebundenen Verkehrs sowie der (Binnen)-SchifffahrtIn: Unikate: Berichte aus Forschung und Lehre (2023) Nr. 59: Mobilität und Transport im Wandel : Strategie und Umsetzung, S. 8 - 21ISSN: 0944-6060; 1869-3881\n-\nFrom Modeling to Optimizing Sustainable Public Transport : A New Methodological ApproachIn: Sustainability Jg. 15 (2023) Nr. 10,ISSN: 2071-1050\n-\nDigital, Online, Take-Home - University Students' Attitude towards Different Examination FormatsIn: 2022 IEEE German Education Conference, GeCon 2022 / 2022 IEEE German Education Conference (GeCon), 11-12 August 2022, Berlin, Germany / Institute of Electrical and Electronics Engineers (Hrsg.) 2022ISBN: 9781665451703; 9781665451710\n-\nVeLABi : Research and control center for autonomous inland vesselsIn: at - Automatisierungstechnik Jg. 70 (2022) Nr. 5, S. 411 - 419ISSN: 0178-2312; 2196-677X\n-\nApplication of Photogrammetric Object Reconstruction for Simulation Environments in the Context of Inland WaterwaysIn: Simulation and Modeling Methodologies, Technologies and Applications: 10th International Conference, SIMULTECH 2020 ; Lieusaint - Paris, France, July 8-10, 2020 ; Revised Selected Papers / International Conference on Simulation and Modeling Methodologies, Technologies and Applications ; SIMULTECH ; July 8-10, 2020, Lieusaint - Paris / Obaidat, Mohammad S.; Oren, Tuncer; De Rango, Floriano (Hrsg.) 2022, S. 1 - 17ISBN: 978-3-030-84810-1; 978-3-030-84811-8\n-\nReal-time capable and modular modeling of wheel suspensions using neural networksIn: AmE 2021: Automotive meets Electronics / 12. GMM-Symposium, 10.-11. März 2021, online event 2021, S. 74 - 80ISBN: 978-3-8007-5487-8; 978-3-8007-5488-5\n-\nAuf dem Weg zum hochautomatisierten Binnenschiff : Versuchs-und Leitungszentrum für autonome BinnenschiffeIn: Making Connected Mobility Work: Technische und betriebswirtschaftliche Aspekte / Making Connected Mobility Work ; 12. Wissenschaftsforum Mobilität 2020 ; 18.06.2020, Duisburg (abgesagt) / Proff, Heike (Hrsg.) 2021, S. 189 - 202ISBN: 978-3-658-32265-6; 978-3-658-32266-3\n-\nPhotogrammetrie plus Geo-Datenbank : eine Alternative zur manuellen Modellierung?In: IFToMM D-A-CH Konferenz 2021der IFToMM Member Organizations Austria, Germany, Switzerland: 18.-19. Februar 2021, Online-Konferenz ; Tagungsband / 7. IFToMM D-A-CH Konferenz 2021, 18./19. Februar 2021, Online-Konferenz 2021, S. 73ISBN: 978-3-940402-45-5\n-\nQuasi-static and dynamic suspension measurements vs. multi-body and real‑time simulation resultsIn: 10th International Munich Chassis Symposium 2019: chassis.tech plus / International Munich Chassis Symposium 2019 ; chassis.tech plus 2019 ; 25.-26.06.2019, München / Pfeffer, Peter E. (Hrsg.) 2020, S. 119 - 134ISBN: 978-3-658-26434-5; 978-3-658-26435-2\n-\nThe AutoBin Project : Key Concepts, Status, and Intended Outcomes\nAutonomous Inland and Short Sea Shipping Conference ; AISS2020 ; 23 October 2020, Duisburg,Duisburg (2020)\n-\nResearch and Control Center for Autonomous Inland Vessels − VeLABi\nAutonomous Inland and Short Sea Shipping Conference ; AISS2020 ; 23 October 2020, Duisburg,Duisburg (2020)\n-\nModellbildung und Simulation der Dynamik und Elastokinematik von Radaufhängungen für EchtzeitanwendungenDuisburg, Essen (2020) XXXI, 212 Seiten\n-\nReal-Time Capable Calculation of Reaction Forces of Multibody Systems Using Optimized Bushings on the Example of a Vehicle Wheel SuspensionIn: Multibody Dynamics 2019: Proceedings of the 9th ECCOMAS Thematic Conference on Multibody Dynamics / ECCOMAS ; European Congress on Computational Methods in Applied Sciences and Engineering ; University of Duisburg-Essen, Duisburg, Germany, 15-18 July 2019 / Kecskeméthy, Andrés; Geu Flores, Francisco (Hrsg.) 2020, S. 409 - 416ISBN: 978-3-030-23131-6; 978-3-030-23132-3\n-\nGeneralized method for real-time object-oriented modeling and simulation of systems applied to a vehicle wheel suspension mechanismIn: Machines, mechanism and robotics: proceedings of iNaCoMM 2017 / iNaCoMM ; 3rd International and 18th National Conference on Machines and Mechanisms ; Mumbai, 13 to 15 December 2017 / Badodkar, D. N.; Dwarakanath, T. A. (Hrsg.) 2019, S. 463 - 474ISBN: 978-981-10-8596-3; 978-981-10-8597-0\n-\nValidation of a suspension model and its eastic behaviour demonstrated at the formula student race car of the University Duisburg-EssenIn: Journal of mechanical engineering / International Conference on Recent Advances in Automotive Engineering & Mobility Research 2017 (ReCAR2017) ; Advances in Automotive Engineering & Mobility Research ; Selangor, Malaysia, 8th-10th August 2017 (2018) Nr. SI7(1), S. 35 - 57ISSN: 1823-5514; 2550-164X\n-\nReal-time Calculation of Reaction Forces and Elasticities in Vehicle Wheel Suspensions\n14th International Symposium on Advanced Vehicle Control ; AVEC 2018 ; Beijing, July 16th to July 20th, 2018,(2018)\n-\nCharacterizing spring durability for automotive ride using artificial neural network analysisIn: International Journal of Engineering & Technology Jg. 7 (2018) Nr. 3.17, S. 47 - 53ISSN: 2227-524X\n-\nCharacterizing spring durability for automotive ride using artificial neural network analysisIn: International Journal of Engineering and Technology (UAE) Jg. 7 (2018) Nr. 3, S. 47 - 53ISSN: 2227-524X\n-\nObserving the Durability Effects of a Formula Student Electric Car using Acceleration and Strain SignalsIn: Mobilität und digitale Transformation: technische und betriebswirtschaftliche Aspekte / Wissenschaftsforum Mobilität ; Mobility and Digital Transformation – Challenges and Future Paths ; Duisburg, 29.06.2017 / Proff, Heike; Fojcik, Thomas Martin (Hrsg.) 2018, S. 259 - 280ISBN: 978-3-658-20778-6; 978-3-658-20779-3\n-\nPassing control between driver and highly automated driving functionsIn: Intelligent computing, networked control, and their engineering applications: Proceedings; Part II / International Conference on Life System Modeling and Simulation ; International Conference on Life System Modeling and Simulation ; LSMS ; ICSEE ; Nanjing, China, September 22-24, 2017 / Yue, Dong; Peng, Chen; Du, Dajun; Zhang, Tengfei; Zheng, Min; Han, Qinglong (Hrsg.) 2017, S. 629 - 638ISBN: 978-981-10-6372-5; 978-981-10-6373-2\n-\nEchtzeitfähige objektorientierte Modellbildung am Beispiel einer Fahrzeug-RadaufhängungIn: Dritte IFToMM D-A-CH Konferenz 2017: Tagungsband / Dritte IFToMM D-A-CH Konferenz, 15./16. Februar 2017, TU Chemnitz 2017, S. 9 - 16ISBN: 978-3-940402-08-0\n-\nAn experimental comparison between a novel and a conventional cooling system for the blown film processIn: Proceedings of PPS-31: the 31st International Conference of the Polymer Processing Society - conference papers / International Conference of the Polymer Processing Society, Jeju Island, Korea, 7-11 June 2015 / Rhee, Byungohk (Hrsg.) 2016ISBN: 978-0-7354-1360-3\n-\nEinfluss der Radaufhängungskomponenten auf die Energieeffizienz des GesamtfahrzeugsIn: Nationale und internationale Trends in der Mobilität: technische und betriebswirtschaftliche Aspekte / 7. Wissenschaftsforum Mobilität ; Duisburg, 18.06.2015 / Proff, Heike; Fojcik, Thomas Martin (Hrsg.) 2016, S. 243 - 263ISBN: 978-3-658-14562-0; 978-3-658-14563-7\n-\nLaufzeitoptimierte Vorderachsenmodellierung unter Abbildung von Elastizitäten mittels Ersatzsteifigkeiten der Lager Runtime-optimized front axis models with elasticity effects using equivalent stiffnessIn: Zweite IFToMM D-A-CH Konferenz 2016 / IFToMM D-A-CH ; 25.-26. Februar 2016, Universität Innsbruck 2016\n-\nA numercial verification and experimental validation of the multi-jet cooling system for the blown film applicationIn: Annual Technical Conference - ANTEC, Conference Proceedings / ANTEC 2015, Orlando, USA, 23. - 25.03.2015 2015, S. 1067 - 1072ISBN: 978-0-9850112-7-7\n-\nA numerical verification and experimental validation of the multi-jet cooling system for the blown film applicationIn: Proceedings of the technical conference & exhibition: Orlando, Florida, USA March 23-25, 2015 / ANTEC 2015 2015ISBN: 978-0-9850112-7-7\n-\nAdvantages and potentials of a blown film cooling system with a complete housing of the tube formation zoneIn: Proceedings of the Regional Conference Graz 2015 - Polymer Processing Society PPS: Conference Papers / Regional Conference. Polymer Processing Society, Graz, Austria, 21-25 September 2015 2015ISBN: 978-0-7354-1441-9\n-\nEinfluss von Elastizitäten in Fahrwerken auf die FahrdynamikIn: Fachtagung Mechatronik 2015: Dortmund (12.03.-13.03.2015) / VDI/VDE Mechatronik 2015, 12.-13. März 2015, Dortmund / Bertram, Torsten; Corves, Burkhard; Janschek, Klaus (Hrsg.) 2015, S. 155 - 160ISBN: 978-3-00-048814-6\n-\nDevelopment of a chassis model including elastic behavior for real-time applicationsIn: 6th International Munich Chassis Symposium 2015: chassis.tech plus / International Munich Chassis Symposium (6. : 2015 : München) / Pfeffer, Peter (Hrsg.) 2015, S. 257 - 281ISBN: 978-3-658-09710-3; 978-3-658-09711-0\n-\nDichtelement zur Dichtung eines Spaltes [Patentschrift](2014)\n-\nSealing element for sealing a gap [Patentschrift](2014)\n-\nSealing element for sealing a gap [Patentschrift](2014)\n2023\n2022\n2021\n2020\n2019\n2018\n2017\n2016\n2015\n2014\n2013\n- S. Bez, W. Esser, F. Hoppe, F. E. Kracht, A. Placzek, J. Sander, M. Schüler, D. Kicza, and H. Thermann, Turbinenleitschaufelsegmente mit integrierter Dichtung und Abrasivschicht zur Unterstützung der Dichtwirkung bei Anwendung des Nut-Feder-Prinzips, Technik Up2date, no. 2, pp. 37, 38, 22.01.2013, 2013.\n2012\n- F. E. Kracht, F. Hoppe, M. Link, R. Kueperkoch, P. Kreutzer, V. Veitsmann, J. Wilkes, and P. Hinkerohe, Mediumgefüllte Dichtungen, Technik Up2date, no. 24, pp. 52, 53, 23.11.2012, 2012.\n- T. Buchal, R. Küperkoch, O. Schneider, H. Thermann, M. Schüler, M. Links, F. E. Kracht, and N. Flohr, Dichtung mit extra elastischen Enden, Technik Up2date, no. 23, pp. 71, 72, 09.11.2012, 2012.\n- F. Ahmad, and F. E. Kracht, Schräg geriffelte Dichtung, Technik Up2date, no. 23, pp. 65, 09.11.2012, 2012."))

Wissenschaftlicher Mitarbeiter: Dr.-Ing. Frédéric Etienne Kracht
Universität Duisburg-Essen
Lehrstuhl für Mechatronik
Orientierung
Kontakt
(Du) +49 (0)203 379-2199
(Du) +49 (0)203 379-4494
(Du) patrizia.fichera@uni-due.de
Suchen
A-Z
DE
English
Studieren an der UDE
Social Media
Navigation
Home
Lehre/Studium
Lehre/Studium
Lehrveranstaltungen
Bachelor- und Masterarbeiten
Master „Automotive Engineering & Management Executive“
Forschung
Forschung
Lehrstuhlschwerpunkte
Mobilität
Robotik
Fakultätsschwerpunkte
Preise
Abgeschlossene Projekte
Publikationen
Veranstaltungen
Transfer
News/Presse
Team
Organisation
Anreise
Stellenausschreibungen
Wissenschaftlicher Mitarbeiter: Dr.-Ing. Frédéric Etienne Kracht
Lehrstuhl für Mechatronik
Dr.-Ing. Frédéric Etienne Kracht
​Raum: MD-227
Telefon: +49 (0)203 379-3951
Telefax: +49 (0)203 379-4143
E-Mail:
Frederic.Kracht@uni-due.de
LinkedIn
XING
ResearchGate
Forschungsprofil
Modellierung von Fahrwerkelastizitäten
echtzeitfähige Fahrzeugsimulation
Titel der Dis

In [33]:
import lxml.html
import requests
from lxml import etree

req = requests.get("https://www.uni-due.de/cenide/netz/personen.php")

parser = etree.HTMLParser(recover=True, no_network=True, remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False, encoding="utf-8", )
root = lxml.html.document_fromstring(req.text)

root = lxml.html.document_fromstring('\n<!DOCTYPE html>\n<html lang="de-1996">\n    <head>\n        <meta name="GENERATOR" content="IMPERIA 9.2.14" />\n\n        <meta charset="utf-8">\n        <title>Personen</title>\n        <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">\n        <link rel="stylesheet" type="text/css" href="/portal/4/css/ude2017.min.css" media="all">\n        <link rel="stylesheet" type="text/css" href="/portal/4/css/legacy.css" media="all">\n        <script src="/portal/4/js/prejquery.js"></script>\n        <link href="/portal/4/js/jquery/jquery-ui/jquery-ui-1.12.1.custom/jquery-ui.min.css" rel="stylesheet">\n        <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">\n        <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">\n        <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">\n        <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">\n        <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">\n        <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">\n        <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">\n        <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">\n        <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">\n        <link rel="icon" type="image/png" sizes="192x192"  href="/android-icon-192x192.png">\n        <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">\n        <link rel="icon" type="image/png" sizes="96x96" href="/favicon-96x96.png">\n        <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">\n        <link rel="manifest" href="/manifest.json">\n        <meta name="msapplication-TileColor" content="#ffffff">\n        <meta name="msapplication-TileImage" content="/ms-icon-144x144.png">\n        <meta name="theme-color" content="#ffffff">\n        <meta name="robots" content="index,follow">\n        <meta name="rubrik"   content="CENIDE 2007 (/cenide) ... Netz (/cenide/netz) ... Netz (/cenide/netz)" />\n        <meta name="X-Imperia-Live-Info" content="6f31e55b-40ae-2401-5560-5694e68b13a1/5236/5584/367356" />\n    </head>\n    <!-- code cleaned by DocumentCleaner.pm 7.7.2017 -->\n<body>\n        <header>\n            <div class="container" id="siteHeader">\n                <a href="https://www.uni-due.de/de/index.php" id="udeLogo"><span>Universität Duisburg-Essen</span><img src="/portal/4/images/UDE-logo-claim.svg" width="1052" height="414" alt=""></a>\n                <div id="orgaunitTitle">\n                    <a href="/cenide/netz">\n                        <img src="/imperia/md/images/cenide/letterbox__230_90_ffffff_be3d7103c3a4f8709288b39fb2a80421_netz_logo_2zeilig_rgb_150.jpg" class="float-right" alt="Logo der Organisationseinheit NETZ">\n                        <h1>NETZ</h1><h2>NanoEnergieTechnikZentrum</h2>\n                    </a>\n                </div>\n            </div>\n        </header>\n        <div id="navigationMainContainer">\n            <div class="container">\n                <div id="headerLine">\n                    <div class="container" id="headerLineContent">\n                        <nav class="col col-auto" id="navigationHeader">\n                            <ul class="nav">\n                                <li><a href="https://www.uni-due.de/suche/"><i class="far fa-fw fa-search"></i><span class="icon-label">Suchen</span></a></li>\n                                <li id="navigationStakeholder"><a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"><i class="far fa-fw fa-user"></i><span class="icon-label">Information for...</span></a>\n                                    <ul class="dropdown-menu">\n                                        <li><a href="https://www.uni-due.de/nano-schuelerlabor/" title="Pupils">Pupils</a></li>\n                                        <li><a href="https://www.uni-due.de/nanoengineering/" title="Students">Students</a></li>\n                                        <li><a href="https://www.uni-due.de/cenide/research.php" title="Scientists">Scientists</a></li>\n                                        <li><a href="https://www.uni-due.de/netz/" title="Companies">Companies</a></li>\n                                        <li><a href="https://www.uni-due.de/cenide/press.shtml" title="Journalists">Journalists</a></li>\n                                        <li><a href="https://www.uni-due.de/cenide/members_en.shtml" title="Members">Members</a></li>\n                                    </ul>\n                                </li>                                <li id="navigationLanguage"><a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"><i class="flag flag-de"></i><span class="current-language">DE</span></a>\n                                     <ul class="dropdown-menu">\n                                            <li><a href="/cenide/netz/en/people.php"><i class="flag flag-en"></i>English</a></li>\n                                        </ul>\n                                </li>\n                                <li class="desktop_only"><a href="/studierendensekretariat/"><i class="far fa-fw fa-check"></i><span class="icon-label">Studieren an der UDE</span></a></li>\n                            </ul><!--/.nav-->\n                        </nav><!--/#navigationHeader-->\n                    </div>\n                </div>\n                <nav class="navbar navbar-expand-lg navbar-dark" id="navigationMain">\n                    <div class="row">\n                        <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navigationMainContent" aria-controls="navigationMainContent" aria-expanded="false" aria-label="Toggle navigation"><i class="far fa-bars"></i><span class="navbar-toggler-label">Navigation</span></button>\n                        <div class="collapse navbar-collapse" id="navigationMainContent">\n                            <ul>                                 <li>\n                                    <a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" title="About"><i class="far fa-info-circle"></i> About</a>\n                                    <div class="navbarSubContainer dropdown-menu">\n\n                                        <ul>\n\n                                            <li>\n                                                <a href="/cenide/netz/about.php" title="Zahlen, Daten, Fakten">Zahlen, Daten, Fakten</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/kooperationen.php" title="Kooperationen">Kooperationen</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                        </ul>\n                                    </div>\n                                </li>\n                                <li>\n                                    <a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" title="Personen"><i class="far fa-users"></i> Personen</a>\n                                    <div class="navbarSubContainer dropdown-menu">\n                                        <header><a href="/cenide/netz/personen.php" title="Personen">Personen</a></header>\n                                        <ul>\n\n                                            <li>\n                                                <a href="/cenide/netz/arbeitsgruppen.php" title="Arbeitsgruppen">Arbeitsgruppen</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/direktorium.php" title="Direktorium">Direktorium</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/administration_und_technik.php" title="Administration und Technik">Administration und Technik</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/liste_der_netz_nutzer.php" title="Liste der NETZ-Nutzer">Liste der NETZ-Nutzer</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                        </ul>\n                                    </div>\n                                </li>\n                                <li>\n                                    <a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" title="Forschung"><i class="far fa-flask"></i> Forschung</a>\n                                    <div class="navbarSubContainer dropdown-menu">\n                                        <header><a href="/cenide/netz/forschung.php" title="Forschung">Forschung</a></header>\n                                        <ul>\n\n                                            <li>\n                                                <a href="/cenide/netz/synthese.php" title="Synthese">Synthese</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/prozessierung.php" title="Prozessierung">Prozessierung</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/analytik.php" title="Analytik">Analytik</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/simulation.php" title="Simulation">Simulation</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/katalyse.php" title="Katalyse">Katalyse</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/batterien.php" title="Batterien">Batterien</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                        </ul>\n                                    </div>\n                                </li>\n                                <li>\n                                    <a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" title="Aktuelles"><i class="far fa-newspaper"></i> Aktuelles</a>\n                                    <div class="navbarSubContainer dropdown-menu">\n                                        <header><a href="/cenide/netz/aktuelles.php" title="Aktuelles">Aktuelles</a></header>\n                                        <ul>\n\n                                            <li>\n                                                <a href="/cenide/netz/veranstaltungen.php" title="Veranstaltungen">Veranstaltungen</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/download.php" title="Downloads">Downloads</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                        </ul>\n                                    </div>\n                                </li>\n                                <li><a href="/cenide/netz/kontakt.php" title="Kontakt / Anfahrt"><i class="far fa-phone"></i> Kontakt / Anfahrt</a></li>\n                                <li>\n                                    <a href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false" title="Intern"><i class="fa fa-users"></i> Intern</a>\n                                    <div class="navbarSubContainer dropdown-menu">\n                                        <header><a href="/cenide/netz/intern.php" title="Intern">Intern</a></header>\n                                        <ul>\n\n                                            <li>\n                                                <a href="/cenide/netz/sicherheit.php" title="Sicherheit">Sicherheit</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/laborinfo.php" title="Laborordnung und -info">Laborordnung und -info</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/nutzerdaten.php" title="NETZ-Nutzerdaten">NETZ-Nutzerdaten</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/anleitungen.php" title="Anleitungen">Anleitungen</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/infrastruktur.php" title="Infrastruktur">Infrastruktur</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/raumbuchung.php" title="Raumbuchung">Raumbuchung</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/mailingliste.php" title="Mailingliste">Mailingliste</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/posterdruck.php" title="Posterdruck">Posterdruck</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                            <li>\n                                                <a href="/cenide/netz/ansprechpartner.php" title="Ansprechpartner">Ansprechpartner</a>\n                                                <ul>\n\n                                                </ul>\n                                            </li>\n\n                                        </ul>\n                                    </div>\n                                </li>\n                                <li><a href="/cenide" title="CENIDE"><i class="far fa-building"></i> CENIDE</a></li>\n                                <li><a href="/cenide/ican/" title="ICAN"><i class="far fa-microscope"></i> ICAN</a></li>\n\n                                <li id="navbarOverflowButton"><a href="javascript:void(false);"></a></li>\n                            </ul>\n                        </div>\n                    </div>\n                </nav>\n            </div>\n        </div>\n        <main>\n            <div id="pagetitlecontainer">\n                <div class="container-background">\n                    <div class="container pagetitle">\n                        <h1 class="display-1">Personen</h1>\n                        <nav class="rootline">\n                            <ol class="nav" vocab="http://schema.org/" typeof="BreadcrumbList">\n                                <li property="itemListElement" typeof="ListItem">\n                                    <a href="/cenide/netz" property="item" typeof="WebPage">\n                                        <i class="far fa-home"></i>\n                                        <span property="name">NETZ</span>\n                                    </a>\n                                    <meta property="position" content="0">\n                                </li>\n                            </ol>\n                        </nav>\n                    </div>\n                </div>\n            </div>\n            <div id="content__standard__main">\n            </div>\n            <div id="middlecontainer">\n                <div id="content__old" class="container-background bg-colored bg-gray">\n                    <div class="container">\n                        <div class="row">\n                            <!-- Inhalts-Spalte -->\n                            <div id="content__old__main" class="col-12">\n    <!--[Flexmodul:_portal_ude_textarea 2]-->\n                <div class="card texteditor">\n                    <div class="card-body">\n                            \n                            <p>Im NETZ arbeiten rund 120 <a href="https://www.uni-due.de/cenide/netz/liste_der_netz_nutzer.php">Wissenschaftler*innen</a> aus der Chemie, der Physik und den Ingenieurwissenschaften der Universit&auml;t Duisburg-Essen sowie Kooperationspartner aus Wissenschaft und Industrie. Die <a href="https://www.uni-due.de/cenide/netz/arbeitsgruppen.php">Arbeitsgruppen</a> ziehen nur f&uuml;r einen begrenzten Projektzeitraum ein. So bleibt die Forschung im NETZ flexibel und offen f&uuml;r neue Entwicklungen und Themen.</p>\n<p>Das NETZ wird von einem <a href="https://www.uni-due.de/cenide/netz/direktorium.php">Direktorium</a> geleitet.</p>\n                            \n                    </div><!--card-body-->\n                </div><!--card-->\n        <!--[Flexmodul:_portal_ude_doppelcontainer]-->\n                <div class="card-deck">\n                            <div class="card card-vertical">\n                             <figure class="card-img-top" style="position:relative">\n                                   <img src="/imperia/md/images/cenide/netz/fittosize_501_0_f9ca081d29d2d97368f8fcf4cd11b7db_hussam_sallum.jpg" alt="Hussam Sallum">\n                             </figure>\n                               <div class="card-body">\n                                 <div class="card-text"></div>\n                               </div><!--card-body-->\n                            </div><!--card-->\n                </div><!--card-deck-->\n                            </div>\n                        </div><!--row-->\n                    </div><!--container-->\n                </div>\n            </div><!--middlecontainer-->\n            <div id="content__sub__main">\n            </div>\n            <a href="#top" title="zum Seitenanfang" class="jumpto-top"><i class="fas fa-chevron-up"></i></a>\n        </main>\n        <footer>\n            <div class="container-background bg-imprint darkbackground imprintline">\n                <div class="container">\n                    <div class="row">\n                        <div class="col" id="footerLogo"><a href="https://www.uni-due.de/de/index.php"><img src="/portal/4/images/UDE-logo-claim-dark.svg" width="1052" height="414" alt=""></a></div>\n                        <div class="col col-md-auto justify-content-end">\n                            <nav id="navigationFooter" class="navbar">\n                                <ul>\n                                    <li><a href="/infoline/"><i class="far fa-fw fa-phone"></i>Infoline</a></li>\n                                    <li><a href="/de/hilfe_im_notfall.php"><i class="far fa-fw fa-exclamation-triangle"></i>Hilfe im Notfall</a></li>\n                                    <li><a href="/de/impressum.shtml"><i class="far fa-comments"></i>Impressum</a></li>\n                                    <li><a href="/cenide/datenschutzerklaerung.php"><i class="far fa-user-shield"></i>Datenschutz</a></li>\n\n                                </ul>\n                            </nav>\n                            <div id="footerCopyright" class="navbar">\n                                <ul class="nav">\n                                    <li>&copy;&nbsp;UDE</li>\n                                    <li>Letzte Änderung:&nbsp;30.10.2020</li>\n                                </ul>\n                            </div>\n                        </div>\n                    </div>\n                </div>\n            </div>\n        </footer>\n        <script src="/portal/4/js/complete.js?20230531081547"></script>\n    </body>\n</html>\n', \
                                     parser=parser)

root = root.find("body")
for not_wanted in ['header', 'nav', 'footer', 'script', 'pre', 'code', 'canvas', "dialog", "iframe", "noscript", "source", "svg", "video", "track", "style", "link"]:
        for elem in root.findall(".//" + not_wanted):
            elem.drop_tree()

AttributeError: 'lxml.etree._Element' object has no attribute 'drop_tree'

In [10]:
import requests
import lxml.html
from lxml import etree

req = requests.get("https://www.uni-due.de/zmb/members/index.php")

root = lxml.html.document_fromstring(req.text)

for not_wanted in ['header', 'nav', 'footer', 'script', 'pre', 'code', 'canvas', "dialog", "iframe", "noscript", "source", "svg", "video", "track", "style", "link"]:
    for elem in root.findall(".//" + not_wanted):
        elem.drop_tree()
for drop in ["strong", "i", "emph", "span"]:
    for elem in root.findall(".//" + drop):
        elem.text=str(elem.text) + " "
        elem.drop_tag()

print(etree.tostring(root, pretty_print=True).decode("utf-8"))

<html lang="en">
    <head>
        <meta name="GENERATOR" content="IMPERIA 9.2.14"/>

        <meta charset="utf-8"/>
        <title>ZMB Members</title>
        <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"/>
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        <meta name="msapplication-TileColor" content="#ffffff"/>
        <meta name="msapplication-TileImage" content="/ms-icon-144x144.png"/>
        <meta name="theme-color" content="#ffffff"/>
        <meta name="keywords" content="ZMB members, ZMB member list, ZMB members overview"/>
        <meta name="description" content="List of all Members of the Center of Medical Biotechnology (ZMB) at the University of Duisburg-Essen"/>
        <meta name="robots" content="index,follow"/>
        <meta name="rubrik" content="Zentrum f&#252;r Medizinische Biotechnologie (/zmb) 2007 ... Me

In [3]:
import requests
import lxml.html
from lxml import etree

req = requests.get("https://www.gmg.ruhr-uni-bochum.de/ueber-uns/mitarbeitende")

root = lxml.html.document_fromstring(req.text)

# for not_wanted in ['header', 'nav', 'footer', 'script', 'pre', 'code', 'canvas', "dialog", "iframe", "noscript", "source", "svg", "video", "track", "style", "link"]:
#     for elem in root.findall(".//" + not_wanted):
#         elem.drop_tree()

print(etree.tostring(root, pretty_print=True).decode("utf-8"))

<html lang="de" dir="ltr" prefix="content: http://purl.org/rss/1.0/modules/content/  dc: http://purl.org/dc/terms/  foaf: http://xmlns.com/foaf/0.1/  og: http://ogp.me/ns#  rdfs: http://www.w3.org/2000/01/rdf-schema#  schema: http://schema.org/  sioc: http://rdfs.org/sioc/ns#  sioct: http://rdfs.org/sioc/types#  skos: http://www.w3.org/2004/02/skos/core#  xsd: http://www.w3.org/2001/XMLSchema# ">
  <head>
    <meta charset="utf-8"/>
<link rel="canonical" href="https://www.gmg.ruhr-uni-bochum.de/ueber-uns/mitarbeitende"/>
<meta name="Generator" content="Drupal 9 (https://www.drupal.org)"/>
<meta name="MobileOptimized" content="width"/>
<meta name="HandheldFriendly" content="true"/>
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"/>
<meta http-equiv="x-ua-compatible" content="ie=edge"/>
<link rel="shortcut icon" href="/themes/custom/rub_theme/favicon.ico" type="image/vnd.microsoft.icon"/>
<link rel="alternate" hreflang="de" href="https://www.gmg.ruhr-