# Natural Language Processing (NLP)

In [7]:
%%capture
import sys
!{sys.executable} -m pip install panda
!{sys.executable} -m pip install spacy
!{sys.executable} -m spacy download de
!{sys.executable} -m spacy download de_news_web_sm
!{sys.executable} -m spacy download en
!{sys.executable} -m spacy download en_core_web_sm

## Spacy / 1. Vers im Alten Testament (deutsch) / Dependency Parsing

In [8]:
import spacy

# Load the German pipeline by its full package name. The 'de' shortcut link is
# a spaCy 2.x mechanism that spaCy 3 no longer accepts; the full name works on
# both and matches how the English model ('en_core_web_sm') is loaded below.
de_nlp = spacy.load('de_core_news_sm')

# First verse of the Old Testament, in German.
de_doc = de_nlp(u'Am Anfang schuf Gott Himmel und Erde.')

# Render the dependency parse inline in the notebook.
spacy.displacy.render(de_doc, style='dep', jupyter=True)

## Spacy / 1. Vers im Alten Testament (deutsch) / Named Entity Recognition (NER)

In [9]:
import spacy

# Load the German pipeline by its full package name. The 'de' shortcut link is
# a spaCy 2.x mechanism that spaCy 3 no longer accepts; the full name works on
# both and matches how the English model ('en_core_web_sm') is loaded below.
de_nlp = spacy.load('de_core_news_sm')

# First verse of the Old Testament, in German.
de_doc = de_nlp(u'Am Anfang schuf Gott Himmel und Erde.')

# Render the recognised named entities inline in the notebook.
spacy.displacy.render(de_doc, style='ent', jupyter=True)

## Spacy / 1. Vers im Alten Testament (deutsch) / Lemmatization + Stop Words

In [10]:
import pandas
import spacy
import tabulate

# Load the German pipeline by its full package name. The 'de' shortcut link is
# a spaCy 2.x mechanism that spaCy 3 no longer accepts; the full name works on
# both and matches how the English model ('en_core_web_sm') is loaded below.
de_nlp = spacy.load('de_core_news_sm')

# First verse of the Old Testament, in German.
de_doc = de_nlp(u'Am Anfang schuf Gott Himmel und Erde.')

# One row per token: surface form, lemma, coarse part-of-speech tag and
# whether the token is flagged as a stop word.
df = pandas.DataFrame(
    [[t.text, t.lemma_, t.pos_, t.is_stop] for t in de_doc],
    columns=['text', 'lemma', 'pos', 'stop'])

# Print as a plain-text table in psql style.
print(tabulate.tabulate(df, headers='keys', tablefmt='psql'))

+----+--------+----------+-------+--------+
|    | text   | lemma    | pos   | stop   |
|----+--------+----------+-------+--------|
|  0 | Am     | Am       | ADP   | False  |
|  1 | Anfang | Anfang   | NOUN  | False  |
|  2 | schuf  | schaffen | VERB  | False  |
|  3 | Gott   | Gott     | NOUN  | False  |
|  4 | Himmel | himmeln  | NOUN  | False  |
|  5 | und    | und      | CONJ  | True   |
|  6 | Erde   | erden    | NOUN  | False  |
|  7 | .      | .        | PUNCT | False  |
+----+--------+----------+-------+--------+


## Spacy / Maschinenbau (englisch) / Dependency Parsing

In [11]:
import spacy

en_nlp = spacy.load('en_core_web_sm')

# Adjacent string literals are joined into a single sentence; the trailing
# space after the full stop is part of the input text.
en_doc = en_nlp(
    u'Switchgear links generators, transporters, and consumers, '
    u'such as motors and gears. '
)

# displaCy rendering options: compact arc layout and a custom font.
options = dict(compact=True, font='Source Sans Pro')

# Render the dependency parse inline in the notebook.
spacy.displacy.render(en_doc, style='dep', options=options, jupyter=True)

In [12]:
import pandas
import tabulate

# Build one record per token of `en_doc` (parsed in the previous cell):
# surface form, lemma, coarse part-of-speech tag and stop-word flag.
column_names = ['text', 'lemma', 'pos', 'stop']
token_rows = []
for token in en_doc:
    token_rows.append([token.text, token.lemma_, token.pos_, token.is_stop])

df = pandas.DataFrame(token_rows, columns=column_names)

# Print as a plain-text table in psql style.
table_text = tabulate.tabulate(df, headers='keys', tablefmt='psql')
print(table_text)

+----+--------------+-------------+-------+--------+
|    | text         | lemma       | pos   | stop   |
|----+--------------+-------------+-------+--------|
|  0 | Switchgear   | switchgear  | NOUN  | False  |
|  1 | links        | link        | VERB  | False  |
|  2 | generators   | generator   | NOUN  | False  |
|  3 | ,            | ,           | PUNCT | False  |
|  4 | transporters | transporter | NOUN  | False  |
|  5 | ,            | ,           | PUNCT | False  |
|  6 | and          | and         | CCONJ | True   |
|  7 | consumers    | consumer    | NOUN  | False  |
|  8 | ,            | ,           | PUNCT | False  |
|  9 | such         | such        | ADJ   | True   |
| 10 | as           | as          | ADP   | True   |
| 11 | motors       | motor       | NOUN  | False  |
| 12 | and          | and         | CCONJ | True   |
| 13 | gears        | gear        | NOUN  | False  |
| 14 | .            | .           | PUNCT | False  |
+----+--------------+-------------+-------+---