# Preprocessing von Textdaten

In [50]:
from textblob import TextBlob
from string import punctuation, ascii_lowercase, ascii_letters, ascii_uppercase
from nltk.stem import PorterStemmer, SnowballStemmer
from nltk.tokenize import WordPunctTokenizer

In [38]:
# One-time setup: uncomment and run to download the corpora TextBlob relies on.
#!python -m textblob.download_corpora

## Daten laden

In [39]:
# Read the entire quote file into a single UTF-8 decoded string.
with open("output/quote_10.txt", mode="r", encoding="utf-8") as quote_file:
    content = quote_file.read()

In [40]:
# Display the raw text as it was read from the file.
content

"‚ÄúThis life ü•≥ is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most impo

## Vorbereiten

In [41]:
# Normalize case: convert the whole text to lowercase.
content = content.lower()

In [42]:
# Remove special characters: every ASCII punctuation mark plus the typographic
# double quotes (U+201C and U+201D, written as escapes so the literal survives
# any re-encoding of the notebook).
# Characters are deleted, not replaced by a space, so a removed " - " leaves a
# double space behind and "you're" becomes "youre".
chars_to_remove = set(punctuation + "\u201c\u201d")
new_content = "".join(char for char in content if char not in chars_to_remove)

content = new_content

In [43]:
# Display the text again to check the result of the cleaning steps.
content

'this life ü•≥ is what you make it no matter what youre going to mess up sometimes its a universal truth but the good part is you get to decide how youre going to mess it up girls will be your friends  theyll act like it anyway but just remember some come some go the ones that stay with you through everything  theyre your true best friends dont let go of them also remember sisters make the best friends in the world as for lovers well theyll come and go too and baby i hate to say it most of them  actually pretty much all of them are going to break your heart but you cant give up because if you give up youll never find your soulmate youll never find that half who makes you whole and that goes for everything just because you fail once doesnt mean youre gonna fail at everything keep trying hold on and always always always believe in yourself because if you dont then who will sweetie so keep your head high keep your chin up and most importantly keep smiling because lifes a beautiful thing 

## TextBlob

In [44]:
# Wrap the cleaned text in a TextBlob and display its word tokens.
blob = TextBlob(content)
blob.words

WordList(['this', 'life', 'ü•≥', 'is', 'what', 'you', 'make', 'it', 'no', 'matter', 'what', 'youre', 'going', 'to', 'mess', 'up', 'sometimes', 'its', 'a', 'universal', 'truth', 'but', 'the', 'good', 'part', 'is', 'you', 'get', 'to', 'decide', 'how', 'youre', 'going', 'to', 'mess', 'it', 'up', 'girls', 'will', 'be', 'your', 'friends', 'theyll', 'act', 'like', 'it', 'anyway', 'but', 'just', 'remember', 'some', 'come', 'some', 'go', 'the', 'ones', 'that', 'stay', 'with', 'you', 'through', 'everything', 'theyre', 'your', 'true', 'best', 'friends', 'dont', 'let', 'go', 'of', 'them', 'also', 'remember', 'sisters', 'make', 'the', 'best', 'friends', 'in', 'the', 'world', 'as', 'for', 'lovers', 'well', 'theyll', 'come', 'and', 'go', 'too', 'and', 'baby', 'i', 'hate', 'to', 'say', 'it', 'most', 'of', 'them', 'actually', 'pretty', 'much', 'all', 'of', 'them', 'are', 'going', 'to', 'break', 'your', 'heart', 'but', 'you', 'cant', 'give', 'up', 'because', 'if', 'you', 'give', 'up', 'youll', 'never'

In [45]:
# Lemmatize each token individually. lemmatize() is called without a
# part-of-speech argument, so words are presumably treated as nouns; that would
# explain why "as" shows up as "a" and "its" as "it" in the printed lemmas.
# TODO confirm against the TextBlob documentation and pass POS tags if needed.
lemmas = [word.lemmatize() for word in blob.words]

In [46]:
# Reduce every token to its Porter stem, keeping the order of blob.words.
stemmer = PorterStemmer()
stemms = [stemmer.stem(word) for word in blob.words]

In [47]:
# Print the lemmas and the stems on consecutive lines for direct comparison.
print(lemmas, stemms, sep="\n")

['this', 'life', 'ü•≥', 'is', 'what', 'you', 'make', 'it', 'no', 'matter', 'what', 'youre', 'going', 'to', 'mess', 'up', 'sometimes', 'it', 'a', 'universal', 'truth', 'but', 'the', 'good', 'part', 'is', 'you', 'get', 'to', 'decide', 'how', 'youre', 'going', 'to', 'mess', 'it', 'up', 'girl', 'will', 'be', 'your', 'friend', 'theyll', 'act', 'like', 'it', 'anyway', 'but', 'just', 'remember', 'some', 'come', 'some', 'go', 'the', 'one', 'that', 'stay', 'with', 'you', 'through', 'everything', 'theyre', 'your', 'true', 'best', 'friend', 'dont', 'let', 'go', 'of', 'them', 'also', 'remember', 'sister', 'make', 'the', 'best', 'friend', 'in', 'the', 'world', 'a', 'for', 'lover', 'well', 'theyll', 'come', 'and', 'go', 'too', 'and', 'baby', 'i', 'hate', 'to', 'say', 'it', 'most', 'of', 'them', 'actually', 'pretty', 'much', 'all', 'of', 'them', 'are', 'going', 'to', 'break', 'your', 'heart', 'but', 'you', 'cant', 'give', 'up', 'because', 'if', 'you', 'give', 'up', 'youll', 'never', 'find', 'your', 