In [1]:
import spacy
from spacy.lang.de import German
from textblob import TextBlob
from textstat import flesch_kincaid_grade, gunning_fog
from rake_nltk import Rake
import nltk

In [14]:
class GermanTextAnalyzer():
    def __init__(self, text) -> None:
        self.nlp = German()  # Load the German NLP pipeline
        self.nlp.add_pipe('sentencizer')
        self.doc = self.nlp(text)  # Process the text
        self.rake = Rake()  # Initialize RAKE for keyphrase extraction

    def analyze(self):
        results = {
            'word_count': self.word_count(self.doc),
            'sentence_count': self.sentence_count(self.doc),
            'avg_word_length': self.avg_word_length(self.doc),
            'fk_grade': self.flesch_kincaid_grade(self.doc),
            'gf_index': self.gunning_fog(self.doc),
            'sentence_complexity': self.sentence_complexity(self.doc),
            'lexical_diversity': self.lexical_diversity(self.doc),
            # 'entities': self.entities(self.doc),
            'sentiment': self.sentiment(self.doc),
            # 'keyphrases': self.keyphrases(self.doc)
        }
        return results

    def word_count(self, doc):
        return len(doc)
    
    def sentence_count(self, doc):
        return len(list(doc.sents))
    
    def avg_word_length(self, doc):
        word_count = len(doc)
        return sum(len(token.text) for token in doc) / word_count if word_count > 0 else 0
    
    def flesch_kincaid_grade(self, doc):
        return flesch_kincaid_grade(doc.text)
    
    def gunning_fog(self, doc):
        return gunning_fog(doc.text)
    
    def sentence_complexity(self, doc):
        sentence_count = len(list(doc.sents))
        return sum(len(sent) for sent in doc.sents) / sentence_count if sentence_count > 0 else 0
    
    def lexical_diversity(self, doc):
        word_count = len(doc)
        return len(set(token.text.lower() for token in doc)) / word_count if word_count > 0 else 0
    
    def sentiment(self, doc):
        blob = TextBlob(doc.text)
        polarity = blob.sentiment.polarity
        if polarity > 0:
            return "Positive"
        elif polarity < 0:
            return "Negative"
        else:
            return "Neutral"


In [15]:
try:
    # Attempt to access a stopword
    nltk.corpus.stopwords.words("german") 
except LookupError as e:
    # If stopwords aren't found, download them
    print(f"{e}\nDownloading stopwords now...")
    nltk.download("stopwords")
    print("Stopwords downloaded successfully. You can now proceed with your code.")

In [16]:
# Create an instance of the analyzer
analyzer = GermanTextAnalyzer("Dies ist ein Beispieltext auf Deutsch al amn.")

# Analyze the text and get the results
results = analyzer.analyze()

# Print the analysis results
print(results)


{'word_count': 9, 'sentence_count': 1, 'avg_word_length': 4.222222222222222, 'fk_grade': 0.5, 'gf_index': 3.2, 'sentence_complexity': 9.0, 'lexical_diversity': 1.0, 'sentiment': 'Neutral'}
