In [None]:
from textblob import TextBlob
import pandas as pd

print("TextBlob 1-Hour Complete Tutorial")
print("=" * 40)


TextBlob 1-Hour Complete Tutorial


In [None]:
# =============================================================================
# SECTION 1: BASICS & TEXT CREATION (10 minutes)
# =============================================================================

print("\n1. CREATING TEXTBLOB OBJECTS")
print("-" * 30)

# Creating TextBlob objects
text1 = "Hello world! This is a simple example."
blob1 = TextBlob(text1)

text2 = """Natural language processing is amazing!
TextBlob makes it very easy to analyze text.
We can perform sentiment analysis, extract nouns, and much more."""
blob2 = TextBlob(text2)

print(f"Original text: {text1}")
print(f"TextBlob object: {blob1}")
print(f"Type: {type(blob1)}")


1. CREATING TEXTBLOB OBJECTS
------------------------------
Original text: Hello world! This is a simple example.
TextBlob object: Hello world! This is a simple example.
Type: <class 'textblob.blob.TextBlob'>


In [None]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
import nltk
nltk.download('brown')

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


True

In [None]:
# =============================================================================
# SECTION 2: BASIC TEXT OPERATIONS (10 minutes)
# =============================================================================

print("\n\n2. BASIC TEXT OPERATIONS")
print("-" * 30)

sample_text = "The quick brown fox jumps over the lazy dog. This is another sentence for testing!"
blob = TextBlob(sample_text)

# Accessing words
print("Words:", blob.words)
print("Number of words:", len(blob.words))

# Accessing sentences
print("\nSentences:")
for i, sentence in enumerate(blob.sentences, 1):
    print(f"  {i}: {sentence}")

# Noun phrases
print("\nNoun phrases:", blob.noun_phrases)

# Word frequency
print("\nWord frequencies:")
word_counts = blob.word_counts
for word, count in list(word_counts.items())[:5]:  # Show first 5
    print(f"  '{word}': {count}")



2. BASIC TEXT OPERATIONS
------------------------------
Words: ['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', 'This', 'is', 'another', 'sentence', 'for', 'testing']
Number of words: 15

Sentences:
  1: The quick brown fox jumps over the lazy dog.
  2: This is another sentence for testing!

Noun phrases: ['quick brown fox jumps', 'lazy dog']

Word frequencies:
  'the': 2
  'quick': 1
  'brown': 1
  'fox': 1
  'jumps': 1


In [None]:

# =============================================================================
# SECTION 3: SENTIMENT ANALYSIS (10 minutes)
# =============================================================================
#https://github.com/sloria/TextBlob/blob/dev/src/textblob/en/en-sentiment.xml


print("\n\n3. SENTIMENT ANALYSIS")
print("-" * 30)

# Sample texts with different sentiments
texts = [
    "I love this product! It's absolutely amazing and wonderful!",
    "This movie is terrible. I hate it so much. Worst experience ever.",
    "The weather is okay today. Nothing special about it.",
    "Python is a great programming language for data science.",
    "I'm feeling sad and disappointed about the results.",
    "The class is very boring",
    "The feedback is very good "
]

print("Sentiment Analysis Results:")
print("Polarity: -1 (negative) to +1 (positive)")
print("Subjectivity: 0 (objective) to 1 (subjective)")
print()

for text in texts:
    blob = TextBlob(text)
    sentiment = blob.sentiment

    # Classify sentiment
    if sentiment.polarity > 0.1:
        mood = "Positive"
    elif sentiment.polarity < -0.1:
        mood = "Negative"
    else:
        mood = "Neutral"

    print(f"Text: {text[:50]}...")
    print(f"  Polarity: {sentiment.polarity:.3f} ({mood})")
    print(f"  Subjectivity: {sentiment.subjectivity:.3f}")
    print()




3. SENTIMENT ANALYSIS
------------------------------
Sentiment Analysis Results:
Polarity: -1 (negative) to +1 (positive)
Subjectivity: 0 (objective) to 1 (subjective)

Text: I love this product! It's absolutely amazing and w...
  Polarity: 0.742 (Positive)
  Subjectivity: 0.833

Text: This movie is terrible. I hate it so much. Worst e...
  Polarity: -0.933 (Negative)
  Subjectivity: 0.967

Text: The weather is okay today. Nothing special about i...
  Polarity: 0.429 (Positive)
  Subjectivity: 0.536

Text: Python is a great programming language for data sc...
  Polarity: 0.800 (Positive)
  Subjectivity: 0.750

Text: I'm feeling sad and disappointed about the results...
  Polarity: -0.625 (Negative)
  Subjectivity: 0.875

Text: The class is very boring...
  Polarity: -1.000 (Negative)
  Subjectivity: 1.000

Text: The feedback is very good ...
  Polarity: 0.910 (Positive)
  Subjectivity: 0.780



In [None]:
import nltk
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [None]:
# =============================================================================
# SECTION 4: PART-OF-SPEECH TAGGING (8 minutes)
# =============================================================================

print("\n4. PART-OF-SPEECH TAGGING")
print("-" * 30)

text = "The beautiful sunset painted the sky with vibrant colors yesterday."
blob = TextBlob(text)

print(f"Text: {text}")
print("\nPart-of-Speech Tags:")

# Common POS tags explanation
pos_meanings = {
    'DT': 'Determiner', 'JJ': 'Adjective', 'NN': 'Noun', 'NNS': 'Plural Noun',
    'VBD': 'Past Tense Verb', 'VBN': 'Past Participle', 'IN': 'Preposition',
    'CC': 'Conjunction', 'RB': 'Adverb', 'PRP': 'Pronoun'
}

for word, tag in blob.tags:
    meaning = pos_meanings.get(tag, 'Other')
    print(f"  {word:<12} -> {tag:<4} ({meaning})")

# Extract specific parts of speech
nouns = [word for word, tag in blob.tags if tag.startswith('NN')]
adjectives = [word for word, tag in blob.tags if tag.startswith('JJ')]
verbs = [word for word, tag in blob.tags if tag.startswith('VB')]

print(f"\nExtracted parts:")
print(f"  Nouns: {nouns}")
print(f"  Adjectives: {adjectives}")
print(f"  Verbs: {verbs}")



4. PART-OF-SPEECH TAGGING
------------------------------
Text: The beautiful sunset painted the sky with vibrant colors yesterday.

Part-of-Speech Tags:
  The          -> DT   (Determiner)
  beautiful    -> JJ   (Adjective)
  sunset       -> NN   (Noun)
  painted      -> VBD  (Past Tense Verb)
  the          -> DT   (Determiner)
  sky          -> NN   (Noun)
  with         -> IN   (Preposition)
  vibrant      -> JJ   (Adjective)
  colors       -> NNS  (Plural Noun)
  yesterday    -> NN   (Noun)

Extracted parts:
  Nouns: ['sunset', 'sky', 'colors', 'yesterday']
  Adjectives: ['beautiful', 'vibrant']
  Verbs: ['painted']


In [None]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# =============================================================================
# SECTION 5: TEXT NORMALIZATION & TRANSFORMATION (8 minutes)
# =============================================================================

print("\n\n5. TEXT NORMALIZATION & TRANSFORMATION")
print("-" * 30)

text = "The CATS are running quickly through the gardens"
blob = TextBlob(text)

print(f"Original: {text}")

# Case transformations
print(f"Upper: {blob.upper()}")
print(f"Lower: {blob.lower()}")
print(f"Title: {blob.title()}")

# Word transformations
words = blob.words
print(f"\nOriginal words: {list(words)}")

# Singularization and Pluralization
print("\nSingular/Plural transformations:")
test_words = ['cats', 'running', 'gardens', 'child', 'mouse']
for word in test_words:
    w = TextBlob(word).words[0]
    print(f"  {word:<8} -> singular: {w.singularize():<8} | plural: {w.pluralize()}")

# Lemmatization (getting root form)
print("\nLemmatization:")
lemma_words = ['running', 'ran', 'better', 'dancing', 'wolves']
for word in lemma_words:
    w = TextBlob(word).words[0]
    print(f"  {word:<8} -> lemma: {w.lemmatize()}")



5. TEXT NORMALIZATION & TRANSFORMATION
------------------------------
Original: The CATS are running quickly through the gardens
Upper: THE CATS ARE RUNNING QUICKLY THROUGH THE GARDENS
Lower: the cats are running quickly through the gardens
Title: The Cats Are Running Quickly Through The Gardens

Original words: ['The', 'CATS', 'are', 'running', 'quickly', 'through', 'the', 'gardens']

Singular/Plural transformations:
  cats     -> singular: cat      | plural: catss
  running  -> singular: running  | plural: runnings
  gardens  -> singular: garden   | plural: gardenss
  child    -> singular: child    | plural: children
  mouse    -> singular: mouse    | plural: mice

Lemmatization:
  running  -> lemma: running
  ran      -> lemma: ran
  better   -> lemma: better
  dancing  -> lemma: dancing
  wolves   -> lemma: wolf


In [None]:
# =============================================================================
# SECTION 6: SPELLING CORRECTION (5 minutes)
# =============================================================================

print("\n\n6. SPELLING CORRECTION")
print("-" * 30)

# Text with spelling errors
incorrect_text = "I havv goood speling but somtimes I maek mistaks"
blob = TextBlob(incorrect_text)

print(f"Original: {incorrect_text}")
print(f"Corrected: {blob.correct()}")

# Individual word correction
wrong_words = ['speling', 'mistaks', 'recieve', 'definately', 'occured']
print("\nIndividual word corrections:")
for word in wrong_words:
    corrected = TextBlob(word).correct()
    print(f"  {word:<12} -> {corrected}")




6. SPELLING CORRECTION
------------------------------
Original: I havv goood speling but somtimes I maek mistaks
Corrected: I have good spelling but sometimes I make mistake

Individual word corrections:
  speling      -> spelling
  mistaks      -> mistake
  recieve      -> receive
  definately   -> definitely
  occured      -> occurred


In [None]:
# =============================================================================
# SECTION 7: N-GRAMS & LANGUAGE DETECTION (5 minutes)
# =============================================================================

print("\n\n7. N-GRAMS & LANGUAGE DETECTION")
print("-" * 30)

text = "Natural language processing with Python is powerful and useful"
blob = TextBlob(text)

# N-grams
print("Bigrams (2-grams):")
for bigram in blob.ngrams(n=2):
    print(f"  {' '.join(bigram)}")

print("\nTrigrams (3-grams):")
for trigram in blob.ngrams(n=3)[:5]:  # Show first 5
    print(f"  {' '.join(trigram)}")

# Language detection
print("\nLanguage Detection:")
texts_diff_langs = [
    "Hello, how are you?",
    "Bonjour, comment allez-vous?",
    "Hola, ¿cómo estás?",
    "Hallo, wie geht es dir?",
    "Привет, как дела?"
]

for text in texts_diff_langs:
    try:
        detected_lang = TextBlob(text).detect_language()
        print(f"  '{text}' -> {detected_lang}")
    except:
        print(f"  '{text}' -> Could not detect")




7. N-GRAMS & LANGUAGE DETECTION
------------------------------
Bigrams (2-grams):
  Natural language
  language processing
  processing with
  with Python
  Python is
  is powerful
  powerful and
  and useful

Trigrams (3-grams):
  Natural language processing
  language processing with
  processing with Python
  with Python is
  Python is powerful

Language Detection:
  'Hello, how are you?' -> Could not detect
  'Bonjour, comment allez-vous?' -> Could not detect
  'Hola, ¿cómo estás?' -> Could not detect
  'Hallo, wie geht es dir?' -> Could not detect
  'Привет, как дела?' -> Could not detect


In [None]:
# =============================================================================
# SECTION 8: PRACTICAL EXAMPLES & USE CASES (9 minutes)
# =============================================================================

print("\n\n8. PRACTICAL EXAMPLES")
print("-" * 30)

# Example 1: Analyzing customer reviews
print("Example 1: Customer Review Analysis")
reviews = [
    "This product is absolutely fantastic! Best purchase ever!",
    "Terrible quality. Broke after one day. Don't buy this.",
    "It's okay, nothing special but does the job.",
    "Amazing customer service and great product quality!",
    "Worst experience ever. Very disappointed."
]

review_analysis = []
for i, review in enumerate(reviews, 1):
    blob = TextBlob(review)
    sentiment = blob.sentiment.polarity

    if sentiment > 0.1:
        rating = "⭐⭐⭐⭐⭐" if sentiment > 0.5 else "⭐⭐⭐⭐"
    elif sentiment < -0.1:
        rating = "⭐⭐" if sentiment > -0.5 else "⭐"
    else:
        rating = "⭐⭐⭐"

    review_analysis.append({
        'review': review[:40] + "...",
        'sentiment': sentiment,
        'rating': rating
    })

print("\nReview Analysis Results:")
for analysis in review_analysis:
    print(f"  {analysis['rating']} ({analysis['sentiment']:+.2f}) - {analysis['review']}")

# Example 2: Text preprocessing pipeline
print("\n\nExample 2: Text Preprocessing Pipeline")

def preprocess_text(text):
    """Complete text preprocessing pipeline"""
    blob = TextBlob(text)

    # Step 1: Basic cleaning
    cleaned = blob.lower()

    # Step 2: Spelling correction
    corrected = cleaned.correct()

    # Step 3: Extract meaningful words (nouns and adjectives)
    meaningful_words = [word for word, tag in corrected.tags
                       if tag.startswith(('NN', 'JJ', 'VB'))]

    # Step 4: Lemmatization
    lemmatized = [TextBlob(word).words[0].lemmatize() for word in meaningful_words]

    return {
        'original': text,
        'cleaned': str(cleaned),
        'corrected': str(corrected),
        'meaningful_words': meaningful_words,
        'lemmatized': lemmatized,
        'sentiment': corrected.sentiment.polarity
    }

# Test the pipeline
sample_texts = [
    "The cats are running quickly through the beautiful gardens!",
    "I absolutley love this amazng product. It's fantasic!",
    "This experiance was terribel and I'm very disapointed."
]

print("Text Preprocessing Results:")
for text in sample_texts:
    result = preprocess_text(text)
    print(f"\nOriginal: {result['original']}")
    print(f"Processed: {' '.join(result['lemmatized'])}")
    print(f"Sentiment: {result['sentiment']:+.3f}")



8. PRACTICAL EXAMPLES
------------------------------
Example 1: Customer Review Analysis

Review Analysis Results:
  ⭐⭐⭐⭐⭐ (+0.75) - This product is absolutely fantastic! Be...
  ⭐ (-1.00) - Terrible quality. Broke after one day. D...
  ⭐⭐⭐⭐ (+0.43) - It's okay, nothing special but does the ...
  ⭐⭐⭐⭐⭐ (+0.80) - Amazing customer service and great produ...
  ⭐ (-0.99) - Worst experience ever. Very disappointed...


Example 2: Text Preprocessing Pipeline
Text Preprocessing Results:

Original: The cats are running quickly through the beautiful gardens!
Processed: cat are running beautiful garden
Sentiment: +0.667

Original: I absolutley love this amazng product. It's fantasic!
Processed: i love amazing product 's fantastic
Sentiment: +0.533

Original: This experiance was terribel and I'm very disapointed.
Processed: experience wa terrible i 'm disappointed
Sentiment: -0.988


In [None]:
# =============================================================================
# SECTION 9: INTEGRATION WITH PANDAS (5 minutes)
# =============================================================================

print("\n\n9. INTEGRATION WITH PANDAS")
print("-" * 30)

# Create sample dataset
data = {
    'text': [
        "I love this new smartphone! Great battery life.",
        "The camera quality is poor. Very disappointed.",
        "Good value for money. Decent performance overall.",
        "Excellent build quality and fast processing speed.",
        "Battery dies too quickly. Not worth the price."
    ],
    'product': ['Phone A', 'Phone B', 'Phone C', 'Phone D', 'Phone E']
}

df = pd.DataFrame(data)

# Apply TextBlob operations to pandas DataFrame
df['sentiment'] = df['text'].apply(lambda x: TextBlob(x).sentiment.polarity)
df['subjectivity'] = df['text'].apply(lambda x: TextBlob(x).sentiment.subjectivity)
df['word_count'] = df['text'].apply(lambda x: len(TextBlob(x).words))
df['noun_phrases'] = df['text'].apply(lambda x: list(TextBlob(x).noun_phrases))

print("DataFrame with TextBlob Analysis:")
print(df[['product', 'sentiment', 'word_count']].round(3))

# Summary statistics
print(f"\nSummary Statistics:")
print(f"Average sentiment: {df['sentiment'].mean():.3f}")
print(f"Most positive review: {df.loc[df['sentiment'].idxmax(), 'product']}")
print(f"Most negative review: {df.loc[df['sentiment'].idxmin(), 'product']}")




9. INTEGRATION WITH PANDAS
------------------------------
DataFrame with TextBlob Analysis:
   product  sentiment  word_count
0  Phone A      0.490           8
1  Phone B     -0.688           7
2  Phone C      0.289           7
3  Phone D      0.600           7
4  Phone E     -0.150           8

Summary Statistics:
Average sentiment: 0.108
Most positive review: Phone D
Most negative review: Phone B


In [None]:
# =============================================================================
# SECTION 10: TIPS & BEST PRACTICES (5 minutes)
# =============================================================================

print("\n\n10. TIPS & BEST PRACTICES")
print("-" * 30)

tips = [
    "1. Performance: TextBlob is slower than spaCy for large datasets",
    "2. Accuracy: Built-in sentiment analysis works well for general text but may need training for domain-specific content",
    "3. Language: Works best with English; limited support for other languages",
    "4. Spelling correction: Can be slow and may not always be accurate",
    "5. Memory: Process large texts in chunks to avoid memory issues",
    "6. Preprocessing: Always clean your text before analysis",
    "7. Validation: Always validate sentiment results with sample data",
    "8. Alternatives: Consider NLTK, spaCy, or transformers for production use"
]

for tip in tips:
    print(f"  {tip}")

# Final example: Complete text analysis function
def complete_text_analysis(text):
    """Comprehensive text analysis using TextBlob"""
    blob = TextBlob(text)

    analysis = {
        'text': text,
        'word_count': len(blob.words),
        'sentence_count': len(blob.sentences),
        'sentiment_polarity': blob.sentiment.polarity,
        'sentiment_subjectivity': blob.sentiment.subjectivity,
        'noun_phrases': list(blob.noun_phrases),
        #'most_common_words': blob.word_counts.most_common(3),
        'language': 'en'  # Default, as detection can be unreliable
    }

    # Sentiment classification
    if analysis['sentiment_polarity'] > 0.1:
        analysis['sentiment_label'] = 'Positive'
    elif analysis['sentiment_polarity'] < -0.1:
        analysis['sentiment_label'] = 'Negative'
    else:
        analysis['sentiment_label'] = 'Neutral'

    return analysis

# Test the complete analysis
test_text = """
TextBlob is a Python library for processing textual data. It provides a simple API
for diving into common natural language processing tasks such as part-of-speech tagging,
noun phrase extraction, sentiment analysis, classification, translation, and more.
The library is built on top of NLTK and pattern, making it easy to use for beginners.
"""

final_analysis = complete_text_analysis(test_text)

print(f"\n\nCOMPLETE TEXT ANALYSIS EXAMPLE:")
print(f"Text length: {len(final_analysis['text'])} characters")
print(f"Words: {final_analysis['word_count']}")
print(f"Sentences: {final_analysis['sentence_count']}")
print(f"Sentiment: {final_analysis['sentiment_label']} ({final_analysis['sentiment_polarity']:+.3f})")
print(f"Subjectivity: {final_analysis['sentiment_subjectivity']:.3f}")
print(f"Key phrases: {final_analysis['noun_phrases'][:3]}")
#print(f"Common words: {final_analysis['most_common_words']}")

print(f"\n{'='*50}")
print("TEXTBLOB TUTORIAL COMPLETED! 🎉")
print("You've learned all the essential TextBlob features.")
print("Practice with your own text data to master these concepts!")
print(f"{'='*50}")



10. TIPS & BEST PRACTICES
------------------------------
  1. Performance: TextBlob is slower than spaCy for large datasets
  2. Accuracy: Built-in sentiment analysis works well for general text but may need training for domain-specific content
  3. Language: Works best with English; limited support for other languages
  4. Spelling correction: Can be slow and may not always be accurate
  5. Memory: Process large texts in chunks to avoid memory issues
  6. Preprocessing: Always clean your text before analysis
  7. Validation: Always validate sentiment results with sample data
  8. Alternatives: Consider NLTK, spaCy, or transformers for production use


COMPLETE TEXT ANALYSIS EXAMPLE:
Text length: 344 characters
Words: 52
Sentences: 3
Sentiment: Positive (+0.176)
Subjectivity: 0.513
Key phrases: ['textblob', 'python', 'processing textual data']

TEXTBLOB TUTORIAL COMPLETED! 🎉
You've learned all the essential TextBlob features.
Practice with your own text data to master these concepts!