In [1]:
import nltk
from nltk import word_tokenize, ngrams
from nltk.corpus import stopwords
from collections import Counter
import string

def preprocess_text(text):
    """Lower-case and tokenize *text*, dropping punctuation and 1-char tokens.

    Args:
        text: Raw input string.

    Returns:
        A list of the lower-cased tokens produced by ``word_tokenize``,
        without single-character tokens and without tokens that consist
        solely of ``string.punctuation`` characters.
    """
    punctuation_chars = set(string.punctuation)
    tokens = word_tokenize(text.lower())
    # ``token in string.punctuation`` is a substring test on a str, so it
    # only rejects tokens that happen to appear contiguously in that string.
    # Multi-character punctuation tokens such as "``", "''", "..." or "--"
    # would slip through; checking character by character rejects them too.
    return [
        token
        for token in tokens
        if len(token) > 1
        and not all(char in punctuation_chars for char in token)
    ]

def generate_filtered_ngrams(tokens, n, stop_words):
    """Return the n-grams of *tokens* in which no word is a stop word.

    Args:
        tokens: Tokens to build the n-grams from.
        n: Number of tokens per n-gram.
        stop_words: Container of words; an n-gram holding any of them is
            discarded.

    Returns:
        A list of the surviving n-grams, in the order ``ngrams`` yields them.
    """
    kept = []
    for candidate in ngrams(tokens, n):
        # Keep the n-gram only when every one of its words is a non-stop word.
        if all(term not in stop_words for term in candidate):
            kept.append(candidate)
    return kept

def analyze_ngrams(text, num_bigrams=3, num_trigrams=2):
    """Find the most frequent stop-word-free bi-grams and tri-grams in *text*.

    Args:
        text: Raw text to analyze.
        num_bigrams: How many of the most common bi-grams to return
            (default 3).
        num_trigrams: How many of the most common tri-grams to return
            (default 2).

    Returns:
        A ``(top_bigrams, top_trigrams)`` tuple. Each element is the list of
        ``(ngram, count)`` pairs returned by ``Counter.most_common``.
    """
    # Fetch the NLTK data used below. 'punkt_tab' is requested alongside
    # 'punkt' because recent NLTK releases load the tokenizer model from that
    # package instead; asking for both keeps old and new versions working.
    for resource in ('punkt', 'punkt_tab', 'stopwords'):
        nltk.download(resource, quiet=True)

    # A set gives O(1) membership tests while filtering n-grams.
    stop_words = set(stopwords.words('english'))

    tokens = preprocess_text(text)

    # Count how often each stop-word-free n-gram occurs.
    bigram_counts = Counter(generate_filtered_ngrams(tokens, 2, stop_words))
    trigram_counts = Counter(generate_filtered_ngrams(tokens, 3, stop_words))

    top_bigrams = bigram_counts.most_common(num_bigrams)
    top_trigrams = trigram_counts.most_common(num_trigrams)

    return top_bigrams, top_trigrams

# Input document: three short sentences used as the demo corpus
text = "The government announced new policies on climate change. Climate change policies will impact industries significantly. Industries must adapt to new regulations."

# Run the n-gram analysis on the sample
top_bigrams, top_trigrams = analyze_ngrams(text)

# Report each ranking under its heading, one "<words>: <count>" line per n-gram
for heading, ranked in (
    ("Top 3 Bi-grams:", top_bigrams),
    ("\nTop 2 Tri-grams:", top_trigrams),
):
    print(heading)
    for gram, count in ranked:
        print(" ".join(gram) + f": {count}")

# Fixed commentary printed after the rankings; it is a literal, not computed
# from the results above. Trailing spaces before each "\n" are intentional so
# the printed text stays exactly as before.
analysis = (
    "\n"
    "Analysis:\n"
    "The most frequent meaningful phrases like 'climate change' and 'new policies' capture the key topics \n"
    "of the article. These n-grams can form the basis of a summary by highlighting the main subjects \n"
    "('climate change policies') and their effects ('impact industries significantly'). Filtering out stop \n"
    "words ensures we focus on content-bearing terms that truly represent the article's meaning.\n"
)
print(analysis)

Top 3 Bi-grams:
climate change: 2
government announced: 1
announced new: 1

Top 2 Tri-grams:
government announced new: 1
announced new policies: 1

Analysis:
The most frequent meaningful phrases like 'climate change' and 'new policies' capture the key topics 
of the article. These n-grams can form the basis of a summary by highlighting the main subjects 
('climate change policies') and their effects ('impact industries significantly'). Filtering out stop 
words ensures we focus on content-bearing terms that truly represent the article's meaning.

