In [5]:
import re
import string
import pandas as pd
from collections import Counter
import emoji
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Sample posts used to exercise the analysis steps below
# (swap this list out for your scraped data; each entry needs a "title" key)
posts = [
    dict(
        title="Discover the Economic Highs and Potential Risk Ahead: Economic and Market Review",
        upvotes=1,
        comments=0,
    ),
    dict(
        title="Breaking News: Cryptocurrency is Booming, Is It Time to Invest?",
        upvotes=10,
        comments=2,
    ),
    dict(
        title="Exciting New Tech Trends in 2025!",
        upvotes=5,
        comments=1,
    ),
    dict(
        title="Economic Crisis or Booming Economy? What's Next?",
        upvotes=8,
        comments=5,
    ),
    dict(
        title="How to Make a Difference in the World with AI #AI #Technology",
        upvotes=20,
        comments=10,
    ),
]

# Only the title text is analysed by the steps that follow
titles = [entry["title"] for entry in posts]

# Step 1: Text Structure Analysis
def text_structure_analysis(titles):
    """Print word-count statistics (average, maximum, minimum) for the titles.

    Each title is split on whitespace and the number of resulting tokens is
    taken as its length in words. Results are printed; nothing is returned.

    Args:
        titles: Iterable of title strings.
    """
    # Number of whitespace-separated tokens in each title
    word_counts = [len(title.split()) for title in titles]

    # Without this guard an empty input divides by zero when averaging
    # (and max()/min() would raise on the empty list)
    if not word_counts:
        print("No titles to analyze.")
        return

    avg_length = sum(word_counts) / len(word_counts)
    max_length = max(word_counts)
    min_length = min(word_counts)

    print(f"Average Length of Titles: {avg_length} words")
    print(f"Maximum Length of Title: {max_length} words")
    print(f"Minimum Length of Title: {min_length} words")








In [7]:
# Step 2: Identify Common Keywords and Hashtags
def identify_keywords_hashtags(titles):
    """Print the ten most frequent non-stopword words and all hashtags in the titles.

    Words are obtained by joining the titles with spaces, lowercasing,
    deleting every character in ``string.punctuation`` and splitting on
    whitespace. Because '#' is one of the deleted characters, the text of a
    hashtag is also counted as an ordinary word. Hashtags themselves are
    matched on the original (un-lowercased) text with the pattern ``#\\w+``.
    Results are printed; nothing is returned.

    Args:
        titles: Iterable of title strings. It is consumed exactly once, so a
            generator or other one-shot iterator is handled correctly.
    """
    # Join once and reuse: joining `titles` a second time for the hashtag pass
    # would see an already-exhausted iterator and silently find no hashtags
    combined_text = ' '.join(titles)

    # Extract words (remove punctuation and convert to lowercase)
    strip_punctuation = str.maketrans('', '', string.punctuation)
    word_list = combined_text.lower().translate(strip_punctuation).split()

    # Filter out common stopwords
    stopwords = {
        'the', 'and', 'is', 'in', 'to', 'it', 'of', 'with', 'for', 'on', 'a', 'an', 'at', 'by', 'as', 'from'
    }
    filtered_words = [word for word in word_list if word not in stopwords]

    # Most common words
    common_words = Counter(filtered_words).most_common(10)

    # Extract hashtags from the raw text, where '#' is still present
    hashtags = re.findall(r'#\w+', combined_text)

    print("Most Common Words (excluding stopwords):", common_words)
    print("Hashtags found:", hashtags)

In [None]:
# Step 3: Basic Sentiment Trends
def sentiment_analysis(titles):
    """Print the mean VADER compound sentiment score across the titles.

    Each title is scored with ``SentimentIntensityAnalyzer.polarity_scores``
    and the ``'compound'`` entry of each result is averaged. The average is
    printed; nothing is returned.

    Args:
        titles: Iterable of title strings.
    """
    titles = list(titles)

    # Without this guard an empty input divides by zero when averaging;
    # checking first also avoids building the analyzer for nothing
    if not titles:
        print("No titles to analyze.")
        return

    # Initialize VADER Sentiment Analyzer
    analyzer = SentimentIntensityAnalyzer()

    # Keep only the 'compound' value from each per-title score dict
    sentiments = [analyzer.polarity_scores(title)['compound'] for title in titles]

    avg_sentiment = sum(sentiments) / len(sentiments)
    print(f"Average Sentiment Score: {avg_sentiment}")

In [None]:
# Step 4: Detect Language Issues, Emoji Usage, and Text Length Variations
def detect_issues(titles):
    """Print emoji usage and character-length statistics for the titles.

    Titles for which ``emoji.emoji_count`` is greater than zero are reported
    in the form returned by ``emoji.demojize``. Title length is measured with
    ``len(title)``, and the average, maximum and minimum are reported.
    Results are printed; nothing is returned.

    Args:
        titles: Iterable of title strings.
    """
    titles = list(titles)

    # Without this guard an empty input divides by zero when averaging
    # (and max()/min() would raise on the empty list)
    if not titles:
        print("No titles to analyze.")
        return

    # Detect Emojis: keep the demojized form of every title that has at least one
    emojis_in_titles = [emoji.demojize(title) for title in titles if emoji.emoji_count(title) > 0]

    # Text length variation
    length_variation = [len(title) for title in titles]
    avg_title_length = sum(length_variation) / len(length_variation)
    max_title_length = max(length_variation)
    min_title_length = min(length_variation)

    print(f"Titles containing emojis: {emojis_in_titles}")
    print(f"Average Title Length: {avg_title_length} characters")
    print(f"Maximum Title Length: {max_title_length} characters")
    print(f"Minimum Title Length: {min_title_length} characters")

# Run every analysis step in order, printing its heading first
analysis_steps = [
    ("Step 1: Text Structure Analysis", text_structure_analysis),
    ("\nStep 2: Common Keywords and Hashtags", identify_keywords_hashtags),
    ("\nStep 3: Sentiment Analysis", sentiment_analysis),
    ("\nStep 4: Language Issues, Emoji Usage, and Length Variations", detect_issues),
]
for heading, run_step in analysis_steps:
    print(heading)
    run_step(titles)

Step 1: Text Structure Analysis
Average Length of Titles: 9.4 words
Maximum Length of Title: 12 words
Minimum Length of Title: 6 words

Step 2: Common Keywords and Hashtags
Most Common Words (excluding stopwords): [('economic', 3), ('booming', 2), ('ai', 2), ('discover', 1), ('highs', 1), ('potential', 1), ('risk', 1), ('ahead', 1), ('market', 1), ('review', 1)]
Hashtags found: ['#AI', '#Technology']

Step 3: Sentiment Analysis
Average Sentiment Score: -0.07966

Step 4: Language Issues, Emoji Usage, and Length Variations
Titles containing emojis: []
Average Title Length: 57.0 characters
Maximum Title Length: 80 characters
Minimum Title Length: 33 characters
