# Crypto News Sentiment Analysis

This notebook analyzes sentiment from crypto news using NLTK's VADER sentiment analyzer.

In [None]:
# Install required packages if not already installed
!pip install nltk requests pandas matplotlib seaborn

In [None]:
import nltk
# Download the 'vader_lexicon' NLTK resource; this notebook's sentiment
# scoring relies on NLTK's VADER analyzer (see the notebook introduction).
nltk.download('vader_lexicon')

In [None]:
from dataclasses import dataclass
from datetime import datetime
import requests
from typing import List, Optional, Dict, Any
import json
from nltk.sentiment import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

: 

## Data Classes and RSS Feed Scraper

In [None]:
@dataclass
class RSSItem:
    """Data class to store RSS feed item information.

    Instances are built by ``RSSFeedScraper._parse_items`` from the entries of
    the feed's ``items`` array; each attribute is filled from the feed key
    noted next to it.
    """
    id: str  # feed key 'id'; '' when the key is absent
    url: str  # feed key 'url'; '' when the key is absent
    title: str  # feed key 'title'; '' when the key is absent
    content_text: str  # feed key 'content_text'; '' when the key is absent
    content_html: str  # feed key 'content_html'; '' when the key is absent
    image: Optional[str]  # feed key 'image'; None when the key is absent
    published_date: Optional[datetime]  # parsed from 'date_published'; None when missing or unparseable
    authors: List[Dict[str, str]]  # feed key 'authors'; [] when the key is absent
    attachments: List[Dict[str, str]]  # feed key 'attachments'; [] when the key is absent

class RSSFeedError(Exception):
    """Raised when the RSS feed cannot be fetched or processed."""

class RSSFeedScraper:
    """Downloads a JSON feed over HTTP and converts its entries to ``RSSItem`` objects.

    Args:
        feed_url: URL of the JSON feed to download.
        timeout: Seconds to wait for the server before the request is abandoned.
    """

    # strptime formats tried in order by ``_parse_date``. None of them carries
    # timezone information, so every match yields a naive datetime.
    _DATE_FORMATS = (
        '%Y-%m-%dT%H:%M:%S.%fZ',  # Standard ISO format with microseconds
        '%Y-%m-%dT%H:%M:%SZ',     # ISO format without microseconds
        '%Y-%m-%d %H:%M:%S',      # Basic datetime format
        '%Y-%m-%d',               # Just date
    )

    def __init__(self, feed_url: str = "https://rss.app/feeds/v1.1/t3OljJfE1OVl9TMq.json",
                 timeout: float = 10.0):
        self.feed_url = feed_url
        self.timeout = timeout

    def fetch_feed(self) -> List["RSSItem"]:
        """Fetch and parse RSS feed data.

        Returns:
            One ``RSSItem`` per entry of the feed's ``items`` array (an empty
            list when the array is missing or null).

        Raises:
            RSSFeedError: On any failure while downloading, decoding or
                parsing the feed; the original exception is chained as
                ``__cause__``.
        """
        try:
            # Without a timeout, requests waits indefinitely on a stalled server.
            response = requests.get(self.feed_url, timeout=self.timeout)
            response.raise_for_status()
            feed_data = response.json()

            # `or []` also covers an explicit `"items": null` in the payload.
            return self._parse_items(feed_data.get('items') or [])
        except Exception as e:
            raise RSSFeedError(f"Failed to fetch RSS feed: {str(e)}") from e

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Parse date string with multiple format attempts.

        Returns:
            A naive datetime, or None when ``date_str`` is empty, not a string,
            or in no recognised format. Values carrying a UTC offset are
            converted to UTC before the offset is dropped, so they line up
            with the ``Z``-suffixed formats, which are parsed as naive values.
        """
        if not date_str or not isinstance(date_str, str):
            return None

        for date_format in self._DATE_FORMATS:
            try:
                return datetime.strptime(date_str, date_format)
            except ValueError:
                continue

        # Fallback for ISO 8601 variants the fixed formats miss, notably
        # timestamps with a numeric offset such as "+02:00".
        try:
            parsed = datetime.fromisoformat(date_str)
        except ValueError:
            return None

        offset = parsed.utcoffset()
        if offset is not None:
            parsed = (parsed - offset).replace(tzinfo=None)
        return parsed

    @staticmethod
    def _text(item: dict, key: str) -> str:
        """Return ``item[key]`` as a string, mapping a missing or null value to ''."""
        value = item.get(key)
        return '' if value is None else str(value)

    def _parse_items(self, items: List[dict]) -> List["RSSItem"]:
        """Parse RSS items into RSSItem objects.

        Missing and null fields are normalised (text fields to '', list fields
        to []) so downstream code can rely on the declared attribute types.
        """
        return [
            RSSItem(
                id=self._text(item, 'id'),
                url=self._text(item, 'url'),
                title=self._text(item, 'title'),
                content_text=self._text(item, 'content_text'),
                content_html=self._text(item, 'content_html'),
                image=item.get('image'),
                published_date=self._parse_date(item.get('date_published')),
                authors=item.get('authors') or [],
                attachments=item.get('attachments') or [],
            )
            for item in items
        ]

## Sentiment Analysis Implementation

In [None]:
class RSSFeedSentimentAnalyzer:
    """Analyzes sentiment from RSS feed content.

    Each feed item gets a score that blends the ``compound`` value reported by
    the sentiment analyzer for its title and for its text body. The mean over
    all items is mapped onto a fear/greed label.
    """

    # Share of the title score and the body score in an item's sentiment;
    # the title is deliberately weighted more heavily.
    TITLE_WEIGHT = 0.6
    CONTENT_WEIGHT = 0.4

    # (exclusive upper bound, label) pairs checked in order; a value that is
    # not below any bound is labelled "Extreme Greed".
    _CLASSIFICATION_BOUNDS = (
        (-0.6, "Extreme Fear"),
        (-0.2, "Fear"),
        (0.2, "Neutral"),
        (0.6, "Greed"),
    )

    def __init__(self, feed_url: str = "https://rss.app/feeds/v1.1/t3OljJfE1OVl9TMq.json"):
        self.scraper = RSSFeedScraper(feed_url)
        self.sia = SentimentIntensityAnalyzer()

    def get_sentiment(self) -> Dict[str, Any]:
        """Get sentiment analysis from RSS feed items.

        Returns:
            A dict with the keys ``value`` (mean item sentiment),
            ``classification``, ``interpretation``, ``items_analyzed``,
            ``latest_item_date`` (ISO string of the newest parsed publication
            date, or None) and ``timestamp`` (ISO string of the analysis time).
            When the feed is empty the classification is "Neutral"; when the
            scraper raises ``RSSFeedError`` it is "Error". Both cases report
            ``value`` 0.0 and ``items_analyzed`` 0.
        """
        try:
            items = self.scraper.fetch_feed()
        except RSSFeedError as e:
            return self._empty_result("Error", f"Failed to analyze RSS feed: {str(e)}")

        if not items:
            return self._empty_result("Neutral", "No RSS items found")

        sentiments = [self._score_item(item) for item in items]
        avg_sentiment = sum(sentiments) / len(sentiments)
        classification = self._classify(avg_sentiment)

        interpretation = f"{classification} - RSS feed sentiment is "
        if avg_sentiment > 0:
            interpretation += "positive, showing optimistic market signals"
        elif avg_sentiment < 0:
            interpretation += "negative, showing pessimistic market signals"
        else:
            interpretation += "neutral, showing balanced market signals"

        # Take the newest date across all items instead of trusting that the
        # feed is sorted newest-first and that its first item has a date.
        dated = [item.published_date for item in items if item.published_date is not None]
        latest = max(dated) if dated else None

        return {
            "value": avg_sentiment,
            "classification": classification,
            "interpretation": interpretation,
            "items_analyzed": len(items),
            "latest_item_date": latest.isoformat() if latest is not None else None,
            "timestamp": datetime.now().isoformat()
        }

    def _score_item(self, item) -> float:
        """Blend the title and body compound scores of a single feed item."""
        # A missing title or body is scored as empty text instead of crashing.
        title_scores = self.sia.polarity_scores(item.title or "")
        content_scores = self.sia.polarity_scores(item.content_text or "")
        return (title_scores['compound'] * self.TITLE_WEIGHT +
                content_scores['compound'] * self.CONTENT_WEIGHT)

    def _classify(self, value: float) -> str:
        """Map an average sentiment value onto its fear/greed label."""
        for upper_bound, label in self._CLASSIFICATION_BOUNDS:
            if value < upper_bound:
                return label
        return "Extreme Greed"

    @staticmethod
    def _empty_result(classification: str, interpretation: str) -> Dict[str, Any]:
        """Build the result returned when no item could be analyzed."""
        return {
            "value": 0.0,
            "classification": classification,
            "interpretation": interpretation,
            "items_analyzed": 0,
            "latest_item_date": None,
            "timestamp": datetime.now().isoformat()
        }

## Analyze Crypto News Sentiment

In [None]:
# Build the analyzer for the crypto news feed and run the analysis once
analyzer = RSSFeedSentimentAnalyzer("https://rss.app/feeds/v1.1/t3OljJfE1OVl9TMq.json")
sentiment = analyzer.get_sentiment()

# Report the headline figures
print("Crypto News Sentiment Analysis:")
print(f"Value: {sentiment['value']:.2f}")
print(f"Classification: {sentiment['classification']}")
print(f"Interpretation: {sentiment['interpretation']}")
print(f"Items Analyzed: {sentiment['items_analyzed']}")

# The latest item date is absent or empty when no dated item was analyzed
latest_item_date = sentiment.get('latest_item_date')
if latest_item_date:
    print(f"Latest Item Date: {latest_item_date}")

## Create DataFrame for Detailed Analysis

In [None]:
# Get items and their sentiments.
# NOTE: this is a second request to the feed, so the items can differ from the
# ones summarized by `analyzer.get_sentiment()` if the feed changed in between.
items = analyzer.scraper.fetch_feed()
sia = SentimentIntensityAnalyzer()

# Fixing the columns up front keeps the frame usable (sortable, plottable)
# even when the feed returned no items at all.
article_columns = [
    'title',
    'published_date',
    'title_sentiment',
    'content_sentiment',
    'weighted_sentiment',
    'url',
]

# Create list for DataFrame
data = []
for item in items:
    # A missing title or body is scored as empty text instead of crashing
    title_scores = sia.polarity_scores(item.title or '')
    content_scores = sia.polarity_scores(item.content_text or '')
    weighted_sentiment = title_scores['compound'] * 0.6 + content_scores['compound'] * 0.4

    data.append({
        'title': item.title,
        'published_date': item.published_date,
        'title_sentiment': title_scores['compound'],
        'content_sentiment': content_scores['compound'],
        'weighted_sentiment': weighted_sentiment,
        'url': item.url
    })

df = pd.DataFrame(data, columns=article_columns)
# Force a datetime dtype: when every date is missing (or the frame is empty)
# pandas would otherwise keep an object column and `.dt` below would fail.
df['published_date'] = pd.to_datetime(df['published_date'])
df = df.sort_values('published_date', ascending=False)

# Display the first few articles with formatted dates
display_df = df.copy()
display_df['published_date'] = display_df['published_date'].dt.strftime('%Y-%m-%d %H:%M')
display_df.head()

## Visualize Sentiment Distribution

In [None]:
# One box per score type: title only, body only, and the weighted blend
sentiment_columns = ['title_sentiment', 'content_sentiment', 'weighted_sentiment']

plt.figure(figsize=(12, 6))
ax = sns.boxplot(data=df[sentiment_columns])

ax.set_title('Distribution of Crypto News Sentiment Scores')
ax.set_ylabel('Sentiment Score')
plt.xticks(rotation=45)
ax.grid(True, alpha=0.3)
plt.show()

## Sentiment Over Time

In [None]:
plt.figure(figsize=(12, 6))

# Items whose publication date could not be parsed have no position on a time axis
dated_df = df.dropna(subset=['published_date'])

# Create the scatter plot
plt.scatter(dated_df['published_date'], dated_df['weighted_sentiment'], alpha=0.6)

# Customize the date format on x-axis
ax = plt.gca()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d %H:%M'))
# Let matplotlib choose the tick spacing from the plotted time span: a fixed
# 3-hour step becomes unreadable (or exceeds the locator's tick limit) once
# the feed covers more than a few days.
ax.xaxis.set_major_locator(mdates.AutoDateLocator())

plt.title('Crypto News Sentiment Trend')
plt.ylabel('Weighted Sentiment')
plt.xlabel('Published Date')
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()