1. Data Collection & Preprocessing

In [4]:
!pip install textblob
!pip install requests

import requests
import pandas as pd
from textblob import TextBlob
import time

# 1. Data Collection

def fetch_news():
    url = "https://newsapi.org/v2/everything?q=crypto&apiKey=YOUR_API_KEY"
    response = requests.get(url)
    data = response.json()
    return data['articles'] if 'articles' in data else []

def fetch_market_data():
    url = "https://api.coingecko.com/api/v3/coins/markets"
    params = {"vs_currency": "usd", "order": "market_cap_desc", "per_page": 10, "page": 1}
    response = requests.get(url, params=params)
    return response.json()

# 2. Data Processing

def clean_text(text):
    return text.replace('\n', ' ').strip()

def sentiment_analysis(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return 'Positive'
    elif analysis.sentiment.polarity == 0:
        return 'Neutral'
    else:
        return 'Negative'

def process_news(articles):
    processed_data = []
    for article in articles:
        title = clean_text(article['title'])
        sentiment = sentiment_analysis(title)
        processed_data.append({
            'title': title,
            'source': article['source']['name'],
            'sentiment': sentiment
        })
    return pd.DataFrame(processed_data)

# 3. Weighting Based on Credibility
CREDIBILITY_SCORES = {
    "CoinTelegraph": 1.0,
    "CryptoPanic": 0.9,
    "Reddit": 0.6,
    "Twitter": 0.5,
    "Unknown": 0.3
}

def weight_news(df):
    df['weight'] = df['source'].apply(lambda x: CREDIBILITY_SCORES.get(x, 0.3))
    return df

# 4. Time-Series Alignment
def align_data(news_df, market_data):
    market_df = pd.DataFrame(market_data)[['id', 'current_price', 'last_updated']]
    market_df['last_updated'] = pd.to_datetime(market_df['last_updated'])
    return news_df, market_df

# Execution
if __name__ == "__main__":
    news_articles = fetch_news()
    news_df = process_news(news_articles)
    weighted_news_df = weight_news(news_df)
    market_data = fetch_market_data()
    news_df, market_df = align_data(weighted_news_df, market_data)
    
    print(news_df.head())
    print(market_df.head())

Defaulting to user installation because normal site-packages is not writeable
Collecting textblob
  Using cached textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB

KeyError: 'source'