In [3]:
#Loading necessary packages
#pip install nltk==3.6.2 
import datetime
import os
import random
import time

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import requests
from finvader import finvader
from tqdm import tqdm
from transformers import pipeline
from wordcloud import WordCloud

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Korisnik\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import script

In [12]:
# Configuration
# The Polygon.io API key is read from the POLYGON_API_KEY environment variable so
# that the secret is never stored in (or shared with) the notebook itself.
# Any key that was previously committed here should be treated as exposed and rotated.
api_key = os.environ.get("POLYGON_API_KEY", "")
if not api_key:
    print("POLYGON_API_KEY is not set. Set this environment variable before fetching news.")
news_url = "https://api.polygon.io/v2/reference/news"

In [1]:
#user input
# Re-prompt until a ticker is supplied. Calling exit() inside a notebook cell asks
# the kernel to shut down, which would discard every import and variable instead
# of letting the user simply try again.
ticker = ""
while not ticker:
    ticker = input("Enter the stock ticker symbol (e.g., BA): ").strip().upper()
    if not ticker:
        print("No ticker entered. Try again.")

# The raw menu choice is kept as a string; it is validated where the date range is built.
date_choice = input(
    "Choose the date range by typing in a corresponding number:\n"
    "1. Last day\n"
    "2. Last week\n"
    "3. Last month\n"
    "Enter your choice (1, 2, or 3): ").strip()

In [5]:
today = datetime.date.today()

# Number of days to look back for each menu option.
lookback_days_by_choice = {"1": 1, "2": 7, "3": 30}

# Any unrecognised answer falls back to the 30-day window.
if date_choice not in lookback_days_by_choice:
    print("Invalid choice. We will default to previous month.")
lookback_days = lookback_days_by_choice.get(date_choice, 30)

# Both bounds are formatted as YYYY-MM-DD strings; the range always ends today.
start_date = (today - datetime.timedelta(days=lookback_days)).strftime("%Y-%m-%d")
end_date = today.strftime("%Y-%m-%d")

print(f"Start Date: {start_date}, End Date: {end_date}")


Start Date: 2024-12-11, End Date: 2025-01-10


In [7]:
def generate_weekly_intervals(start_date, end_date):
    """
    Split an inclusive date range into consecutive windows of at most seven days.

    Windows are built backwards from ``end_date``; the oldest window is clipped
    so that it never starts before ``start_date``.

    Args:
        start_date: First day of the range, formatted as "YYYY-MM-DD".
        end_date: Last day of the range, formatted as "YYYY-MM-DD".

    Returns:
        A list of ``(window_start, window_end)`` tuples of "YYYY-MM-DD" strings,
        newest window first. The list is empty when ``start_date`` is after
        ``end_date``.

    Raises:
        ValueError: If either argument does not match the "YYYY-MM-DD" format.
    """
    date_format = "%Y-%m-%d"
    range_start = datetime.datetime.strptime(start_date, date_format)
    window_end = datetime.datetime.strptime(end_date, date_format)

    intervals = []
    # ">=" rather than ">": when the remaining span ends exactly on start_date
    # (e.g. a 7-day or single-day range) that final day must still get a window.
    while window_end >= range_start:
        window_start = max(range_start, window_end - datetime.timedelta(days=6))
        intervals.append((window_start.strftime(date_format), window_end.strftime(date_format)))
        window_end -= datetime.timedelta(days=7)

    return intervals

In [None]:
# Accumulator for the raw article records returned by every fetch window.
all_news = list()
# (start, end) date windows covering the selected range.
fetch_intervals = generate_weekly_intervals(start_date=start_date, end_date=end_date)

In [None]:
# Alternative (disabled): instead of the interactive ticker and dynamic date range above, use a fixed month and a predefined list of stock tickers for which news is available.
#tickers = ["JNJ", "JPM", "XOM", "AAPL","ABBV", "NKE","TSLA","MCD","BA"]
#start_date = "2024-11-30"
#end_date = "2024-12-31"
#today = datetime.date.today()
#start_date = (today - datetime.timedelta(days=30)).strftime("%Y-%m-%d")
#end_date = today.strftime("%Y-%m-%d")

#all_news = []
# fetch news in weekly intervals
#fetch_intervals = generate_weekly_intervals(start_date, end_date)

In [10]:
def get_news(ticker, start_date, end_date, api_key, news_url, limit=100, timeout=30):
    """
    Fetch news for a ticker within a given date range.

    Args:
        ticker: Ticker symbol sent as the ``ticker`` query parameter.
        start_date: Lower bound sent as ``published_utc.gte``.
        end_date: Upper bound sent as ``published_utc.lte``.
        api_key: API key sent as the ``apiKey`` query parameter.
        news_url: URL of the news endpoint to query.
        limit: Value of the ``limit`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The list stored under ``"results"`` in the JSON response, or an empty
        list when the status code is not 200 or ``"results"`` is missing/null.

    Raises:
        requests.exceptions.RequestException: On connection failures or timeouts.
    """
    params = {
        "ticker": ticker,
        "published_utc.gte": start_date,
        "published_utc.lte": end_date,
        "limit": limit,
        "apiKey": api_key
    }
    # Random 1-2 second pause before each request to space out consecutive calls.
    time.sleep(random.uniform(1, 2))

    response = requests.get(news_url, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to get news: {response.status_code} - {response.text}")
        return []

    # NOTE(review): only this single response is read; if the API paginates beyond
    # `limit`, later pages are not followed — confirm whether that matters here.
    # "results" may be absent or null; both are normalised to an empty list.
    return response.json().get("results") or []

In [13]:
# Query each date window in turn; a failure in one window is reported and the
# loop moves on to the next one.
for week_start, week_end in tqdm(fetch_intervals, desc="Fetching news intervals"):
    try:
        weekly_news = get_news(ticker, week_start, week_end, api_key, news_url, limit=100)
        if weekly_news:
            # Tag every article with the ticker it was requested for before storing it.
            for article in weekly_news:
                article['ticker'] = ticker
            all_news.extend(weekly_news)
    except Exception as e:
        print(f"Error in accessing news for {ticker} from {week_start} to {week_end}: {e}")

Fetching news intervals: 100%|██████████| 5/5 [00:08<00:00,  1.61s/it]


In [None]:
# One row per fetched article record.
news_df = pd.DataFrame(data=all_news)
# Preview the first five rows.
news_df.head(n=5)

In [22]:
# Remove columns that are not used later; missing ones are ignored.
news_df = news_df.drop(columns=['id', 'image_url', 'author', 'amp_url'], errors='ignore')
# A frame built from an empty article list has no columns, so the subset-based
# calls below would raise KeyError; skip them when nothing was fetched.
if not news_df.empty:
    news_df = news_df.drop_duplicates(subset=['title', 'published_utc', 'ticker'])
    news_df = news_df.dropna(subset=['published_utc'])  # Drop rows with no publication timestamp

In [None]:
# Report whether any articles remain after cleaning.
status_message = (
    "No news articles with valid dates available for the selected ticker and date range."
    if news_df.empty
    else "News articles fetched and processed successfully."
)
print(status_message)

In [None]:
# Rebuild the frame from the raw article records.
news_df = pd.DataFrame(all_news)
# When no articles were fetched the frame has no columns at all; make sure the
# key columns exist so the steps below work on an empty frame instead of raising KeyError.
for required_col in ('title', 'published_utc', 'ticker'):
    if required_col not in news_df.columns:
        news_df[required_col] = pd.Series(dtype='object')
# Remove columns that are not used later; missing ones are ignored.
news_df = news_df.drop(columns=['id', 'image_url', 'author', 'amp_url'], errors='ignore')
# Parse timestamps; values that cannot be parsed become NaT.
news_df['published_utc'] = pd.to_datetime(news_df['published_utc'], errors='coerce')
#news_df['published_utc'] = news_df['published_utc'].dt.date
news_df = news_df.drop_duplicates(subset=['title', 'published_utc', 'ticker'])
news_df = news_df.dropna(subset=['published_utc'])  # Drop rows with invalid dates

In [None]:
# Preview the first five cleaned rows.
news_df.head(n=5)

In [None]:
#FinVADER sentiment analysis
# NOTE(review): analyze_sentiment_finvader is not defined or imported in the cells
# shown here — presumably it comes from the `script` module; confirm it is in scope.
headline_scores = news_df['title'].apply(analyze_sentiment_finvader)
# Store the per-headline result alongside the article.
news_df['finvader_compound'] = headline_scores

In [None]:
# NOTE(review): aggregate_sentiment is not defined or imported in the cells shown
# here — presumably it comes from the `script` module; confirm it is in scope.
sentiment_by_period = aggregate_sentiment(news_df)
# The result is unpacked into exactly three values, in this order.
daily_sentiment, weekly_sentiment, monthly_sentiment = sentiment_by_period

In [57]:
# Save to CSV
# Each frame is written to its own file in the working directory, without the index column.
csv_outputs = {
    "news_data_with_finvader_sentiment.csv": news_df,
    "daily_sentiment.csv": daily_sentiment,
    "weekly_sentiment.csv": weekly_sentiment,
    "monthly_sentiment.csv": monthly_sentiment,
}
for csv_path, frame in csv_outputs.items():
    frame.to_csv(csv_path, index=False)
