In [2]:
from dotenv import load_dotenv
from datetime import datetime, timedelta
import requests
import os
import time
import pandas as pd 
from news_preprocessing import *

In [3]:
# Load variables from a .env file into the process environment; the cells
# below read the news endpoint ("endpointnewsp") and API key ("newsp_api")
# through os.environ.
load_dotenv()

True

In [5]:
def fetch_news(api_key, ticker, start_date=None, end_date=None):
    """Fetch news articles for ``ticker`` in consecutive 50-day windows.

    The endpoint URL is read from the ``endpointnewsp`` environment variable
    and every request carries ``api_key`` as a bearer token.

    Args:
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        ticker: Symbol sent as the ``ticker`` query parameter.
        start_date: First day (``datetime``) to request. When ``None`` it
            defaults to 730 days before ``end_date``.
        end_date: Last day (``datetime``) to request. When ``None`` it
            defaults to yesterday.

    Returns:
        list: The ``results`` entries of every successful response, in
        request order. If a request fails with a status other than 429, or
        raises, the loop stops and the articles collected so far are
        returned.
    """
    base_url = os.environ.get("endpointnewsp")
    headers = {"Authorization": f"Bearer {api_key}"}
    all_news = []

    # Honour the caller's range; only fall back to "the last two years up to
    # yesterday" when a bound is not supplied.
    if end_date is None:
        end_date = datetime.now() - timedelta(days=1)  # Yesterday's date
    if start_date is None:
        start_date = end_date - timedelta(days=365 * 2)  # Two years back
    current_date = start_date

    while current_date <= end_date:
        # Clamp the window so the final batch never runs past end_date.
        batch_end_date = min(current_date + timedelta(days=50), end_date)

        # NOTE(review): each window asks for at most 50 articles. If the API
        # paginates, windows with more articles are silently truncated --
        # confirm against the API documentation.
        params = {
            "ticker": ticker,
            "published_utc.gte": current_date.strftime('%Y-%m-%d'),
            "published_utc.lte": batch_end_date.strftime('%Y-%m-%d'),
            "limit": 50,
            "sort": "published_utc"
        }

        try:
            # A timeout keeps a stalled connection from hanging the cell
            # forever; a timeout error is handled by the except branch below.
            response = requests.get(base_url, headers=headers, params=params, timeout=30)
            if response.status_code == 200:
                data = response.json()
                articles = data.get('results', [])
                all_news.extend(articles)
                print(f"Fetched {len(articles)} articles from {current_date.strftime('%Y-%m-%d')} to {batch_end_date.strftime('%Y-%m-%d')}")
                # Both bounds are inclusive, so the next window starts the
                # day after this one ended.
                current_date = batch_end_date + timedelta(days=1)
            elif response.status_code == 429:
                print("Rate limit reached. Waiting to retry...")
                time.sleep(60)  # Wait for 60 seconds or as recommended by the API
                continue  # Retry the current request
            else:
                print(f"Failed to fetch data: {response.status_code}, {response.text}")
                break
        except Exception as e:
            # Best effort: report the problem and return what was collected.
            print(f"An error occurred: {e}")
            break

    return all_news

# Example usage
api_key = os.environ.get('newsp_api')
ticker = 'TSLA'
# The date range must be defined before the call below; these values match
# the 2022-04-01 .. 2024-04-01 range shown in the recorded output.
start_date = datetime(2022, 4, 1)  # start date
end_date = datetime(2024, 4, 1)
news_articles = fetch_news(api_key, ticker, start_date, end_date)
print(f"Total articles fetched: {len(news_articles)}")


Fetched 50 articles from 2022-04-01 to 2022-05-21
Fetched 50 articles from 2022-05-22 to 2022-07-11
Fetched 50 articles from 2022-07-12 to 2022-08-31
Fetched 50 articles from 2022-09-01 to 2022-10-21
Fetched 50 articles from 2022-10-22 to 2022-12-11
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2022-12-12 to 2023-01-31
Fetched 50 articles from 2023-02-01 to 2023-03-23
Fetched 50 articles from 2023-03-24 to 2023-05-13
Fetched 50 articles from 2023-05-14 to 2023-07-03
Fetched 50 articles from 2023-07-04 to 2023-08-23
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2023-08-24 to 2023-10-13
Fetched 50 articles from 2023-10-14 to 2023-12-03
Fetched 50 articles from 2023-12-04 to 2024-01-23
Fetched 50 articles from 2024-01-24 to 2024-03-14
Fetched 50 articles from 2024-03-15 to 2024-04-01
Total articles fetched: 750


In [6]:
# Process the news articles into a DataFrame.
# process_news_articles is not defined in this notebook -- presumably it comes
# from the `news_preprocessing` star import; confirm its contract there.
df = process_news_articles(news_articles)

In [7]:
# Summarise the columns, dtypes and non-null counts of the processed frame.
# NOTE(review): the recorded run shows 712 rows for 750 fetched articles and a
# DatetimeIndex starting at 1970-01-01 with nanosecond steps -- presumably row
# positions interpreted as epoch nanoseconds; confirm in process_news_articles.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 712 entries, 1970-01-01 00:00:00 to 1970-01-01 00:00:00.000000711
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         712 non-null    object 
 1   id           712 non-null    object 
 2   publisher    712 non-null    object 
 3   title        712 non-null    object 
 4   author       712 non-null    object 
 5   article_url  712 non-null    object 
 6   tickers      712 non-null    object 
 7   amp_url      712 non-null    object 
 8   image_url    712 non-null    object 
 9   description  712 non-null    object 
 10  keywords     712 non-null    object 
 11  sentiment    712 non-null    float64
 12  time         712 non-null    object 
dtypes: float64(1), object(12)
memory usage: 77.9+ KB


In [8]:
# Preview the first rows of the processed articles.
df.head()

Unnamed: 0,date,id,publisher,title,author,article_url,tickers,amp_url,image_url,description,keywords,sentiment,time
1970-01-01 00:00:00.000000000,2022-05-21,rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ,"{'name': 'Benzinga', 'homepage_url': 'https://...",Elon Musk Says Twitter Refusing To Cough Up Sp...,Bibhu Pattnaik,https://www.benzinga.com/news/22/05/27337474/e...,"[TSLA, TWTR]",https://www.benzinga.com/amp/content/27337474,https://cdn.benzinga.com/files/images/story/20...,Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems t...,"[News, Top Stories, Tech]",-0.064242,19:39:28
1970-01-01 00:00:00.000000001,2022-05-21,eYUSHsraFoKd8y4OiDtesI8pSbne2M4XDzjq7vmVRfw,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Twitter Stock Fell This Week,newsfeedback@fool.com (Daniel Sparks),https://www.fool.com/investing/2022/05/21/why-...,"[TWTR, TSLA]",No URL provided,https://g.foolcdn.com/editorial/images/681359/...,Is the company's $44 billion deal at risk of n...,[investing],0.0,12:16:04
1970-01-01 00:00:00.000000002,2022-05-21,LMfdls7oU_GPgoSbFgq2xvW2bj0QwgWWD8SwHVAMEL4,"{'name': 'Seeking Alpha', 'homepage_url': 'htt...",S&P 500 Earnings Update: Earnings Yield Above ...,"Brian Gilmartin, CFA",https://seekingalpha.com/article/4513558-sp500...,"[WMT, TSLA, TGT, BAPR, BAUG, BJUL, BJUN, BMAR,...",https://seekingalpha.com/amp/article/4513558-s...,https://static.seekingalpha.com/cdn/s3/uploads...,The S&P 500 earnings yield is 6.03% vs last we...,No keywords,0.0,04:00:00
1970-01-01 00:00:00.000000003,2022-05-20,PsNkJVaCAXki25-hZcJk2irqOky8s5cNOypZqHqgrz4,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Tesla Stock Was Slammed on Friday,newsfeedback@fool.com (Daniel Sparks),https://www.fool.com/investing/2022/05/20/why-...,"[TSLA, TWTR]",No URL provided,https://g.foolcdn.com/editorial/images/681336/...,The electric-car maker's stock has been gettin...,[investing],0.1875,22:56:26
1970-01-01 00:00:00.000000004,2022-05-20,nMdkN0UNxwOrP0VLjCpYzR6xq-rzxS-5uM3qKEQsftE,"{'name': 'Benzinga', 'homepage_url': 'https://...",S&P 500 Falls For 7th Consecutive Week Amid On...,Henry Khederian,https://www.benzinga.com/news/22/05/27332539/s...,"[DIA, SPY, ROST, AMT, DE, LLY, VFC, TSLA, QQQ]",https://www.benzinga.com/amp/content/27332539,https://cdn.benzinga.com/files/images/story/20...,U.S. indices rebounded off session lows Friday...,"[News, After-Hours Center, Markets, Movers, Tr...",0.096032,20:17:20


In [9]:
# Reorder rows by descending index.
# NOTE(review): the recorded index looks like a synthetic 1970-based counter
# rather than publication time, so this presumably just reverses row order and
# does not sort chronologically -- confirm, and consider sorting by the
# `date`/`time` columns if chronological order is required downstream.
df= df.sort_index(ascending=False)

In [10]:
# Persist the processed articles to CSV; index=False leaves the index out of
# the file.
df.to_csv('news_articles.csv', index=False)


In [11]:
# Add an exponential moving average via a helper that is not defined in this
# notebook (presumably from `news_preprocessing`); the recorded output shows a
# new `exp_mean_7_days` column.
# NOTE(review): whether window=7 counts days or rows is not visible here --
# confirm in the helper.
df_processed = exponential_moving_average(df, window=7)

In [12]:
# Persist the frame with the moving-average column to CSV; index=False leaves
# the index out of the file.
df_processed.to_csv('news_articles_ema.csv', index=False)

In [13]:
# Preview the first rows of the frame returned by exponential_moving_average.
df_processed.head()

Unnamed: 0,date,id,publisher,title,author,article_url,tickers,amp_url,image_url,description,keywords,sentiment,time,exp_mean_7_days
1970-01-01 00:00:00.000000711,2024-03-27,rD3Jh821u6EVUukLdPuuvheL8Iry8BIAHdPj15YgbJ4,"{'name': 'Benzinga', 'homepage_url': 'https://...",Tesla-CATL 'Power Couple' Can Recharge US EV M...,Anan Ashraf,https://www.benzinga.com/analyst-ratings/analy...,[TSLA],https://www.benzinga.com/amp/content/37950620,https://cdn.benzinga.com/files/images/story/20...,Morgan Stanley analyst and Tesla Inc (NASDAQ:T...,"[News, Analyst Color, Tech]",0.01,03:22:17,0.01
1970-01-01 00:00:00.000000710,2024-03-27,gPdvI5l7YFrkcBSZSK_ZgKkYlay8UIuDpFlOroyG7d4,"{'name': 'Benzinga', 'homepage_url': 'https://...","Investor Sentiment Falls Further, S&P 500 Fall...",Avi Kapoor,https://www.benzinga.com/news/earnings/24/03/3...,"[CTAS, STX, CCL, TSLA, RH]",https://www.benzinga.com/amp/content/37951150,https://cdn.benzinga.com/files/images/story/20...,The CNN Money Fear and Greed index showed a fu...,"[News, Earnings, Pre-Market Outlook, Markets, ...",0.157812,08:00:27,0.046953
1970-01-01 00:00:00.000000709,2024-03-27,aNETwfELk3fnJHMejDxAXfpu931S-zaC93cs-pD7cS0,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Forget Tesla: 1 Unstoppable Artificial Intelli...,newsfeedback@fool.com (Anthony Di Pizio),https://www.fool.com/investing/2024/03/27/forg...,"[ORCL, TSLA, META, MSFT, GOOGL, AAPL, AMZN, NV...",No URL provided,https://g.foolcdn.com/editorial/images/770375/...,"Tesla stock is down 31% already in 2024, jeopa...",[investing],0.172222,08:27:00,0.07827
1970-01-01 00:00:00.000000708,2024-03-27,QzI8mX8__zTvRWgwqgSYPjAD49Wi5sqsn1-5gYQoNXU,"{'name': 'The Motley Fool', 'homepage_url': 'h...",This Fantastic Stock Has Outperformed Tesla in...,newsfeedback@fool.com (Neil Rozenbaum),https://www.fool.com/investing/2024/03/27/this...,"[LULU, TSLA]",No URL provided,https://g.foolcdn.com/editorial/images/770691/...,Lululemon stock has crashed 20% since reportin...,[investing],0.0,09:30:00,0.058703
1970-01-01 00:00:00.000000707,2024-03-27,UubTuww0IbuYZIBCvNK2f_cK_xfOImnbHahPNHEPSVY,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Forget Tesla: I Think This Stock Should Replac...,newsfeedback@fool.com (Ryan Vanzo),https://www.fool.com/investing/2024/03/27/forg...,"[V, TSLA, META, GOOGL, NVDA, GOOG]",No URL provided,https://g.foolcdn.com/editorial/images/769403/...,Tesla stock has lost 30% of its value in three...,[investing],1.0,09:50:00,0.294027
