In [14]:
from dotenv import load_dotenv


In [17]:
from datetime import datetime, timedelta
import requests
import os
import time

def fetch_news(api_key, ticker, start_date, end_date):
    base_url = os.environ.get("endpointnewsp")
    headers = {"Authorization": f"Bearer {api_key}"}
    all_news = []
    
    current_date = start_date

    while current_date <= end_date:
        batch_end_date = current_date + timedelta(days=50)
        if batch_end_date > end_date:
            batch_end_date = end_date

        params = {
            "ticker": ticker,
            "published_utc.gte": current_date.strftime('%Y-%m-%d'),
            "published_utc.lte": batch_end_date.strftime('%Y-%m-%d'),
            "limit": 50,
            "sort": "published_utc"
        }

        try:
            response = requests.get(base_url, headers=headers, params=params)
            if response.status_code == 200:
                data = response.json()
                articles = data.get('results', [])
                all_news.extend(articles)
                print(f"Fetched {len(articles)} articles from {current_date.strftime('%Y-%m-%d')} to {batch_end_date.strftime('%Y-%m-%d')}")
                current_date = batch_end_date + timedelta(days=1)
            elif response.status_code == 429:
                print("Rate limit reached. Waiting to retry...")
                time.sleep(60)  # Wait for 60 seconds or as recommended by the API
                continue  # Retry the current request
            else:
                print(f"Failed to fetch data: {response.status_code}, {response.text}")
                break
        except Exception as e:
            print(f"An error occurred: {e}")
            break

    return all_news

# Example usage
api_key = os.environ.get('newsp_api')
ticker = 'TSLA'
start_date = datetime(2022, 4, 1)  # Adjust this to your last known good start date
end_date = datetime(2024, 4, 1)
news_articles = fetch_news(api_key, ticker, start_date, end_date)
print(f"Total articles fetched: {len(news_articles)}")


Fetched 50 articles from 2022-04-01 to 2022-05-21
Fetched 50 articles from 2022-05-22 to 2022-07-11
Fetched 50 articles from 2022-07-12 to 2022-08-31
Fetched 50 articles from 2022-09-01 to 2022-10-21
Fetched 50 articles from 2022-10-22 to 2022-12-11
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2022-12-12 to 2023-01-31
Fetched 50 articles from 2023-02-01 to 2023-03-23
Fetched 50 articles from 2023-03-24 to 2023-05-13
Fetched 50 articles from 2023-05-14 to 2023-07-03
Fetched 50 articles from 2023-07-04 to 2023-08-23
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2023-08-24 to 2023-10-13
Fetched 50 articles from 2023-10-14 to 2023-12-03
Fetched 50 articles from 2023-12-04 to 2024-01-23
Fetched 50 articles from 2024-01-24 to 2024-03-14
Fetched 50 articles from 2024-03-15 to 2024-04-01
Total articles fetched: 750


In [24]:
news_articles

[{'id': 'rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ',
  'publisher': {'name': 'Benzinga',
   'homepage_url': 'https://www.benzinga.com/',
   'logo_url': 'https://s3.polygon.io/public/assets/news/logos/benzinga.svg',
   'favicon_url': 'https://s3.polygon.io/public/assets/news/favicons/benzinga.ico'},
  'title': "Elon Musk Says Twitter Refusing To Cough Up Spam Numbers: They're Being 'Very Suspicious'",
  'author': 'Bibhu Pattnaik',
  'published_utc': '2022-05-21T19:39:28Z',
  'article_url': 'https://www.benzinga.com/news/22/05/27337474/elon-musk-and-his-tryst-with-the-twitter-spam-the-saga-goes-on',
  'tickers': ['TSLA', 'TWTR'],
  'amp_url': 'https://www.benzinga.com/amp/content/27337474',
  'image_url': 'https://cdn.benzinga.com/files/images/story/2022/05/21/musk_twitter_bots.jpg?width=1200&height=800&fit=crop',
  'description': 'Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems to be tacitly delaying the process of moving forward\xa0with\xa0his $44 billion offer for Twitter Inc (NYSE: TW

In [30]:
import pandas as pd 

# Assuming news_articles is a list of dictionaries
df = pd.DataFrame(news_articles)




In [34]:
df.head()


Unnamed: 0,id,publisher,title,author,published_utc,article_url,tickers,amp_url,image_url,description,keywords
0,rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ,"{'name': 'Benzinga', 'homepage_url': 'https://...",Elon Musk Says Twitter Refusing To Cough Up Sp...,Bibhu Pattnaik,2022-05-21T19:39:28Z,https://www.benzinga.com/news/22/05/27337474/e...,"[TSLA, TWTR]",https://www.benzinga.com/amp/content/27337474,https://cdn.benzinga.com/files/images/story/20...,Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems t...,"[News, Top Stories, Tech]"
1,eYUSHsraFoKd8y4OiDtesI8pSbne2M4XDzjq7vmVRfw,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Twitter Stock Fell This Week,newsfeedback@fool.com (Daniel Sparks),2022-05-21T12:16:04Z,https://www.fool.com/investing/2022/05/21/why-...,"[TWTR, TSLA]",,https://g.foolcdn.com/editorial/images/681359/...,Is the company's $44 billion deal at risk of n...,[investing]
2,LMfdls7oU_GPgoSbFgq2xvW2bj0QwgWWD8SwHVAMEL4,"{'name': 'Seeking Alpha', 'homepage_url': 'htt...",S&P 500 Earnings Update: Earnings Yield Above ...,"Brian Gilmartin, CFA",2022-05-21T04:00:00Z,https://seekingalpha.com/article/4513558-sp500...,"[WMT, TSLA, TGT, BAPR, BAUG, BJUL, BJUN, BMAR,...",https://seekingalpha.com/amp/article/4513558-s...,https://static.seekingalpha.com/cdn/s3/uploads...,The S&P 500 earnings yield is 6.03% vs last we...,
3,PsNkJVaCAXki25-hZcJk2irqOky8s5cNOypZqHqgrz4,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Tesla Stock Was Slammed on Friday,newsfeedback@fool.com (Daniel Sparks),2022-05-20T22:56:26Z,https://www.fool.com/investing/2022/05/20/why-...,"[TSLA, TWTR]",,https://g.foolcdn.com/editorial/images/681336/...,The electric-car maker's stock has been gettin...,[investing]
4,nMdkN0UNxwOrP0VLjCpYzR6xq-rzxS-5uM3qKEQsftE,"{'name': 'Benzinga', 'homepage_url': 'https://...",S&P 500 Falls For 7th Consecutive Week Amid On...,Henry Khederian,2022-05-20T20:17:20Z,https://www.benzinga.com/news/22/05/27332539/s...,"[DIA, SPY, ROST, AMT, DE, LLY, VFC, TSLA, QQQ]",https://www.benzinga.com/amp/content/27332539,https://cdn.benzinga.com/files/images/story/20...,U.S. indices rebounded off session lows Friday...,"[News, After-Hours Center, Markets, Movers, Tr..."


In [38]:
df.isnull().sum()

id                 0
publisher          0
title              0
author             0
published_utc      0
article_url        0
tickers            0
amp_url          251
image_url          0
description       38
keywords         308
dtype: int64

In [40]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             750 non-null    object
 1   publisher      750 non-null    object
 2   title          750 non-null    object
 3   author         750 non-null    object
 4   published_utc  750 non-null    object
 5   article_url    750 non-null    object
 6   tickers        750 non-null    object
 7   amp_url        499 non-null    object
 8   image_url      750 non-null    object
 9   description    712 non-null    object
 10  keywords       442 non-null    object
dtypes: object(11)
memory usage: 64.6+ KB


In [41]:
# Drop rows where the description is NaN
df = df.dropna(subset=['description'])

# Fill missing 'amp_url' with a specific placeholder
df['amp_url'] = df['amp_url'].fillna('No URL provided')  # placeholder

# Fill missing 'keywords' with a specific placeholder
df['keywords'] = df['keywords'].fillna('No keywords')  # placeholder



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['amp_url'] = df['amp_url'].fillna('No URL provided')  # placeholder
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['keywords'] = df['keywords'].fillna('No keywords')  # placeholder


In [42]:
df.isnull().sum()

id               0
publisher        0
title            0
author           0
published_utc    0
article_url      0
tickers          0
amp_url          0
image_url        0
description      0
keywords         0
dtype: int64

In [43]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 712 entries, 0 to 749
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             712 non-null    object
 1   publisher      712 non-null    object
 2   title          712 non-null    object
 3   author         712 non-null    object
 4   published_utc  712 non-null    object
 5   article_url    712 non-null    object
 6   tickers        712 non-null    object
 7   amp_url        712 non-null    object
 8   image_url      712 non-null    object
 9   description    712 non-null    object
 10  keywords       712 non-null    object
dtypes: object(11)
memory usage: 66.8+ KB


In [44]:
from textblob import TextBlob

# calculate sentiment
df['sentiment'] = df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)

df[['title', 'sentiment']].head()  # Display titles with their sentiment scores


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)


Unnamed: 0,title,sentiment
0,Elon Musk Says Twitter Refusing To Cough Up Sp...,-0.064242
1,Why Twitter Stock Fell This Week,0.0
2,S&P 500 Earnings Update: Earnings Yield Above ...,0.0
3,Why Tesla Stock Was Slammed on Friday,0.1875
4,S&P 500 Falls For 7th Consecutive Week Amid On...,0.096032


In [45]:
df 

Unnamed: 0,id,publisher,title,author,published_utc,article_url,tickers,amp_url,image_url,description,keywords,sentiment
0,rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ,"{'name': 'Benzinga', 'homepage_url': 'https://...",Elon Musk Says Twitter Refusing To Cough Up Sp...,Bibhu Pattnaik,2022-05-21T19:39:28Z,https://www.benzinga.com/news/22/05/27337474/e...,"[TSLA, TWTR]",https://www.benzinga.com/amp/content/27337474,https://cdn.benzinga.com/files/images/story/20...,Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems t...,"[News, Top Stories, Tech]",-0.064242
1,eYUSHsraFoKd8y4OiDtesI8pSbne2M4XDzjq7vmVRfw,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Twitter Stock Fell This Week,newsfeedback@fool.com (Daniel Sparks),2022-05-21T12:16:04Z,https://www.fool.com/investing/2022/05/21/why-...,"[TWTR, TSLA]",No URL provided,https://g.foolcdn.com/editorial/images/681359/...,Is the company's $44 billion deal at risk of n...,[investing],0.000000
2,LMfdls7oU_GPgoSbFgq2xvW2bj0QwgWWD8SwHVAMEL4,"{'name': 'Seeking Alpha', 'homepage_url': 'htt...",S&P 500 Earnings Update: Earnings Yield Above ...,"Brian Gilmartin, CFA",2022-05-21T04:00:00Z,https://seekingalpha.com/article/4513558-sp500...,"[WMT, TSLA, TGT, BAPR, BAUG, BJUL, BJUN, BMAR,...",https://seekingalpha.com/amp/article/4513558-s...,https://static.seekingalpha.com/cdn/s3/uploads...,The S&P 500 earnings yield is 6.03% vs last we...,No keywords,0.000000
3,PsNkJVaCAXki25-hZcJk2irqOky8s5cNOypZqHqgrz4,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Tesla Stock Was Slammed on Friday,newsfeedback@fool.com (Daniel Sparks),2022-05-20T22:56:26Z,https://www.fool.com/investing/2022/05/20/why-...,"[TSLA, TWTR]",No URL provided,https://g.foolcdn.com/editorial/images/681336/...,The electric-car maker's stock has been gettin...,[investing],0.187500
4,nMdkN0UNxwOrP0VLjCpYzR6xq-rzxS-5uM3qKEQsftE,"{'name': 'Benzinga', 'homepage_url': 'https://...",S&P 500 Falls For 7th Consecutive Week Amid On...,Henry Khederian,2022-05-20T20:17:20Z,https://www.benzinga.com/news/22/05/27332539/s...,"[DIA, SPY, ROST, AMT, DE, LLY, VFC, TSLA, QQQ]",https://www.benzinga.com/amp/content/27332539,https://cdn.benzinga.com/files/images/story/20...,U.S. indices rebounded off session lows Friday...,"[News, After-Hours Center, Markets, Movers, Tr...",0.096032
...,...,...,...,...,...,...,...,...,...,...,...,...
745,UubTuww0IbuYZIBCvNK2f_cK_xfOImnbHahPNHEPSVY,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Forget Tesla: I Think This Stock Should Replac...,newsfeedback@fool.com (Ryan Vanzo),2024-03-27T09:50:00Z,https://www.fool.com/investing/2024/03/27/forg...,"[V, TSLA, META, GOOGL, NVDA, GOOG]",No URL provided,https://g.foolcdn.com/editorial/images/769403/...,Tesla stock has lost 30% of its value in three...,[investing],1.000000
746,QzI8mX8__zTvRWgwqgSYPjAD49Wi5sqsn1-5gYQoNXU,"{'name': 'The Motley Fool', 'homepage_url': 'h...",This Fantastic Stock Has Outperformed Tesla in...,newsfeedback@fool.com (Neil Rozenbaum),2024-03-27T09:30:00Z,https://www.fool.com/investing/2024/03/27/this...,"[LULU, TSLA]",No URL provided,https://g.foolcdn.com/editorial/images/770691/...,Lululemon stock has crashed 20% since reportin...,[investing],0.000000
747,aNETwfELk3fnJHMejDxAXfpu931S-zaC93cs-pD7cS0,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Forget Tesla: 1 Unstoppable Artificial Intelli...,newsfeedback@fool.com (Anthony Di Pizio),2024-03-27T08:27:00Z,https://www.fool.com/investing/2024/03/27/forg...,"[ORCL, TSLA, META, MSFT, GOOGL, AAPL, AMZN, NV...",No URL provided,https://g.foolcdn.com/editorial/images/770375/...,"Tesla stock is down 31% already in 2024, jeopa...",[investing],0.172222
748,gPdvI5l7YFrkcBSZSK_ZgKkYlay8UIuDpFlOroyG7d4,"{'name': 'Benzinga', 'homepage_url': 'https://...","Investor Sentiment Falls Further, S&P 500 Fall...",Avi Kapoor,2024-03-27T08:00:27Z,https://www.benzinga.com/news/earnings/24/03/3...,"[CTAS, STX, CCL, TSLA, RH]",https://www.benzinga.com/amp/content/37951150,https://cdn.benzinga.com/files/images/story/20...,The CNN Money Fear and Greed index showed a fu...,"[News, Earnings, Pre-Market Outlook, Markets, ...",0.157812


In [46]:
df['published_utc'] = pd.to_datetime(df['published_utc'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['published_utc'] = pd.to_datetime(df['published_utc'])


In [49]:
# Create a new column for just the date
df['date'] = df['published_utc'].dt.date

# Create a new column for just the time
df['time'] = df['published_utc'].dt.time


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = df['published_utc'].dt.date
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['time'] = df['published_utc'].dt.time


In [51]:
df.drop('published_utc', axis=1, inplace=True)
df.drop('published_time', axis=1, inplace=True)
df.drop('published_date', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('published_utc', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('published_time', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('published_date', axis=1, inplace=True)


In [52]:
df

Unnamed: 0,id,publisher,title,author,article_url,tickers,amp_url,image_url,description,keywords,sentiment,date,time
0,rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ,"{'name': 'Benzinga', 'homepage_url': 'https://...",Elon Musk Says Twitter Refusing To Cough Up Sp...,Bibhu Pattnaik,https://www.benzinga.com/news/22/05/27337474/e...,"[TSLA, TWTR]",https://www.benzinga.com/amp/content/27337474,https://cdn.benzinga.com/files/images/story/20...,Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems t...,"[News, Top Stories, Tech]",-0.064242,2022-05-21,19:39:28
1,eYUSHsraFoKd8y4OiDtesI8pSbne2M4XDzjq7vmVRfw,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Twitter Stock Fell This Week,newsfeedback@fool.com (Daniel Sparks),https://www.fool.com/investing/2022/05/21/why-...,"[TWTR, TSLA]",No URL provided,https://g.foolcdn.com/editorial/images/681359/...,Is the company's $44 billion deal at risk of n...,[investing],0.000000,2022-05-21,12:16:04
2,LMfdls7oU_GPgoSbFgq2xvW2bj0QwgWWD8SwHVAMEL4,"{'name': 'Seeking Alpha', 'homepage_url': 'htt...",S&P 500 Earnings Update: Earnings Yield Above ...,"Brian Gilmartin, CFA",https://seekingalpha.com/article/4513558-sp500...,"[WMT, TSLA, TGT, BAPR, BAUG, BJUL, BJUN, BMAR,...",https://seekingalpha.com/amp/article/4513558-s...,https://static.seekingalpha.com/cdn/s3/uploads...,The S&P 500 earnings yield is 6.03% vs last we...,No keywords,0.000000,2022-05-21,04:00:00
3,PsNkJVaCAXki25-hZcJk2irqOky8s5cNOypZqHqgrz4,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Tesla Stock Was Slammed on Friday,newsfeedback@fool.com (Daniel Sparks),https://www.fool.com/investing/2022/05/20/why-...,"[TSLA, TWTR]",No URL provided,https://g.foolcdn.com/editorial/images/681336/...,The electric-car maker's stock has been gettin...,[investing],0.187500,2022-05-20,22:56:26
4,nMdkN0UNxwOrP0VLjCpYzR6xq-rzxS-5uM3qKEQsftE,"{'name': 'Benzinga', 'homepage_url': 'https://...",S&P 500 Falls For 7th Consecutive Week Amid On...,Henry Khederian,https://www.benzinga.com/news/22/05/27332539/s...,"[DIA, SPY, ROST, AMT, DE, LLY, VFC, TSLA, QQQ]",https://www.benzinga.com/amp/content/27332539,https://cdn.benzinga.com/files/images/story/20...,U.S. indices rebounded off session lows Friday...,"[News, After-Hours Center, Markets, Movers, Tr...",0.096032,2022-05-20,20:17:20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,UubTuww0IbuYZIBCvNK2f_cK_xfOImnbHahPNHEPSVY,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Forget Tesla: I Think This Stock Should Replac...,newsfeedback@fool.com (Ryan Vanzo),https://www.fool.com/investing/2024/03/27/forg...,"[V, TSLA, META, GOOGL, NVDA, GOOG]",No URL provided,https://g.foolcdn.com/editorial/images/769403/...,Tesla stock has lost 30% of its value in three...,[investing],1.000000,2024-03-27,09:50:00
746,QzI8mX8__zTvRWgwqgSYPjAD49Wi5sqsn1-5gYQoNXU,"{'name': 'The Motley Fool', 'homepage_url': 'h...",This Fantastic Stock Has Outperformed Tesla in...,newsfeedback@fool.com (Neil Rozenbaum),https://www.fool.com/investing/2024/03/27/this...,"[LULU, TSLA]",No URL provided,https://g.foolcdn.com/editorial/images/770691/...,Lululemon stock has crashed 20% since reportin...,[investing],0.000000,2024-03-27,09:30:00
747,aNETwfELk3fnJHMejDxAXfpu931S-zaC93cs-pD7cS0,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Forget Tesla: 1 Unstoppable Artificial Intelli...,newsfeedback@fool.com (Anthony Di Pizio),https://www.fool.com/investing/2024/03/27/forg...,"[ORCL, TSLA, META, MSFT, GOOGL, AAPL, AMZN, NV...",No URL provided,https://g.foolcdn.com/editorial/images/770375/...,"Tesla stock is down 31% already in 2024, jeopa...",[investing],0.172222,2024-03-27,08:27:00
748,gPdvI5l7YFrkcBSZSK_ZgKkYlay8UIuDpFlOroyG7d4,"{'name': 'Benzinga', 'homepage_url': 'https://...","Investor Sentiment Falls Further, S&P 500 Fall...",Avi Kapoor,https://www.benzinga.com/news/earnings/24/03/3...,"[CTAS, STX, CCL, TSLA, RH]",https://www.benzinga.com/amp/content/37951150,https://cdn.benzinga.com/files/images/story/20...,The CNN Money Fear and Greed index showed a fu...,"[News, Earnings, Pre-Market Outlook, Markets, ...",0.157812,2024-03-27,08:00:27
