In [1]:
# Importing the necessary libraries
import os
import pandas as pd
from datetime import datetime as dt, timedelta
from dotenv import load_dotenv
from newsapi import NewsApiClient
from tqdm import tqdm
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import warnings

# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation warnings raised by later cells - confirm a
# blanket filter is intended rather than a targeted one.
warnings.filterwarnings("ignore")
# Load variables from a local .env file into the process environment; the
# API key is read back from the environment with os.getenv further down.
load_dotenv()

True

In [2]:
# Reference date of this run; the query window further down is measured back from it
today = dt.now().date()

# Preview: ISO-formatted date two days before the reference date
(today - timedelta(days=2)).isoformat()

'2025-04-17'

In [3]:
# Initialising the news api object
NEWSAPI_API_KEY = os.getenv("NEWSAPI_API_KEY")

# Fail fast with an actionable message: without this check a missing key is
# passed on as None and only surfaces as an API error on the first request.
if not NEWSAPI_API_KEY:
    raise RuntimeError(
        "NEWSAPI_API_KEY is not set - define it in the environment or in the .env file."
    )

newsapi = NewsApiClient(api_key=NEWSAPI_API_KEY)

In [4]:
# Query the "everything" endpoint: English articles from yesterday up to
# today, ranked by relevancy, first page of at most 30 results
search_params = {
    "q": 'crude oil OR suez',
    "from_param": str(today - timedelta(days=1)),
    "to": str(today),
    "sort_by": 'relevancy',
    "page_size": 30,
    "language": 'en',
    "page": 1,
}
naphtha_news = newsapi.get_everything(**search_params)

naphtha_news

{'status': 'ok',
 'totalResults': 4,
 'articles': [{'source': {'id': None, 'name': 'The Punch'},
   'author': 'Opinion',
   'title': 'Why naira-for-crude must be sustained',
   'description': 'The oil sector in Nigeria has always been the topic of the day. Online. Offline. On the radio. Even in commuter buses. If you want to start a heated conversation anywhere in this country, just mention “fuel”. The reactions will start coming in. The price. The…',
   'url': 'https://punchng.com/why-naira-for-crude-must-be-sustained/',
   'urlToImage': 'https://cdn.punchng.com/wp-content/uploads/2019/10/16132352/500-and-1000-naira-notes.jpg',
   'publishedAt': '2025-04-18T00:37:44Z',
   'content': 'The oil sector in Nigeria has always been the topic of the day. Online. Offline. On the radio. Even in commuter buses. If you want to start a heated conversation anywhere in this country, just mentio… [+4402 chars]'},
  {'source': {'id': None, 'name': 'The Punch'},
   'author': 'Solomon Odeniyi',
   'titl

In [5]:
# Sample response result: the first article of the response, shown in full
# to inspect which fields are available for each article
naphtha_news['articles'][0]

{'source': {'id': None, 'name': 'The Punch'},
 'author': 'Opinion',
 'title': 'Why naira-for-crude must be sustained',
 'description': 'The oil sector in Nigeria has always been the topic of the day. Online. Offline. On the radio. Even in commuter buses. If you want to start a heated conversation anywhere in this country, just mention “fuel”. The reactions will start coming in. The price. The…',
 'url': 'https://punchng.com/why-naira-for-crude-must-be-sustained/',
 'urlToImage': 'https://cdn.punchng.com/wp-content/uploads/2019/10/16132352/500-and-1000-naira-notes.jpg',
 'publishedAt': '2025-04-18T00:37:44Z',
 'content': 'The oil sector in Nigeria has always been the topic of the day. Online. Offline. On the radio. Even in commuter buses. If you want to start a heated conversation anywhere in this country, just mentio… [+4402 chars]'}

In [10]:
# Flatten the API response into a DataFrame with one row per article
article_rows = []
for entry in naphtha_news['articles']:
    article_rows.append({
        "Article_Source": entry['source']['name'],
        "Author": entry['author'],
        "Title": entry['title'],
        "Description": entry['description'],
        "URL": entry['url'],
        "Publish_Date": entry['publishedAt'],
        "Summary": entry['content'],
        "Job_Date": today,
    })

news_df = pd.DataFrame(data=article_rows)

news_df.head()

Unnamed: 0,Article_Source,Author,Title,Description,URL,Publish_Date,Summary,Job_Date
0,The Punch,Opinion,Why naira-for-crude must be sustained,The oil sector in Nigeria has always been the ...,https://punchng.com/why-naira-for-crude-must-b...,2025-04-18T00:37:44Z,The oil sector in Nigeria has always been the ...,2025-04-19
1,The Punch,Solomon Odeniyi,Insurgents killed as NAF bombs Sambisa strongh...,The Nigerian Air Force on Thursday announced t...,https://punchng.com/insurgents-killed-as-naf-b...,2025-04-18T01:21:23Z,The Nigerian Air Force on Thursday announced t...,2025-04-19
2,The Punch,Punch Editorial Board,US criticism of Nigeria’s import ban,THE condemnation of Nigeria’s import ban on so...,https://punchng.com/us-criticism-of-nigerias-i...,2025-04-18T00:37:37Z,THE condemnation of Nigerias import ban on som...,2025-04-19
3,Ncspin.com,by Tom Campbell,In search of kindness and civility,85% say civility in society today is much wors...,https://www.ncspin.com/in-search-of-kindness-a...,2025-04-18T00:58:26Z,I was pushing my precious cargo of eggs and co...,2025-04-19


In [11]:
# Cleaning step - keep only the calendar date of the publication timestamp
news_df["Publish_Date"] = pd.to_datetime(news_df["Publish_Date"]).dt.date

# Next, clean the summary column: flatten Windows line breaks and drop the
# trailing "[+N chars]" truncation marker. The marker is removed rather than
# extracting "everything before the first '['", so summaries without a marker
# keep their text and re-running this cell does not blank the column.
news_df["Summary"] = (
    news_df["Summary"]
    .str.replace("\r\n", " ", regex=False)
    .str.replace(r"\s*\[\+\d+ chars\]\s*$", "", regex=True)
)

news_df.head()

Unnamed: 0,Article_Source,Author,Title,Description,URL,Publish_Date,Summary,Job_Date
0,The Punch,Opinion,Why naira-for-crude must be sustained,The oil sector in Nigeria has always been the ...,https://punchng.com/why-naira-for-crude-must-b...,2025-04-18,The oil sector in Nigeria has always been the ...,2025-04-19
1,The Punch,Solomon Odeniyi,Insurgents killed as NAF bombs Sambisa strongh...,The Nigerian Air Force on Thursday announced t...,https://punchng.com/insurgents-killed-as-naf-b...,2025-04-18,The Nigerian Air Force on Thursday announced t...,2025-04-19
2,The Punch,Punch Editorial Board,US criticism of Nigeria’s import ban,THE condemnation of Nigeria’s import ban on so...,https://punchng.com/us-criticism-of-nigerias-i...,2025-04-18,THE condemnation of Nigerias import ban on som...,2025-04-19
3,Ncspin.com,by Tom Campbell,In search of kindness and civility,85% say civility in society today is much wors...,https://www.ncspin.com/in-search-of-kindness-a...,2025-04-18,I was pushing my precious cargo of eggs and co...,2025-04-19


In [8]:
# Now to define what we've done as a function
def news_data_extraction(query: str, start_date, end_date, page_size: int = 30, client=None) -> pd.DataFrame:
    """Fetch one page of English articles for ``query`` and return them as a tidy DataFrame.

    Parameters
    ----------
    query : str
        Search expression forwarded as the ``q`` argument of ``get_everything``.
    start_date, end_date
        Bounds of the search window. Values that expose a ``.date()`` method
        (``datetime``, pandas ``Timestamp``) are reduced to their calendar date;
        anything else (``datetime.date``, ISO date strings) is passed through
        ``str`` unchanged.
    page_size : int, default 30
        Maximum number of articles requested (only the first page is fetched).
    client : optional
        Object exposing ``get_everything``. Defaults to the notebook-level
        ``newsapi`` client.

    Returns
    -------
    pd.DataFrame
        One row per article with the columns Article_Source, Author, Title,
        Description, URL, Publish_Date (calendar date), Summary (content with
        the trailing "[+N chars]" marker removed) and Job_Date (date of this
        run). When the response holds no articles the frame has the same
        columns and zero rows.
    """
    columns = [
        "Article_Source", "Author", "Title", "Description",
        "URL", "Publish_Date", "Summary", "Job_Date",
    ]
    api = newsapi if client is None else client

    def _as_date_str(value) -> str:
        # datetime-like values carry a .date() method; plain dates and strings do not
        return str(value.date() if hasattr(value, "date") else value)

    response = api.get_everything(
        q=query,
        from_param=_as_date_str(start_date),
        to=_as_date_str(end_date),
        sort_by='relevancy',
        page_size=page_size,
        language='en',
        page=1
    )

    job_date = dt.today().date()
    records = [
        {
            "Article_Source": (article.get('source') or {}).get('name'),
            "Author": article.get('author'),
            "Title": article.get('title'),
            "Description": article.get('description'),
            "URL": article.get('url'),
            "Publish_Date": article.get('publishedAt'),
            "Summary": article.get('content'),
            "Job_Date": job_date,
        }
        for article in (response.get('articles') or [])
    ]

    # Passing the column list keeps the schema stable even when no article
    # matched, so the cleaning steps below never hit a missing column.
    news_df = pd.DataFrame(records, columns=columns)

    news_df["Publish_Date"] = pd.to_datetime(news_df["Publish_Date"]).dt.date
    # Strip only the trailing "[+N chars]" marker; summaries that were never
    # truncated keep their full text instead of becoming NaN.
    news_df["Summary"] = (
        news_df["Summary"]
        .str.replace("\r\n", " ", regex=False)
        .str.replace(r"\s*\[\+\d+ chars\]\s*$", "", regex=True)
    )

    return news_df

# Try the function on the most recent one-day window
window_start = dt.today() - timedelta(days=1)
window_end = dt.today()
news_test = news_data_extraction(
    query="crude oil OR suez",
    start_date=window_start,
    end_date=window_end,
)
news_test.head()

Unnamed: 0,Article_Source,Author,Title,Description,URL,Publish_Date,Summary,Job_Date
0,The Punch,Opinion,Why naira-for-crude must be sustained,The oil sector in Nigeria has always been the ...,https://punchng.com/why-naira-for-crude-must-b...,2025-04-18,The oil sector in Nigeria has always been the ...,2025-04-19
1,The Punch,Solomon Odeniyi,Insurgents killed as NAF bombs Sambisa strongh...,The Nigerian Air Force on Thursday announced t...,https://punchng.com/insurgents-killed-as-naf-b...,2025-04-18,The Nigerian Air Force on Thursday announced t...,2025-04-19
2,The Punch,Punch Editorial Board,US criticism of Nigeria’s import ban,THE condemnation of Nigeria’s import ban on so...,https://punchng.com/us-criticism-of-nigerias-i...,2025-04-18,THE condemnation of Nigerias import ban on som...,2025-04-19
3,Ncspin.com,by Tom Campbell,In search of kindness and civility,85% say civility in society today is much wors...,https://www.ncspin.com/in-search-of-kindness-a...,2025-04-18,I was pushing my precious cargo of eggs and co...,2025-04-19


In [None]:
# Lastly, collect the relevant articles for each of the last 4 one-day windows.
# `dates` runs from today backwards, so every adjacent pair of entries is
# (window end, window start).
dates = pd.date_range(end=dt.today().date(), periods=5)[::-1]
tmp = [
    news_data_extraction(query="crude oil OR suez", start_date=window_start, end_date=window_end)
    for window_end, window_start in tqdm(list(zip(dates[:-1], dates[1:])))
]

# Stack the daily frames, newest publication date first, with a fresh index
news_final = (
    pd.concat(tmp, ignore_index=True)
    .sort_values(by="Publish_Date", ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Lets add a field called sentiment to understand the sentiment based on the summary field
# NOTE(review): VADER presumably needs its lexicon available locally
# (nltk.download("vader_lexicon")) - confirm for fresh environments.
analyzer = SentimentIntensityAnalyzer()

# Generating the sentiment score. Summaries that are not text (articles for
# which no content was returned end up as NaN/None) are given a missing score
# instead of being handed to the analyzer, which raises on non-string input.
news_final["Sentiment_Score"] = news_final["Summary"].apply(
    lambda text: analyzer.polarity_scores(text)["compound"] if isinstance(text, str) else float("nan")
)

# Now to classify sentiment score into negative, neutral or positive.
# A missing score stays unlabelled rather than being reported as 'neutral'.
news_final["Sentiment"] = news_final["Sentiment_Score"].apply(
    lambda score: None if pd.isna(score)
    else ('negative' if score < 0 else ('positive' if score > 0 else 'neutral'))
)

In [None]:
# Number of scored articles for every (publish date, sentiment label) pair
(
    news_final
    .groupby(['Publish_Date', 'Sentiment'])['Sentiment_Score']
    .count()
)

In [None]:
# Missing-value count for each column of the combined frame
news_final.isna().sum()