In [4]:
import os
from datetime import datetime, timedelta

import pandas as pd
import requests

def get_news_from_newsapi(query, date, num_results=100, api_key=None, timeout=10):
    """Fetch articles matching ``query`` for a single day from NewsAPI's /v2/everything.

    Args:
        query: Search keywords or phrase, sent as the ``q`` parameter.
        date: Day to search as a ``YYYY-MM-DD`` string; used for both ``from`` and ``to``.
        num_results: Page size requested from the API.
        api_key: NewsAPI key. When omitted, it is read from the ``NEWSAPI_KEY``
            environment variable so the secret never lives in the notebook.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with the columns ``Title``, ``Link``, ``Source``, ``Snippet``
        and ``Published At`` (one row per article, possibly zero rows). On any
        request or API failure a message is printed and an empty, column-less
        DataFrame is returned.

    Raises:
        ValueError: If no API key was passed and ``NEWSAPI_KEY`` is not set.
    """
    columns = ['Title', 'Link', 'Source', 'Snippet', 'Published At']

    if api_key is None:
        api_key = os.environ.get("NEWSAPI_KEY")
    if not api_key:
        raise ValueError(
            "No NewsAPI key provided: pass api_key=... or set the NEWSAPI_KEY environment variable"
        )

    # Let requests build the query string so characters such as '&' or spaces
    # in the search term are URL-encoded instead of corrupting the URL.
    params = {
        'q': query,
        'from': date,
        'to': date,
        'pageSize': num_results,
    }
    # Send the key as a header rather than in the URL to keep it out of logs.
    headers = {'X-Api-Key': api_key}

    try:
        response = requests.get(
            'https://newsapi.org/v2/everything',
            params=params,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection problems and timeouts are treated like any other failed fetch.
        print(f"Error fetching news: {exc}")
        return pd.DataFrame()

    # Print the status code for debugging
    print(f"Status Code: {response.status_code}")

    if response.status_code != 200:
        print("Error fetching news")
        return pd.DataFrame()

    try:
        news_data = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        print("Error in news data response")
        return pd.DataFrame()

    if news_data.get("status") != "ok":
        print("Error in news data response")
        return pd.DataFrame()

    articles = news_data.get("articles", [])

    # Use .get so an article with a missing or null field yields an empty cell
    # instead of aborting the whole day with a KeyError/TypeError.
    rows = [{
        'Title': article.get('title'),
        'Link': article.get('url'),
        'Source': (article.get('source') or {}).get('name'),
        'Snippet': article.get('description'),
        'Published At': article.get('publishedAt'),
    } for article in articles]

    # Passing the column list keeps the schema stable even for zero articles.
    return pd.DataFrame(rows, columns=columns)

def collect_news_over_date_range(query, from_date, to_date, num_results_per_day=10):
    """Collect news for every day from ``from_date`` through ``to_date`` inclusive.

    Calls ``get_news_from_newsapi`` once per calendar day and stacks the
    per-day results into a single DataFrame.

    Args:
        query: Search keywords or phrase, passed through unchanged.
        from_date: First day to fetch, as a ``YYYY-MM-DD`` string.
        to_date: Last day to fetch (inclusive), as a ``YYYY-MM-DD`` string.
        num_results_per_day: Forwarded as ``num_results`` for each daily request.

    Returns:
        A DataFrame containing all fetched rows with a fresh 0..n-1 index, or an
        empty DataFrame when no day returned any rows (including the case where
        ``from_date`` is after ``to_date``).

    Raises:
        ValueError: If either date string does not match ``%Y-%m-%d``.
    """
    # Convert the date strings to datetime objects
    start_date = datetime.strptime(from_date, "%Y-%m-%d")
    end_date = datetime.strptime(to_date, "%Y-%m-%d")

    # Gather per-day frames and concatenate once at the end; concatenating
    # inside the loop re-copies every accumulated row on each iteration.
    daily_frames = []

    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y-%m-%d")
        print(f"Fetching news for {date_str}")

        # Get news for the current date
        news_df = get_news_from_newsapi(query, date_str, num_results=num_results_per_day)

        # Skip empty results: they add nothing, and concatenating empty frames
        # triggers a pandas FutureWarning about dtype inference.
        if not news_df.empty:
            daily_frames.append(news_df)

        # Move to the next day
        current_date += timedelta(days=1)

    # pd.concat raises on an empty list, so handle "nothing fetched" explicitly.
    if not daily_frames:
        return pd.DataFrame()

    return pd.concat(daily_frames, ignore_index=True)

# Example usage: prompt for a topic and a date range, fetch the matching
# articles day by day, show them, and save them to a CSV file.
# Inputs are stripped so a stray leading/trailing space does not break the
# "%Y-%m-%d" date parsing or leak into the search term and filename.
keyword = input("Enter the keyword or topic: ").strip()
from_date = input("Enter the start date (YYYY-MM-DD): ").strip()
to_date = input("Enter the end date (YYYY-MM-DD): ").strip()

news_df = collect_news_over_date_range(keyword, from_date, to_date, num_results_per_day=10)

# Display the DataFrame
print(news_df)

# Optionally, save to a CSV file
if not news_df.empty:
    # The keyword is free-form user text; replace anything that is not a
    # letter, digit, '-' or '_' so characters like '/', ':' or '&' cannot
    # produce an invalid or unintended file path.
    safe_keyword = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in keyword)
    news_df.to_csv(f'{safe_keyword}_news_{from_date}_to_{to_date}.csv', index=False)
else:
    print("No articles found for the given date range")


Enter the keyword or topic:  GOOG
Enter the start date (YYYY-MM-DD):  2024-07-03
Enter the end date (YYYY-MM-DD):  2024-09-03


Fetching news for 2024-07-03
Status Code: 429
Error fetching news
Fetching news for 2024-07-04
Status Code: 429
Error fetching news
Fetching news for 2024-07-05
Status Code: 429
Error fetching news
Fetching news for 2024-07-06
Status Code: 429
Error fetching news
Fetching news for 2024-07-07
Status Code: 429
Error fetching news
Fetching news for 2024-07-08
Status Code: 429
Error fetching news
Fetching news for 2024-07-09
Status Code: 429
Error fetching news
Fetching news for 2024-07-10
Status Code: 429
Error fetching news
Fetching news for 2024-07-11
Status Code: 429
Error fetching news
Fetching news for 2024-07-12
Status Code: 429
Error fetching news
Fetching news for 2024-07-13
Status Code: 429
Error fetching news
Fetching news for 2024-07-14
Status Code: 429
Error fetching news
Fetching news for 2024-07-15
Status Code: 429
Error fetching news
Fetching news for 2024-07-16
Status Code: 429
Error fetching news
Fetching news for 2024-07-17
Status Code: 429
Error fetching news
Fetching n