## Basic Imports

In [1]:
import os
from datetime import datetime, timedelta

import pandas as pd
import requests
from tqdm import tqdm

In [22]:
def fetch_articles(ticker, start_date_str, api_key, limit=1000, timeout=30):
    """Page through the Alpha Vantage NEWS_SENTIMENT endpoint for one ticker.

    Requests are issued oldest-first (``sort='EARLIEST'``). After each page the
    query window is moved to one minute after the last article received, and
    the loop repeats until the API reports no more items, returns an
    ``Information``/``Note`` message, or the window reaches the current time.

    Args:
        ticker: Value sent as the ``tickers`` query parameter.
        start_date_str: Start of the window, formatted ``YYYYMMDDTHHMM``.
        api_key: Value sent as the ``apikey`` query parameter.
        limit: Value sent as the ``limit`` query parameter (per request).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame with columns ``date``, ``title`` and ``summary`` holding
        every article collected (empty if nothing was collected).
        ``None`` if an HTTP request fails.
        The raw decoded payload if a response has no ``feed`` key.

    Raises:
        ValueError: If ``start_date_str`` or an article timestamp does not
            match the expected format.
        KeyError: If a feed item lacks ``time_published``, ``title`` or
            ``summary``.
    """
    url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT'
    columns = ['date', 'title', 'summary']
    news = []

    # Convert start_date_str to datetime object
    start_date = datetime.strptime(start_date_str, '%Y%m%dT%H%M')
    first_date = start_date

    current_date = datetime.now()
    # Clamp so a start date in the future does not yield a negative total.
    total_days = max((current_date - start_date).days, 0)
    days_done = 0

    with tqdm(total=total_days, desc="Fetching Articles") as pbar:
        while True:
            params = dict(
                tickers=ticker,
                time_from=start_date.strftime('%Y%m%dT%H%M'),
                limit=limit,
                sort='EARLIEST',
                apikey=api_key,
            )

            try:
                # Without a timeout a stalled connection would block forever.
                r = requests.get(url, params=params, timeout=timeout)
                r.raise_for_status()
                data = r.json()
            except requests.exceptions.RequestException as e:
                print(f"Request failed: {e}")
                return None

            if data.get('items') == '0':
                print("No more articles to extract!")
                break

            # Presumably rate-limit / API-key notices; stop and keep what we have.
            if 'Information' in data or 'Note' in data:
                if 'Information' in data:
                    print(data['Information'])
                if 'Note' in data:
                    print(data['Note'])
                print(f"{len(news)} articles extracted up to {start_date}!")
                break

            if 'feed' not in data:
                print("Unexpected response format.")
                return data

            feed = data['feed']
            if not feed:
                # An empty page cannot advance the window; treat it as the end
                # instead of re-requesting the same window indefinitely.
                print("No more articles to extract!")
                break

            news.extend(
                [item['time_published'], item['title'], item['summary']]
                for item in feed
            )

            # The last article of this page determines where the next page starts.
            last_date = datetime.strptime(feed[-1]['time_published'], '%Y%m%dT%H%M%S')

            # Advance the bar by absolute elapsed days so per-page truncation
            # does not accumulate, and never move past the total or backwards.
            elapsed = min(max((last_date - first_date).days, 0), total_days)
            if elapsed > days_done:
                pbar.update(elapsed - days_done)
                days_done = elapsed

            # NOTE: time_from has minute resolution, so articles published in
            # the remainder of last_date's minute may be skipped by this step.
            next_start = last_date + timedelta(minutes=1)
            if next_start <= start_date:
                # Safety net: the response did not move the window forward.
                print(f"{len(news)} articles extracted up to {start_date}!")
                break
            start_date = next_start

            if start_date >= current_date:
                print("Current date reached!")
                break

    # Build the frame once, after the loop, so it exists on every exit path.
    return pd.DataFrame(news, columns=columns)


In [None]:
# Parameters
ticker = 'GOOG'
start_date_str = '20240507T0222'  # YYYYMMDDTHHMM, the format fetch_articles parses

# Secrets must not live in the notebook: read the key from the environment.
# (A key that was previously committed here should be considered leaked and rotated.)
api_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ALPHAVANTAGE_API_KEY environment variable before running this cell."
    )

# Fetch articles and create DataFrame
df = fetch_articles(ticker, start_date_str, api_key)

Fetching Articles:  71%|█████████████████████████████████████████████████████████████▏                        | 42/59 [00:15<00:05,  2.99it/s]

In [31]:
# Show the fetched articles DataFrame as this cell's rich output.
display(df)

Unnamed: 0,date,title,summary
0,20231112T223542,Moomoo Lights up Nasdaq Tower in NY Times Squa...,Moomoo Lights up Nasdaq Tower in NY Times Squa...
1,20231113T010407,Moomoo Lights up Nasdaq Tower in NY Times Squa...,Moomoo Lights up Nasdaq Tower in NY Times Squa...
2,20231113T014804,Google CEO's Diwali Wishes Shine Light On Top ...,Revealing the world's curiosity about one of t...
3,20231113T022453,MrWhoseTheBoss Breaks Down The Iconic Shift In...,Tech YouTuber Arun Maini has dissected the dif...
4,20231113T024548,Campaign Decoded: Harpic's quirky #MomWaliDiwali,What was the thought behind the campaign? Diwa...
...,...,...,...
18833,20240507T012606,Apple working on homegrown chips to run AI too...,Apple Inc. has been developing a homegrown chi...
18834,20240507T012633,Apple working on homegrown chips to run AI too...,Apple Inc. has been developing a homegrown chi...
18835,20240507T015254,Bitcoin ETFs A Social Dud For Lay Folks? Analy...,Spot exchange-traded funds might have excited ...
18836,20240507T015746,YouTube Sensation MrBeast Is Parting Ways With...,"In a surprising turn of events, YouTube sensat..."


In [32]:
# Persist the raw articles. Create the destination folder first so the write
# does not fail when the data directory has not been created yet.
output_path = '../data/1-raw/GOOG/goog-news-4.parquet'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_parquet(output_path)