In [1]:
import pandas as pd
from datetime import datetime, timezone
import os
import requests
import time
import random
from urllib.parse import quote

In [2]:
ticker_to_currency = {
    'SGD=X': 'Singapore Dollar',
    'SGDMYR=X': 'Singapore Dollar to Malaysian Ringgit',
    'GBPSGD=X': 'British Pound to Singapore Dollar',
    'EURSGD=X': 'Euro to Singapore Dollar',
    'SGDJPY=X': 'Singapore Dollar to Japanese Yen',
    'SGDHKD=X': 'Singapore Dollar to Hong Kong Dollar',
    'SGDIDR=X': 'Singapore Dollar to Indonesian Rupiah',
    'SGDCNY=X': 'Singapore Dollar to Chinese Yuan',
    'SGDTHB=X': 'Singapore Dollar to Thai Baht',
    'SGDINR=X': 'Singapore Dollar to Indian Rupee',
    'SGDKRW=X': 'Singapore Dollar to South Korean Won',
    'AUDSGD=X': 'Australian Dollar to Singapore Dollar',
    'NZDSGD=X': 'New Zealand Dollar to Singapore Dollar',
    'GBPUSD=X': 'British Pound to US Dollar',
    'JPY=X': 'Japanese Yen',
    'HKD=X': 'Hong Kong Dollar',
    'MYR=X': 'Malaysian Ringgit',
    'INR=X': 'Indian Rupee',
    'CNY=X': 'Chinese Yuan',
    'PHP=X': 'Philippine Peso',
    'IDR=X': 'Indonesian Rupiah',
    'THB=X': 'Thai Baht',
    'CHF=X': 'Swiss Franc',
    'MXN=X': 'Mexican Peso',
    'AUDUSD=X': 'Australian Dollar to US Dollar',
    'NZDUSD=X': 'New Zealand Dollar to US Dollar',
    'KRW=X': 'South Korean Won',
    'VND=X': 'Vietnamese Dong',
    'CAD=X': 'Canadian Dollar',
    'EURJPY=X': 'Euro to Japanese Yen',
    'GBPJPY=X': 'British Pound to Japanese Yen',
    'EURGBP=X': 'Euro to British Pound',
    'EURSEK=X': 'Euro to Swedish Krona',
    'EURCHF=X': 'Euro to Swiss Franc',
    'EURHUF=X': 'Euro to Hungarian Forint',
    # Add other tickers for rice_exporter_currencies as needed
    'CNYUSD=X': 'Chinese Yuan to US Dollar',
    'CNYKRW=X': 'Chinese Yuan to South Korean Won',
    'CNYJPY=X': 'Chinese Yuan to Japanese Yen',
    'EGP=X': 'Egyptian Pound',
    'TRY=X': 'Turkish Lira',
    'PGK=X': 'Papua New Guinean Kina',
    'IRR=X': 'Iranian Rial',
    'SAR=X': 'Saudi Riyal',
    'XAF=X': 'Central African CFA Franc',
    'BDT=X': 'Bangladeshi Taka',
    'INRAED=X': 'Indian Rupee to UAE Dirham',
    'NGR=X': 'Nigerian Naira',
    'NPR=X': 'Nepalese Rupee',
    'IDRPHP=X': 'Indonesian Rupiah to Philippine Peso',
    'IDRMYR=X': 'Indonesian Rupiah to Malaysian Ringgit',
    'IDRSGD=X': 'Indonesian Rupiah to Singapore Dollar',
    'MMK=X': 'Myanmar Kyat',
    'VNDPHP=X': 'Vietnamese Dong to Philippine Peso',
    'CNYVND=X': 'Chinese Yuan to Vietnamese Dong',
    'XOF=X': 'West African CFA Franc',
    'GHS=X': 'Ghanaian Cedi',
    'VNDMYR=X': 'Vietnamese Dong to Malaysian Ringgit',
    'IQD=X': 'Iraqi Dinar',
    'THBCNY=X': 'Thai Baht to Chinese Yuan',
    'THBZAR=X': 'Thai Baht to South African Rand',
    'THBHKD=X': 'Thai Baht to Hong Kong Dollar',
    'CNYMMK=X': 'Chinese Yuan to Myanmar Kyat',
    'EURMMK=X': 'Euro to Myanmar Kyat',
    'KHR=X': 'Cambodian Riel',
    'EUR=X': 'Euro',
    'MYR=X': 'Malaysian Ringgit',
    'PKR=X': 'Pakistani Rupee',
    'MYRPKR=X': 'Malaysian Ringgit to Pakistani Rupee',
    'KES=X': 'Kenyan Shilling',
    'PKRAED=X': 'Pakistani Rupee to UAE Dirham',
}

In [3]:
def get_ticker(currency_name, ticker_to_currency):
    for ticker, currency in ticker_to_currency.items():
        if currency_name in currency:
            return ticker
    return None

In [4]:
def convert_datetime(pub_date_str):
    # Step 1: Parse the datetime string
    pub_date = datetime.strptime(pub_date_str, "%Y-%m-%dT%H:%M:%SZ")

    # Step 2: Convert to UTC (if not already in UTC)
    pub_date_utc = pub_date.replace(tzinfo=timezone.utc)

    # Step 3: Format to only include the date
    pub_date_utc_only_date = datetime.strftime(pub_date_utc, "%Y-%m-%d")
    return pub_date_utc_only_date

In [5]:
def fetch_guardian_news_for_currencies(subjects, start_date, end_date, api_key, ticker_to_currency):
    news_data = []
    titles_set = set()  # Set to keep track of titles
    for subject in subjects:
        base_url = "https://content.guardianapis.com/search?"
        page = 1
        page_size = 1
        start_date = pd.to_datetime(start_date).strftime("%Y-%m-%d")
        end_date = pd.to_datetime(end_date).strftime("%Y-%m-%d")
        while page <= page_size:
            time.sleep(random.uniform(1, 3))
            response = requests.get(base_url + f'page={page}&q={quote(subject)}&from-date={start_date}&api-key={api_key}&order-by=oldest')
            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                break  # Exit if there's an error
        
            data = response.json()
            
            # Check if there are no results
            if not data['response']['results']:
                print("No more results found, exiting loop.")
                break  # Break out of the loop if no results are found
            
            page_size = data['response']['pages']
            print(f"Page size: {page_size}")
            
            for article in data['response']['results']:
                title = article['webTitle']
                pub_date = article['webPublicationDate']
                pub_date = convert_datetime(pub_date)
                if start_date <= pub_date <= end_date:
                    print(pub_date)
                    if title not in titles_set:
                        news_data.append({
                            'title': title,
                            'published_date': pub_date
                        })
                        titles_set.add(title)  # Add title to the set
                    else:
                        continue
                else:
                    page_size = page
                    print('Breaking out of loop for this subject...')
                    break  # Exit the loop if the article is outside the date range

            page += 1
    
    # Sort and save only if there's news data
    if news_data:
        sorted_data = sorted(news_data, key=lambda x: x['published_date'])
        df = pd.DataFrame(sorted_data)
        
        # Save DataFrame to a CSV file
        ticker = get_ticker(subjects[0], ticker_to_currency)
        csv_filename = f'../data/news/{ticker}.csv'
        df.to_csv(csv_filename, index=False)
        print(f"Data saved to {csv_filename}")
    else:
        print("No news data to save.")

    return news_data


In [6]:
# insert api key
api_key = ''
start_date = '2014-01-03 00:00:00+00:00'
end_date = '2021-06-29 00:00:00+00:00'
# change ticker accordingly based on which currency to retrieve news for
ticker = ticker_to_currency['EURSGD=X']
fetch_guardian_news_for_currencies([ticker], start_date, end_date, api_key, ticker_to_currency)

Page size: 3627
2014-01-03
2014-01-03
2014-01-03
2014-01-03
2014-01-04
2014-01-04
2014-01-05
2014-01-05
2014-01-05
2014-01-06
Page size: 3627
2014-01-06
2014-01-06
2014-01-06
2014-01-06
2014-01-06
2014-01-07
2014-01-07
2014-01-07
2014-01-07
2014-01-07
Page size: 3627
2014-01-07
2014-01-07
2014-01-08
2014-01-08
2014-01-08
2014-01-08
2014-01-09
2014-01-09
2014-01-09
2014-01-09
Page size: 3627
2014-01-09
2014-01-09
2014-01-09
2014-01-09
2014-01-09
2014-01-09
2014-01-09
2014-01-09
2014-01-10
2014-01-10
Page size: 3627
2014-01-10
2014-01-10
2014-01-10
2014-01-10
2014-01-10
2014-01-10
2014-01-10
2014-01-10
2014-01-10
2014-01-10
Page size: 3627
2014-01-12
2014-01-12
2014-01-12
2014-01-12
2014-01-12
2014-01-12
2014-01-12
2014-01-12
2014-01-12
2014-01-13
Page size: 3627
2014-01-13
2014-01-13
2014-01-13
2014-01-13
2014-01-13
2014-01-13
2014-01-13
2014-01-14
2014-01-14
2014-01-14
Page size: 3627
2014-01-14
2014-01-14
2014-01-14
2014-01-14
2014-01-14
2014-01-14
2014-01-14
2014-01-14
2014-01-14
201

[{'title': 'Hery Rajaonarimampianina and the global leaders with the longest names',
  'published_date': '2014-01-03'},
 {'title': 'The US should encourage Arabic language students, not criminalise them | Anna Lekas Miller',
  'published_date': '2014-01-03'},
 {'title': 'Activate Singapore TV spot', 'published_date': '2014-01-03'},
 {'title': 'Holiday hotspots: where to go in 2014',
  'published_date': '2014-01-03'},
 {'title': "Dad's tattoos are a map of his life",
  'published_date': '2014-01-04'},
 {'title': "José Mourinho urges English coaches to follow Steve McClaren's example",
  'published_date': '2014-01-04'},
 {'title': "A lost year for new technology? Look beyond 2013's gadgets",
  'published_date': '2014-01-05'},
 {'title': 'A-League: what we learned in round 13',
  'published_date': '2014-01-05'},
 {'title': "LearnVest CEO: 'Financial literacy is a fundamental human right'",
  'published_date': '2014-01-05'},
 {'title': 'Bill Gates preaches the aid gospel, but is he just a 