In [None]:
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from datetime import date, datetime


def scrape_alarabiya():
    """Placeholder for the Al-Arabiya scraper; it currently does nothing."""
    return None

def scrape_rt():
    """Scrape today's headlines from RT Arabic into ``rt_headlines.txt``.

    Collects every front-page link whose ``href`` contains ``%`` (the
    heuristic this scraper uses to recognise article URLs), opens each one,
    and keeps the ``h1.heading`` text of the pages whose first ``span.date``
    contains today's date formatted as ``DD.MM.YYYY``. Article pages that
    cannot be fetched, or that lack either element, are skipped.

    Headlines are written one per line (UTF-8) in the order their links
    appear on the front page; an existing output file is overwritten.

    Raises:
        urllib.error.URLError: If the front page itself cannot be fetched.
    """
    # Function-scope imports keep this block self-contained.
    from http.client import HTTPException
    from urllib.parse import urljoin

    url = 'https://arabic.rt.com/'
    headers = {'User-Agent': 'Mozilla/5.0'}

    print('Opening RT Arabic')
    with urlopen(Request(url, headers=headers)) as response:
        parsed_page = BeautifulSoup(response.read(), 'html.parser')

    print('Finding Articles')
    # A dict is used as an ordered set: duplicates collapse, page order stays.
    article_links = {}
    for link in parsed_page.find_all('a'):
        href = link.get('href')
        if not href or '%' not in href:
            continue
        # urljoin resolves relative hrefs and leaves absolute ones intact,
        # instead of blindly prefixing the host to every href.
        article_url = urljoin(url, href)
        if article_url.startswith(url):
            article_links[article_url] = None
    print(f'{len(article_links)} articles found')

    today = date.today().strftime('%d.%m.%Y')

    print('Reading headlines. This will take a while.')
    headlines = []
    for article_url in article_links:
        try:
            with urlopen(Request(article_url, headers=headers)) as response:
                article_html = response.read()
        except (OSError, HTTPException, ValueError):
            # Best effort: one unreachable or malformed article URL must not
            # abort the run. KeyboardInterrupt is deliberately NOT caught so
            # the user can still cancel this long loop.
            continue

        article_page = BeautifulSoup(article_html, 'html.parser')
        date_span = article_page.find('span', {'class': 'date'})
        heading = article_page.find('h1', {'class': 'heading'})
        if date_span is None or heading is None:
            continue

        if today in date_span.text:
            headlines.append(heading.text)

    print(f'Found {len(headlines)} articles from today. Writing them into the rt_headlines.txt file.')

    output_file_name = 'rt_headlines.txt'
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.writelines(f'{headline}\n' for headline in headlines)
        
    
def scrape_youm7():
    """Scrape today's headlines from Youm7 into ``youm7_headlines.txt``.

    Looks at every link on the front page and keeps those whose ``href``
    contains today's date as ``YYYY/M/DD``. The headline is not read from the
    page text: it is the second-to-last ``/``-separated segment of the URL
    with hyphens turned into spaces.

    Headlines are de-duplicated and written one per line (UTF-8) in the order
    they first appear on the page; an existing output file is overwritten.

    Raises:
        urllib.error.URLError: If the front page cannot be fetched.
    """
    url = 'https://www.youm7.com/'

    print('Opening Al-Youm Al-Sabeh')
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    with urlopen(request) as response:
        parsed_page = BeautifulSoup(response.read(), 'html.parser')

    current = date.today()
    # Month without a leading zero, day WITH one (e.g. 2021/3/05).
    # NOTE(review): only the month is un-padded here; confirm against the
    # site's URL scheme whether single-digit days are padded as well.
    today = f'{current:%Y}/{current.month}/{current:%d}'

    print('Finding Headlines')
    # A dict is used as an ordered set: duplicates collapse, page order stays.
    headlines = {}

    for link in parsed_page.find_all('a'):
        href = link.get('href')
        # Anchors without an href yield None, which cannot be searched.
        if not href or today not in href:
            continue
        headline = href.split('/')[-2].replace('-', ' ')
        headlines[headline] = None

    print(f'Found {len(headlines)} articles from today. Writing them into the youm7_headlines.txt file.')

    output_file_name = 'youm7_headlines.txt'
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.writelines(f'{headline}\n' for headline in headlines)


def scrape_alhurra():
    """Scrape today's headlines from Al-Hurra into ``alhurra_headlines.txt``.

    Looks at every link on the front page and keeps those whose ``href``
    contains today's date as ``YYYY/MM/DD``. The headline is the text of the
    first ``<span>`` that follows the link in the document; links with no
    following ``<span>`` are skipped.

    Headlines are de-duplicated and written one per line (UTF-8) in the order
    they first appear on the page; an existing output file is overwritten.

    Raises:
        urllib.error.URLError: If the front page cannot be fetched.
    """
    url = 'https://www.alhurra.com/'

    # The original message said "Al-Youm Al-Hurra", a leftover from the
    # Youm7 scraper; the menu names this site "Al-Hurra".
    print('Opening Al-Hurra')
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    with urlopen(request) as response:
        parsed_page = BeautifulSoup(response.read(), 'html.parser')

    today = date.today().strftime('%Y/%m/%d')

    print('Finding Headlines')
    # A dict is used as an ordered set: duplicates collapse, page order stays.
    headlines = {}

    for link in parsed_page.find_all('a'):
        href = link.get('href')
        # Anchors without an href yield None, which cannot be searched.
        if not href or today not in href:
            continue
        span = link.find_next('span')
        if span is None:
            continue
        headlines[span.text] = None

    print(f'Found {len(headlines)} articles from today. Writing them into the alhurra_headlines.txt file.')

    output_file_name = 'alhurra_headlines.txt'
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.writelines(f'{headline}\n' for headline in headlines)


def menu():
    """Print the welcome banner followed by the numbered list of choices."""
    # The original literal used backslash continuations, which left eight
    # spaces at the end of most lines; that padding is kept so the printed
    # text stays identical.
    pad = ' ' * 8
    option_lines = (
        'The following websites are available to scrape: Al-Arabiya, RT Arabic, Sky News Arabic, Al-Youm Al-Sabbah, and Al-Hurra' + pad,
        'Type in a number from the list below:',
        '1 -> Scrape All' + pad,
        '2 -> Scrape Al-Arabiya' + pad,
        '3 -> Scrape RT Arabic' + pad,
        '4 -> Scrape Sky News Arabic' + pad,
        '5 -> Scrape Al-Youm Al-Sabbah' + pad,
        '6 -> Scrape Al-Hurra',
    )

    print('Welcome to Scraper')
    print('\n'.join(option_lines))


def run():
    """Run the interactive scraper loop.

    Shows the menu, reads a choice, and runs the matching scraper. The
    numbers follow the printed menu: 1 all sites, 2 Al-Arabiya, 3 RT Arabic,
    4 Sky News Arabic (listed but no scraper exists yet), 5 Youm7,
    6 Al-Hurra. After a scraper finishes the menu is shown again; after an
    unknown or unavailable choice only the prompt is repeated.

    The loop ends when input is closed (EOF) or the user interrupts the
    prompt with Ctrl-C. Exceptions raised by a scraper propagate to the
    caller.
    """
    user_input_prompt = 'Enter a number and hit the Enter key: '

    # Keys mirror the numbering printed by menu(). '4' is absent on purpose:
    # the menu advertises Sky News Arabic but there is no scraper for it.
    actions = {
        '1': scrape_all,
        '2': scrape_alarabiya,
        '3': scrape_rt,
        '5': scrape_youm7,
        '6': scrape_alhurra,
    }

    while True:
        menu()

        while True:
            try:
                user_input = input(user_input_prompt).strip()
            except (EOFError, KeyboardInterrupt):
                # Without this the program has no clean way to exit.
                print()
                return

            if user_input == '4':
                print('Sky News Arabic scraping is not implemented yet.')
                continue

            action = actions.get(user_input)
            if action is None:
                print('Input incorrect....')
                continue

            action()
            print('Successfully Done!')
            # Leave the prompt loop so the menu is displayed again; the
            # original never set its `success` flag and so never did this.
            break


def scrape_all():
    """Run every site scraper in order: Al-Arabiya, RT Arabic, Youm7, Al-Hurra."""
    for scraper in (scrape_alarabiya, scrape_rt, scrape_youm7, scrape_alhurra):
        scraper()


# Start the interactive menu only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    run()

Welcome to Scraper
The following websites are available to scrape: Al-Arabiya, RT Arabic, Sky News Arabic, Al-Youm Al-Sabbah, and Al-Hurra        
Type in a number from the list below:
1 -> Scrape All        
2 -> Scrape Al-Arabiya        
3 -> Scrape RT Arabic        
4 -> Scrape Sky News Arabic        
5 -> Scrape Al-Youm Al-Sabbah        
6 -> Scrape Al-Hurra


Enter a number and hit the Enter key:  1


Opening RT Arabic
Finding Articles
121 articles found
Reading headlines. This will take a while.
Found 44 articles from today. Writing them into the rt_headlines.txt file.
Opening Al-Youm Al-Sabeh
Finding Headlines
Found 123 articles from today. Writing them into the youm7_headlines.txt file.
Opening Al-Youm Al-Hurra
Finding Headlines
Found 14 articles from today. Writing them into the alhurra_headlines.txt file.
Successfully Done!
