# Necessary Import Statements

In [3]:
import os
import time
from datetime import datetime
import grequests as areq
from selenium import webdriver
from bs4 import BeautifulSoup

ModuleNotFoundError: No module named 'grequests'

# Define Necessary Variables

In [None]:
# Names and abbreviations (English and Arabic) under which the company may
# appear. The crawlers below keep an article only when its text contains at
# least one of these aliases (case-insensitive substring match).
entities_to_search = [
    "First Abu Dhabi Bank",
    "NBAD",
    "FAB",
    "Bank of Abu Dhabi First",
    "بنك أبوظبي الأول",
    "Abu Dhabi First",
    "و أ ب",
]

# Crawlers

### Modularized Crawlers For Each Outlet

#### Reuters

In [None]:
def reuters_crawler(query_link: str, path_to_driver=driver_path):
    """
    Collect recent Reuters articles that mention the company of interest.

    A Chrome window is driven through Selenium to open ``query_link`` and
    to scroll to the bottom of the page repeatedly so that additional
    headlines are loaded. The headline links are then downloaded
    concurrently with grequests and parsed with BeautifulSoup. An article
    is kept only if it was published within the last 365 days and its body
    text contains (case-insensitively) at least one alias from the
    module-level ``entities_to_search`` list.

    Parameters
    ----------
    query_link : str
        URL of the Reuters page that lists the articles. It must contain
        the substring "reuters".
    path_to_driver : str
        Location of the Chrome driver executable that is handed to
        ``webdriver.Chrome``. Defaults to the module-level ``driver_path``.

    Returns
    -------
    to_return : list of dict objects
        One dictionary per retained article with the keys "Title",
        "Article_URL", "Author", "Published_Date", "Outlet" and "Content".

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or does not contain "reuters".

    Notes
    -----
    Requests that fail outright (grequests reports those as ``None``) and
    responses with an error status are ignored. Pages that lack one of the
    expected tags, or whose date cannot be parsed, are skipped and counted
    instead of aborting the whole crawl.

    References
    ----------
    1. https://stackoverflow.com/questions/20986631/how-can-i-scroll-a-web-page-using-selenium-webdriver-in-python
    2. https://pypi.org/project/beautifulsoup4/
    3. https://github.com/spyoungtech/grequests
    4. https://realpython.com/modern-web-automation-with-python-and-selenium/
    5. https://stackoverflow.com/questions/11205386/python-beautifulsoup-get-an-attribute-value-based-on-the-name-attribute
    6. https://docs.python.org/3.3/library/datetime.html
    7. https://www.educative.io/edpresso/how-to-convert-a-string-to-a-date-in-python
    8. https://www.dataquest.io/blog/web-scraping-tutorial-python/
    """
    # Validate the input before any browser is launched.
    assert isinstance(query_link, str)
    assert "reuters" in query_link

    def meta_content(soup, **find_kwargs):
        # Return the "content" attribute of the first matching <meta> tag,
        # or None when the tag (or the attribute) is absent.
        tag = soup.find("meta", **find_kwargs)
        return None if tag is None else tag.get("content")

    # Exact class string carried by the headline anchors on the page.
    headline_class = "TextLabel__text-label___3oCVw TextLabel__black-to-orange___23uc0 TextLabel__medium___t9PWg MarketStoryItem-headline-2cgfz"

    num_times_scroll_down = 33
        # As of 9/3/20, this is the number of scrolls that results in no
        # more articles being loaded.
    num_seconds_between_scrolls = 0.5

    # Open the page and scroll down so that more articles become available.
    # The try/finally guarantees that the browser is shut down even when
    # navigation or scraping raises.
    driver_to_use = webdriver.Chrome(path_to_driver)
    try:
        driver_to_use.get(query_link)
        time.sleep(num_seconds_between_scrolls*3)
        for _ in range(num_times_scroll_down):
            driver_to_use.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )
            time.sleep(num_seconds_between_scrolls)

        # Collect the link behind every headline anchor. Anchors without
        # an href are dropped because they cannot be requested.
        article_links_list = []
        for a_element in driver_to_use.find_elements_by_tag_name("a"):
            if a_element.get_attribute("class") != headline_class:
                continue
            link = a_element.get_attribute("href")
            if link:
                article_links_list.append(link)
    finally:
        # quit() ends the whole driver session, not just the open window.
        driver_to_use.quit()

    print(
        "Found {} Reuters articles from this search query.".format(
            len(article_links_list)
        )
    )
    print("Now obtaining data for those articles.")

    # Request every article concurrently. Keep only those published within
    # the last year that mention one of the company aliases.
    page_gets_results = areq.map(areq.get(url) for url in article_links_list)

    today_datetime = datetime.today()
    lowered_aliases = [alias.lower() for alias in entities_to_search]
    page_dicts_list = []
    num_malformed_pages = 0
    for page in page_gets_results:
        if page is None or not page.ok:
            # The request failed or the server answered with an error.
            continue
        page_soup = BeautifulSoup(page.content)

        # Get and verify published date.
        article_pub_date = meta_content(
            page_soup, attrs={"name": "analyticsAttributes.articleDate"}
        )
        if article_pub_date is None:
            num_malformed_pages += 1
            continue
        try:
            article_pub_datetime = datetime.strptime(
                article_pub_date[:10], "%Y-%m-%d"
            )
        except ValueError:
            num_malformed_pages += 1
            continue
        if (today_datetime - article_pub_datetime).days > 365:
            # If the article was published beyond the past year.
            continue

        # Get and verify article text (content).
        content_html = page_soup.find(
            "div", class_="StandardArticleBody_body"
        )
        if content_html is None:
            num_malformed_pages += 1
            continue
        article_content = "".join(
            p_element.text for p_element in content_html.find_all("p")
        )
        lowered_content = article_content.lower()
        if not any(alias in lowered_content for alias in lowered_aliases):
            # The article does NOT mention any alias of the company.
            continue

        # Get title, canonical URL and author from the page metadata.
        article_title = meta_content(page_soup, property="og:title")
        article_url = meta_content(
            page_soup, attrs={"name": "analyticsAttributes.canonicalUrl"}
        )
        article_author = meta_content(
            page_soup, attrs={"name": "analyticsAttributes.author"}
        )
        if None in (article_title, article_url, article_author):
            num_malformed_pages += 1
            continue

        # Save obtained and verified data.
        page_dicts_list.append({"Title": article_title,
                                "Article_URL": article_url,
                                "Author": article_author,
                                "Published_Date": article_pub_date,
                                "Outlet": "reuters",
                                "Content": article_content})

    if num_malformed_pages:
        print(
            "Skipped {} articles with missing or malformed data.".format(
                num_malformed_pages
            )
        )

    # Return the final results.
    to_return = page_dicts_list

    return to_return

In [None]:
# Try the Reuters crawler on the FAB.AD company news page and keep at most
# the first five articles it returns.
hello = reuters_crawler("https://www.reuters.com/companies/FAB.AD/news")[:5]

In [None]:
# Pick the fourth retrieved Reuters article and display its dictionary.
example_dict = hello[3]
example_dict

#### Mediain

In [None]:
def mediain_crawler(query_link: str, path_to_driver=driver_path):
    """
    Collect recent Mediain articles that mention the company of interest.

    The search page at ``query_link`` is downloaded to find out how many
    result pages exist. Every result page is then requested, the article
    links on them are gathered, and each article is downloaded and parsed
    with BeautifulSoup. An article is kept only if it was published within
    the last 365 days and its text contains (case-insensitively) at least
    one alias from the module-level ``entities_to_search`` list.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "mediain" and the substring ".com".
    path_to_driver : str
        Not used by this crawler (no browser is launched); accepted so
        that it can be called like the other crawlers. Defaults to the
        module-level ``driver_path``.

    Returns
    -------
    to_return : list of dict objects
        One dictionary per retained article with the keys "Title",
        "Article_URL", "Published_Date", "Outlet", "Description" and
        "Content".

    Raises
    ------
    AssertionError
        If ``query_link`` fails validation, if the search page or any
        result page cannot be fetched successfully, or if more than ten
        article links per result page are found.
    ValueError
        If ``query_link`` does not contain ".com".

    Notes
    -----
    Article requests that fail outright (grequests reports those as
    ``None``) or return an error status are ignored. Articles lacking one
    of the expected tags, or carrying an unparsable date, are skipped and
    counted instead of aborting the whole crawl.

    References
    ----------
    1. https://stackoverflow.com/questions/56101612/python-requests-http-response-406
    """
    import re  # only needed here, to read the number out of the pager text

    # First, validate the inputted data.
    assert isinstance(query_link, str)
    assert "mediain" in query_link

    def meta_property(soup, property_name):
        # Return the "content" of <meta property=property_name>, or None
        # when the tag (or the attribute) is absent.
        tag = soup.find("meta", property=property_name)
        return None if tag is None else tag.get("content")

    # Every request carries an explicit User-Agent (see reference 1).
    request_headers = {"User-Agent": "XY"}

    # Download the first result page to learn how many pages there are.
    query_req_obj = areq.map(
        [areq.get(query_link, headers=request_headers)]
    )[0]
    assert query_req_obj is not None and query_req_obj.ok
    query_soup = BeautifulSoup(query_req_obj.content)

    # The pager text ends with the total number of pages. Read the last
    # whole number in it (not just the last two characters) so that counts
    # of any length work, and assume a single page when there is no pager.
    pages_span = query_soup.find("span", class_="pages")
    if pages_span is None:
        numbers_in_pager = []
    else:
        numbers_in_pager = re.findall(
            r"\d+", pages_span.text.replace(",", "")
        )
    number_of_pages = int(numbers_in_pager[-1]) if numbers_in_pager else 1

    # Build ".../page/<i>/?s=..." links by inserting the page segment
    # right after the domain.
    dot_com_index = query_link.index(".com") + len(".com")
    link_prefix = query_link[:dot_com_index+1]
    link_suffix = query_link[dot_com_index:]
    query_page_links = [
        "{}{}{}".format(link_prefix, "page/{}".format(i), link_suffix)
        for i in range(1, number_of_pages+1)
    ]

    # Next, collect all of the article links that make up these pages.
    search_page_gets_results = areq.map(
        areq.get(url, headers=request_headers) for url in query_page_links
    )
    assert all(
        page_get is not None and page_get.ok
        for page_get in search_page_gets_results
    )

    article_links_list = []
    for search_page in search_page_gets_results:
        search_page_soup = BeautifulSoup(search_page.content)
        for item in search_page_soup.find_all("article", class_="item-list"):
            anchor = item.find("a")
            if anchor is not None and anchor.get("href"):
                article_links_list.append(anchor["href"])
    assert len(article_links_list) <= 10*number_of_pages
    print(
        "Found {} articles from this search query.".format(
            len(article_links_list)
        )
    )
    print("Now obtaining data for those articles.")

    # Request every article concurrently. Keep only those published within
    # the last year that mention one of the company aliases.
    page_gets_results = areq.map(
        areq.get(url, headers=request_headers) for url in article_links_list
    )

    today_datetime = datetime.today()
    lowered_aliases = [alias.lower() for alias in entities_to_search]
    page_dicts_list = []
    num_malformed_pages = 0
    for page in page_gets_results:
        if page is None or not page.ok:
            # The request failed or the server answered with an error.
            continue
        page_soup = BeautifulSoup(page.content)

        # Get and verify published date.
        article_pub_date = meta_property(page_soup, "article:published_time")
        if article_pub_date is None:
            num_malformed_pages += 1
            continue
        try:
            article_pub_datetime = datetime.strptime(
                article_pub_date[:10], "%Y-%m-%d"
            )
        except ValueError:
            num_malformed_pages += 1
            continue
        if (today_datetime - article_pub_datetime).days > 365:
            # If the article was published beyond the past year.
            continue

        # Get and verify article text (content).
        content_html = page_soup.find("div", class_="entry")
        if content_html is None:
            num_malformed_pages += 1
            continue
        article_content = "".join(
            p_element.text for p_element in content_html.find_all("p")
        )
        lowered_content = article_content.lower()
        if not any(alias in lowered_content for alias in lowered_aliases):
            # The article does NOT mention any alias of the company.
            continue

        # Get title, URL and description from the Open Graph metadata.
        article_title = meta_property(page_soup, "og:title")
        article_url = meta_property(page_soup, "og:url")
        article_description = meta_property(page_soup, "og:description")
        if None in (article_title, article_url, article_description):
            num_malformed_pages += 1
            continue

        # Save obtained and verified data.
        page_dicts_list.append({"Title": article_title,
                                "Article_URL": article_url,
                                "Published_Date": article_pub_date,
                                "Outlet": "mediain",
                                "Description": article_description,
                                "Content": article_content})

    if num_malformed_pages:
        print(
            "Skipped {} articles with missing or malformed data.".format(
                num_malformed_pages
            )
        )

    # Return the final results.
    to_return = page_dicts_list

    return to_return

In [None]:
# Try the Mediain crawler on a percent-encoded (non-ASCII) search term and
# show at most the first five articles.
mediain_crawler(
    "https://mediain.com/?s=%D8%A8%D9%86%D9%83+%D8%A3%D8%A8%D9%88%D8%B8%D8%A8%D9%8A+%D8%A7%D9%84%D8%A3%D9%88%D9%84&lang=en"
)[:5]

In [None]:
# Try the Mediain crawler on the English search term and show at most the
# first three articles.
mediain_crawler(
    "https://mediain.com/?s=first+abu+dhabi+bank&lang=en"
)[:3]

#### Ol

In [None]:
def ol_crawler(query_link: str, path_to_driver=driver_path):
    """
    Collect recent Ol articles that mention the company of interest.

    The search page at ``query_link`` is downloaded to find out how many
    result pages exist. Every result page is then requested, the article
    links on them are gathered, and each article is downloaded and parsed
    with BeautifulSoup. An article is kept only if it was published within
    the last 365 days and its text contains (case-insensitively) at least
    one alias from the module-level ``entities_to_search`` list.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "ol" and the substring ".om".
    path_to_driver : str
        Not used by this crawler (no browser is launched); accepted so
        that it can be called like the other crawlers. Defaults to the
        module-level ``driver_path``.

    Returns
    -------
    to_return : list of dict objects
        One dictionary per retained article with the keys "Title",
        "Article_URL", "Published_Date", "Outlet", "Description" and
        "Content".

    Raises
    ------
    AssertionError
        If ``query_link`` fails validation, if the search page or any
        result page cannot be fetched successfully, or if more than ten
        article links per result page are found.
    ValueError
        If ``query_link`` does not contain ".om".

    Notes
    -----
    Article requests that fail outright (grequests reports those as
    ``None``) or return an error status are ignored. Articles lacking one
    of the expected tags, or carrying an unparsable date, are skipped and
    counted instead of aborting the whole crawl.
    """
    from urllib.parse import urljoin  # only needed to resolve article hrefs

    # First, validate the inputted data.
    assert isinstance(query_link, str)
    assert "ol" in query_link

    def meta_property(soup, property_name):
        # Return the "content" of <meta property=property_name>, or None
        # when the tag (or the attribute) is absent.
        tag = soup.find("meta", property=property_name)
        return None if tag is None else tag.get("content")

    # Download the first result page to learn how many pages there are.
    query_req_obj = areq.map([areq.get(query_link)])[0]
    assert query_req_obj is not None and query_req_obj.ok
    query_soup = BeautifulSoup(query_req_obj.content)

    # One navigation item is counted per result page; a result without
    # any navigation items still has a single page.
    num_nav_items = len(
        query_soup.find_all("span", class_="pages-nav-item")
    )
    number_of_pages = max(num_nav_items, 1)

    # Build ".../page/<i>/?s=..." links by inserting the page segment
    # right after the domain.
    dot_com_index = query_link.index(".om") + len(".om")
    link_prefix = query_link[:dot_com_index+1]
    link_suffix = query_link[dot_com_index:]
    query_page_links = [
        "{}{}{}".format(link_prefix, "page/{}".format(i), link_suffix)
        for i in range(1, number_of_pages+1)
    ]

    # Next, collect all of the article links that make up these pages.
    search_page_gets_results = areq.map(
        areq.get(url) for url in query_page_links
    )
    assert all(
        page_get is not None and page_get.ok
        for page_get in search_page_gets_results
    )

    article_links_list = []
    for search_page in search_page_gets_results:
        search_page_soup = BeautifulSoup(search_page.content)
        for heading in search_page_soup.find_all("h2", class_="post-title"):
            anchor = heading.find("a")
            if anchor is None or not anchor.get("href"):
                continue
            # urljoin gives the same result as plain concatenation for
            # hrefs starting with "/", and also copes with hrefs that are
            # already absolute or lack the leading slash.
            article_links_list.append(
                urljoin("https://www.ol.om", anchor["href"])
            )
    assert len(article_links_list) <= 10*number_of_pages
    print(
        "Found {} articles from this search query.".format(
            len(article_links_list)
        )
    )
    print("Now obtaining data for those articles.")

    # Request every article concurrently. Keep only those published within
    # the last year that mention one of the company aliases.
    page_gets_results = areq.map(areq.get(url) for url in article_links_list)

    today_datetime = datetime.today()
    lowered_aliases = [alias.lower() for alias in entities_to_search]
    page_dicts_list = []
    num_malformed_pages = 0
    for page in page_gets_results:
        if page is None or not page.ok:
            # The request failed or the server answered with an error.
            continue
        page_soup = BeautifulSoup(page.content)

        # Get and verify published date.
        article_pub_date = meta_property(page_soup, "article:published_time")
        if article_pub_date is None:
            num_malformed_pages += 1
            continue
        try:
            article_pub_datetime = datetime.strptime(
                article_pub_date[:10], "%Y-%m-%d"
            )
        except ValueError:
            num_malformed_pages += 1
            continue
        if (today_datetime - article_pub_datetime).days > 365:
            # If the article was published beyond the past year.
            continue

        # Get and verify article text (content).
        content_html = page_soup.find(
            "div", class_="entry-content entry clearfix"
        )
        if content_html is None:
            num_malformed_pages += 1
            continue
        article_content = "".join(
            p_element.text for p_element in content_html.find_all("p")
        )
        lowered_content = article_content.lower()
        if not any(alias in lowered_content for alias in lowered_aliases):
            # The article does NOT mention any alias of the company.
            continue

        # Get title, URL and description from the Open Graph metadata.
        article_title = meta_property(page_soup, "og:title")
        article_url = meta_property(page_soup, "og:url")
        article_description = meta_property(page_soup, "og:description")
        if None in (article_title, article_url, article_description):
            num_malformed_pages += 1
            continue

        # Save obtained and verified data.
        page_dicts_list.append({"Title": article_title,
                                "Article_URL": article_url,
                                "Published_Date": article_pub_date,
                                "Outlet": "ol",
                                "Description": article_description,
                                "Content": article_content})

    if num_malformed_pages:
        print(
            "Skipped {} articles with missing or malformed data.".format(
                num_malformed_pages
            )
        )

    # Return the final results.
    to_return = page_dicts_list

    return to_return

In [None]:
# Try the Ol crawler on a search for "FAB" and show the third article.
ol_crawler("https://www.ol.om/?s=FAB")[2]

In [None]:
# Try the Ol crawler on a percent-encoded (non-ASCII) search term and show
# the third article.
ol_crawler("https://www.ol.om/?s=%D8%A8%D9%86%D9%83+%D8%A3%D8%A8%D9%88%D8%B8%D8%A8%D9%8A+%D8%A7%D9%84%D8%A3%D9%88%D9%84")[2]

#### Alarabiya

In [None]:
def alarabiya_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Alarabiya crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Alarabiya search feature that mention the company
    (under any of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "alarabiya".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "alarabiya" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Al

In [None]:
def al_ain_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Al crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Al search feature that mention the company (under any
    of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "al".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "al" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Sa

In [None]:
def sa_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Sa crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Sa search feature that mention the company (under any
    of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "sa".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "sa" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Mubasher

In [None]:
def mubasher_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Mubasher crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Mubasher search feature that mention the company
    (under any of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "mubasher".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "mubasher" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Alkhaleejonline

In [None]:
def alkhaleejonline_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Alkhaleejonline crawler; scraping is not
    implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Alkhaleejonline search feature that mention the company
    (under any of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "alkhaleejonline".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "alkhaleejonline" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Aa

In [None]:
def aa_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Aa crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Aa search feature that mention the company (under any
    of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "aa".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "aa" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Eremnews

In [None]:
def eremnews_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Eremnews crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Eremnews search feature that mention the company
    (under any of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "eremnews".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "eremnews" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Elnashra

In [None]:
def elnashra_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Elnashra crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Elnashra search feature that mention the company
    (under any of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "elnashra".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "elnashra" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Aleqt

In [None]:
def aleqt_crawler(query_link: str, path_to_driver=driver_path):
    """
    Placeholder for the Aleqt crawler; scraping is not implemented yet.

    At present the function only checks its ``query_link`` argument and
    then returns ``None``. It is meant to gather the articles returned by
    a query on the Aleqt search feature that mention the company (under
    any of its aliases) in their text.

    Parameters
    ----------
    query_link : str
        URL of the search query performed on this outlet. It must contain
        the substring "aleqt".
    path_to_driver : str
        Location of a Chrome driver executable on the local machine.
        Defaults to the module-level ``driver_path``; currently unused.

    Returns
    -------
    None
        No articles are collected yet. The intended result is a list of
        dictionaries, one per article found by the search query.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or lacks the expected substring.
    """
    # Reject anything that is not a link to this outlet.
    assert isinstance(query_link, str)
    assert "aleqt" in query_link

    # Nothing is scraped yet, so there is no result to hand back.
    return None

#### Wam

In [None]:
def wam_crawler(query_link: str, path_to_driver=driver_path):
    """
    Crawl the Wam search results for articles that mention the company
    (under any of its aliases).

    NOTE: this crawler is currently a stub. It only validates
    ``query_link`` and then returns ``None``; no page is fetched and
    ``path_to_driver`` is not used yet.

    Parameters
    ----------
    query_link : str
        URL of the search query that was performed on this outlet. It
        must contain the substring "wam".
    path_to_driver : str
        Location on the local machine of the browser driver executable
        the crawler is meant to use. Defaults to the module-level
        ``driver_path``.

    Returns
    -------
    to_return : None
        Always ``None`` for now. The intended result is a list of
        dictionaries, one per article found by the search query, each
        holding the information retrieved for that article.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or does not contain "wam".
    """
    # Sanity-check the caller's input before anything else happens.
    assert isinstance(query_link, str) and "wam" in query_link

    # Placeholder: the crawling logic has not been written yet, so there
    # are no articles to hand back.
    return None

#### Youm7

In [None]:
def youm7_crawler(query_link: str, path_to_driver=driver_path):
    """
    Crawl the Youm7 search results for articles that mention the company
    (under any of its aliases).

    NOTE: this crawler is currently a stub. It only validates
    ``query_link`` and then returns ``None``; no page is fetched and
    ``path_to_driver`` is not used yet.

    Parameters
    ----------
    query_link : str
        URL of the search query that was performed on this outlet. It
        must contain the substring "youm7".
    path_to_driver : str
        Location on the local machine of the browser driver executable
        the crawler is meant to use. Defaults to the module-level
        ``driver_path``.

    Returns
    -------
    to_return : None
        Always ``None`` for now. The intended result is a list of
        dictionaries, one per article found by the search query, each
        holding the information retrieved for that article.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or does not contain "youm7".
    """
    # Sanity-check the caller's input before anything else happens.
    assert isinstance(query_link, str) and "youm7" in query_link

    # Placeholder: the crawling logic has not been written yet, so there
    # are no articles to hand back.
    return None

#### Alittihad

In [None]:
def alittihad_crawler(query_link: str, path_to_driver=driver_path):
    """
    Crawl the Alittihad search results for articles that mention the
    company (under any of its aliases).

    NOTE: this crawler is currently a stub. It only validates
    ``query_link`` and then returns ``None``; no page is fetched and
    ``path_to_driver`` is not used yet.

    Parameters
    ----------
    query_link : str
        URL of the search query that was performed on this outlet. It
        must contain the substring "alittihad".
    path_to_driver : str
        Location on the local machine of the browser driver executable
        the crawler is meant to use. Defaults to the module-level
        ``driver_path``.

    Returns
    -------
    to_return : None
        Always ``None`` for now. The intended result is a list of
        dictionaries, one per article found by the search query, each
        holding the information retrieved for that article.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or does not contain
        "alittihad".
    """
    # Sanity-check the caller's input before anything else happens.
    assert isinstance(query_link, str) and "alittihad" in query_link

    # Placeholder: the crawling logic has not been written yet, so there
    # are no articles to hand back.
    return None

#### Amwalalghad

In [None]:
def amwalalghad_crawler(query_link: str, path_to_driver=driver_path):
    """
    Crawl the Amwalalghad search results for articles that mention the
    company (under any of its aliases).

    NOTE: this crawler is currently a stub. It only validates
    ``query_link`` and then returns ``None``; no page is fetched and
    ``path_to_driver`` is not used yet.

    Parameters
    ----------
    query_link : str
        URL of the search query that was performed on this outlet. It
        must contain the substring "amwalalghad".
    path_to_driver : str
        Location on the local machine of the browser driver executable
        the crawler is meant to use. Defaults to the module-level
        ``driver_path``.

    Returns
    -------
    to_return : None
        Always ``None`` for now. The intended result is a list of
        dictionaries, one per article found by the search query, each
        holding the information retrieved for that article.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or does not contain
        "amwalalghad".
    """
    # Sanity-check the caller's input before anything else happens.
    assert isinstance(query_link, str) and "amwalalghad" in query_link

    # Placeholder: the crawling logic has not been written yet, so there
    # are no articles to hand back.
    return None

#### Amwal-mag

In [None]:
def amwal_mag_crawler(query_link: str, path_to_driver=driver_path):
    """
    Crawl the Amwal-Mag search results for articles that mention the
    company (under any of its aliases).

    NOTE: this crawler is currently a stub. It only validates
    ``query_link`` and then returns ``None``; no page is fetched and
    ``path_to_driver`` is not used yet.

    Parameters
    ----------
    query_link : str
        URL of the search query that was performed on this outlet. It
        must contain the substring "amwal-mag".
    path_to_driver : str
        Location on the local machine of the browser driver executable
        the crawler is meant to use. Defaults to the module-level
        ``driver_path``.

    Returns
    -------
    to_return : None
        Always ``None`` for now. The intended result is a list of
        dictionaries, one per article found by the search query, each
        holding the information retrieved for that article.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string or does not contain
        "amwal-mag".
    """
    # Sanity-check the caller's input before anything else happens.
    assert isinstance(query_link, str) and "amwal-mag" in query_link

    # Placeholder: the crawling logic has not been written yet, so there
    # are no articles to hand back.
    return None

#### Full crawler function

In [None]:
def search_query_crawler(query_link: str, path_to_driver=driver_path):
    """
    The purpose of this function is to route a search-query URL to the
    crawler of the outlet that the URL belongs to.

    The outlet is identified by checking which of the supported outlet
    keys occurs as a substring of ``query_link``; exactly one key must
    match. The call is then forwarded to the module-level function named
    ``<key>_crawler`` (with any hyphen in the key replaced by an
    underscore), e.g. "amwal-mag" is handled by ``amwal_mag_crawler``.

    Parameters
    ----------
    query_link : str
        This string allows the user to specify the URL of the search
        query that they performed on one of the supported outlets.
    path_to_driver : str
        This string allows the user to specify where on their local
        machine the browser driver executable can be found. It is passed
        through unchanged to the selected outlet crawler.

        NOTE that its default value is the module-level ``driver_path``.

    Returns
    -------
    to_return : list of dict objects or None
        Whatever the selected outlet crawler returns.

    Raises
    ------
    AssertionError
        If ``query_link`` is not a string, or if it does not match
        exactly one of the supported outlets.
    NotImplementedError
        If no crawler function is defined for the matched outlet.
    """
    # First, validate the inputted data (same style as the outlet crawlers).
    assert isinstance(query_link, str), "query_link must be a string"

    # Next, define all of the necessary parameters that will be needed for
    # the rest of the function.
    # NOTE(review): matching is a plain substring test on the whole URL, so
    # short keys such as "ol", "sa" and "aa" can also occur by accident
    # (e.g. inside a path or query string) and trip the "exactly one"
    # check below. Consider matching against the host name instead.
    allowed_outlets = ["reuters",
                       "mediain",
                       "ol",
                       "alarabiya",
                       "al-ain",
                       "sa",
                       "mubasher",
                       "alkhaleejonline",
                       "aa",
                       "eremnews",
                       "elnashra",
                       "aleqt",
                       "wam",
                       "youm7",
                       "alittihad",
                       "amwalalghad",
                       "amwal-mag"]
    matched_outlets = [outlet for outlet in allowed_outlets
                       if outlet in query_link]
    assert len(matched_outlets) == 1, (
        "query_link must match exactly one supported outlet, "
        f"but matched: {matched_outlets}"
    )
    outlet_used = matched_outlets[0]

    # Finally, hand the query over to the crawler written for that outlet.
    # The lookup is done by name so that an outlet whose crawler is missing
    # produces a clear error instead of a NameError.
    # NOTE(review): assumes every outlet crawler follows the
    # ``<outlet>_crawler`` naming used by the crawlers in this notebook
    # (e.g. ``aleqt_crawler``, ``amwal_mag_crawler``) - confirm for the rest.
    crawler_name = outlet_used.replace("-", "_") + "_crawler"
    crawler = globals().get(crawler_name)
    if not callable(crawler):
        raise NotImplementedError(
            f"No crawler named '{crawler_name}' is defined for outlet "
            f"'{outlet_used}'"
        )

    to_return = crawler(query_link, path_to_driver)
    return to_return