# get_state_topic_google_news

In [1]:
import xml.etree.ElementTree as ET

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Allow up to 100 rows to be rendered before pandas truncates DataFrame output.
pd.set_option("display.max_rows", 100)

In [2]:
def _parse_google_news_rss(rss_content, state):
    """Convert a Google News RSS payload into one dict per ``<item>`` element.

    Parameters
    ----------
    rss_content : bytes or str
        Raw RSS/XML document.
    state : str
        Value copied into the ``State`` field of every record.

    Returns
    -------
    list of dict
        Records with the keys ``Title``, ``URL``, ``Published``, ``State`` and
        ``Source``, in feed order. A child element missing from an item
        yields an empty string rather than an error.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If ``rss_content`` is not well-formed XML.
    """
    root = ET.fromstring(rss_content)
    records = []
    # Iterate over every <item>, including the last one in the feed.
    for item in root.iter("item"):
        title = item.findtext("title", default="")
        records.append(
            {
                "Title": title,
                "URL": item.findtext("link", default=""),
                "Published": item.findtext("pubDate", default=""),
                "State": state,
                # The publisher is taken from the end of the title. Splitting
                # on the last " - " (not on every "-") keeps hyphenated
                # publisher names intact and avoids a leading space.
                "Source": title.rsplit(" - ", 1)[-1].strip(),
            }
        )
    return records


def get_state_topic_google_news(state, topic):
    """Fetch Google News RSS search results for a US state and a topic.

    Parameters
    ----------
    state : str
        US state name, e.g. ``"North Dakota"``.
    topic : str
        Topic of interest, e.g. ``"Coronavirus"``.

    Returns
    -------
    pandas.DataFrame
        One row per article with the columns ``Title``, ``URL``,
        ``Published`` (the feed's publication-date string), ``State`` (the
        ``state`` argument repeated on every row) and ``Source`` (the text
        after the last ``" - "`` in the title). The frame is empty, but still
        has these columns, when the feed contains no items.

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an HTTP error status.
    xml.etree.ElementTree.ParseError
        If the response body is not well-formed XML.
    """
    # Passing the query through ``params`` lets requests percent-encode it, so
    # characters such as "&" or "#" in ``state``/``topic`` cannot break the URL.
    response = requests.get(
        "https://news.google.com/rss/search",
        params={
            "q": "{} {}".format(state, topic),
            "hl": "en-US",
            "gl": "US",
            "ceid": "US:en",
        },
        timeout=30,  # do not hang the notebook forever on a stalled connection
    )
    # Surface HTTP failures instead of silently returning an empty frame.
    response.raise_for_status()

    records = _parse_google_news_rss(response.content, state)
    return pd.DataFrame(
        records, columns=["Title", "URL", "Published", "State", "Source"]
    )
    

In [3]:
# Example: fetch the current Google News results for one state/topic pair.
get_state_topic_google_news(state="North Dakota", topic="Coronavirus")

Unnamed: 0,Title,URL,Published,State,Source
0,3 more test positive for coronavirus in North ...,https://www.inforum.com/lifestyle/health/50111...,"Mon, 23 Mar 2020 23:00:00 GMT",North Dakota,INFORUM
1,"North Dakota coronavirus news, March 23: Cauti...",https://bismarcktribune.com/news/local/health/...,"Mon, 23 Mar 2020 23:15:00 GMT",North Dakota,Bismarck Tribune
2,"North Dakota coronavirus news, March 22: Trans...",https://bismarcktribune.com/news/local/health/...,"Sun, 22 Mar 2020 22:30:00 GMT",North Dakota,Bismarck Tribune
3,"2 more cases of coronavirus in North Dakota, b...",https://www.grandforksherald.com/lifestyle/hea...,"Sun, 22 Mar 2020 23:00:00 GMT",North Dakota,Grand Forks Herald
4,Coronavirus: South Dakota now seeing community...,https://www.inforum.com/lifestyle/health/50111...,"Mon, 23 Mar 2020 14:00:00 GMT",North Dakota,INFORUM
5,North Dakota: Latest updates on Coronavirus - ...,https://www.livescience.com/north-dakota-coron...,"Mon, 23 Mar 2020 16:32:00 GMT",North Dakota,Livescience.com
6,"North Dakota coronavirus news, March 20: Bisma...",https://bismarcktribune.com/news/local/health/...,"Fri, 20 Mar 2020 23:00:00 GMT",North Dakota,Bismarck Tribune
7,UPDATE: North Dakota now has five cases of Cor...,https://www.kvrr.com/2020/03/17/update-north-d...,"Wed, 18 Mar 2020 01:07:30 GMT",North Dakota,KVRR
8,"North Dakota governor, others warn: Beware of ...",https://www.grandforksherald.com/news/governme...,"Sun, 22 Mar 2020 13:00:00 GMT",North Dakota,Grand Forks Herald
9,"North Dakota coronavirus news, March 21: State...",https://bismarcktribune.com/news/local/health/...,"Sat, 21 Mar 2020 23:30:00 GMT",North Dakota,Bismarck Tribune
