# Google Schema.org Survey for Biomedical Queries

In [282]:
from pytrends.request import TrendReq
from googleapiclient.discovery import build
from collections import deque
from math import ceil
import pandas as pd

In [244]:
def get_popular_queries(seed_query,
                        relevance_distance=2,
                        popularity_threshold=55,
                        category=45,               # category: health
                        timeframe='today 12-m',    # timeframe: the last 12 months
                        location='US'):            # location: United States
    """
    Expand a seed query into the popular related queries reported by Google Trends.

    The search proceeds in rounds. In every round each query of the current
    frontier is sent to Google Trends, and the "top" related queries whose
    value reaches popularity_threshold become the frontier of the next round.
    Only the frontier left after the final round is returned, so earlier
    rounds (and the seed itself, unless relevance_distance is 0) are not part
    of the output.

    Arguments:
    seed_query -- initial seed query to bootstrap refined queries
    relevance_distance -- number of expansion rounds away from the seed
                          (default = 2)
    popularity_threshold -- minimum 'value' a related query needs to be kept
                            (default = 55)
    category -- topic category for the query, full list:
                https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories
                (default = 45 [Health])
    timeframe -- Google Trends time range for the query (default = 'today 12-m')
    location -- two letter country abbreviation of the origin of the query
                (default = United States)

    Returns:
    output -- a set of popular queries
    """
    trends_client = TrendReq(hl='en-US', tz=360)

    frontier = [seed_query]
    rounds_done = 0
    while rounds_done < relevance_distance:
        rounds_done += 1
        discovered = set()
        for term in frontier:
            trends_client.build_payload([term], cat=category, timeframe=timeframe, geo=location)
            try:
                top = trends_client.related_queries()[term]['top']
                popular_rows = top[top['value'] >= popularity_threshold]
                discovered.update(popular_rows['query'])
            except TypeError:
                # No usable "top" table for this term (e.g. it is None), so
                # the term contributes nothing to the next round.
                continue
        frontier = list(discovered)
    return set(frontier)
        

In [261]:
def google_search(search_keyword, api_key, cse_id, **kwargs):
    """
    Do a Web search using the Google Custom Search API.

    Arguments:
    search_keyword -- search keyword string
    api_key -- the Google API key
    cse_id -- the custom search engine key
    **kwargs -- extra request parameters forwarded unchanged to cse().list()
                (for example start and num)

    Returns:
    A list of search result items; an empty list when the response carries
    no 'items' entry.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_keyword, cx=cse_id, **kwargs).execute()
    # A response without hits has no 'items' key at all; report that as
    # "no results" instead of failing with a KeyError.
    return res.get('items', [])

In [280]:
def survey_google_search(search_keyword, api_key, cse_id, result_size=10):
    """
    Survey the rank of search results from Google.

    Results are fetched page by page through google_search(), at most
    DEFAULT_GOOGLE_RESULT per request, and numbered consecutively from 1 in
    the order they are returned.

    Arguments:
    search_keyword -- search keyword string
    api_key -- the Google API key
    cse_id -- the custom search engine key
    result_size -- maximum number of results to collect (default = 10)

    Returns:
    output -- a dataframe with the columns 'rank', 'url' and 'title' holding
              at most result_size rows
    """
    DEFAULT_GOOGLE_RESULT = 10

    data = []
    rank = 1
    iterations = ceil(result_size / DEFAULT_GOOGLE_RESULT)
    for i in range(iterations):
        # Ask only for what is still missing so the last page does not
        # overshoot result_size when it is not a multiple of the page size.
        page_size = min(DEFAULT_GOOGLE_RESULT, result_size - len(data))
        results = google_search(search_keyword,
                                api_key,
                                cse_id,
                                start=(i * DEFAULT_GOOGLE_RESULT) + 1,
                                num=page_size)
        if not results:
            # Nothing came back for this page, so later pages would be
            # empty as well; stop spending requests.
            break
        for result in results[:page_size]:
            data.append([rank, result['link'], result['title']])
            rank += 1
    return pd.DataFrame(data, columns=['rank', 'url', 'title'])

# Test Cases

In [294]:
# Expand the seed "flu" through 4 rounds, keeping only related queries whose
# popularity value is at least 90.
get_popular_queries("flu", relevance_distance=4, popularity_threshold=90)

{'what symptoms of the flu'}

In [281]:
# Both credentials are left blank here; fill in a Google API key and a custom
# search engine id before running this cell.
my_api_key = ""
my_cse_id = ""
# result_size is not given, so the survey uses its default of 10 results.
survey_google_search('what symptoms of the flu', my_api_key, my_cse_id)

Unnamed: 0,rank,url,title
0,1,https://www.usatoday.com/story/news/nation-now...,2018 flu symptoms: Do I have the flu?
1,2,https://www.webmd.com/cold-and-flu/flu-guide/f...,"Flu Symptoms, Types, and Complications of the Flu"
2,3,https://www.healthline.com/health/cold-flu/ear...,What Are the 6 Early Flu Symptoms?
3,4,https://www.cnn.com/2018/01/31/health/flu-symp...,How to spot flu symptoms - CNN
4,5,https://www.washingtonpost.com/news/to-your-he...,Flu symptoms 2018: What to know about the flu ...
5,6,https://www.cdc.gov/flu/consumer/symptoms.htm,Flu Symptoms & Complications | Seasonal Influe...
6,7,https://www.rd.com/health/wellness/flu-symptoms/,Symptoms of the Flu: How to Tell If It's the R...
7,8,http://www.mucinex.com/cold-and-flu-learning-c...,COMMON AND UNCOMMON SYMPTOMS OF THE FLU
8,9,https://www.cdc.gov/flu/takingcare.htm,The Flu: What To Do If You Get Sick | Seasonal...
9,10,http://www.businessinsider.com/how-to-beat-pre...,How to help beat a cold or the flu - Business ...
