<a href="https://colab.research.google.com/github/simodepth/Autosuggest-Keywords/blob/main/Google_Autosuggest_with_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🚀 Run Google Autosuggest to find keyword ideas in bulk




# Requirements and Assumptions
- Install the `requests_html` module before you start
- Leverage GPU runtime type when processing very generic queries

In [1]:
#@title Install Requests_html
# Installs the requests_html package, which provides the HTML and HTMLSession
# classes imported in the next cell.
!pip install requests_html

Collecting requests_html
  Downloading requests_html-0.10.0-py3-none-any.whl (13 kB)
Collecting pyppeteer>=0.0.14
  Downloading pyppeteer-1.0.2-py3-none-any.whl (83 kB)
[K     |████████████████████████████████| 83 kB 1.4 MB/s 
[?25hCollecting w3lib
  Downloading w3lib-1.22.0-py2.py3-none-any.whl (20 kB)
Collecting fake-useragent
  Downloading fake-useragent-0.1.11.tar.gz (13 kB)
Collecting parse
  Downloading parse-1.19.0.tar.gz (30 kB)
Collecting pyquery
  Downloading pyquery-1.4.3-py3-none-any.whl (22 kB)
Collecting urllib3<2.0.0,>=1.25.8
  Downloading urllib3-1.26.9-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 24.8 MB/s 
Collecting websockets<11.0,>=10.0
  Downloading websockets-10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (112 kB)
[K     |████████████████████████████████| 112 kB 48.0 MB/s 
Collecting pyee<9.0.0,>=8.1.0
  Downloading pyee-8.2.2-py2.py3-none-any.whl (12 kB)
Collecting cssse

In [2]:
#@title Run Import Modules
import requests
import urllib
import json
import operator
import pandas as pd
from requests_html import HTML
from requests_html import HTMLSession


# Set up connections with Google SERPs

In [3]:
def get_source(url):
    """Fetch ``url`` with a short-lived ``HTMLSession`` and return the response.

    Args:
        url: Absolute URL to request with HTTP GET.

    Returns:
        The response object produced by ``session.get(url)``, or ``None`` when
        a ``requests.exceptions.RequestException`` was raised (the exception is
        printed rather than propagated, so callers must check for ``None``).
    """
    try:
        # The context manager closes the session, releasing its pooled
        # connections, whether the request succeeds or raises. The response
        # body has already been read by the time get() returns.
        with HTMLSession() as session:
            return session.get(url)
    except requests.exceptions.RequestException as e:
        print(e)
        return None

In [4]:
def get_results(query):
    """Fetch Google Autosuggest completions for ``query``.

    The query is URL-encoded with ``quote_plus`` and appended to the
    ``suggestqueries.google.com`` endpoint (``output=chrome``, ``hl=en``);
    the response body is decoded as JSON.

    Args:
        query: Search phrase to request completions for.

    Returns:
        The decoded JSON payload. In the sample output shown in this notebook
        it is a list whose index 1 holds the suggested terms and whose index 4
        is a dict containing ``'google:suggestrelevance'``. If the request
        failed (``get_source`` returned ``None``), a payload of that same
        shape with no suggestions is returned so that ``format_results``
        produces an empty list instead of raising ``AttributeError``.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # A bare ``import urllib`` does not guarantee that the ``parse`` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.parse

    encoded_query = urllib.parse.quote_plus(query)
    response = get_source("https://suggestqueries.google.com/complete/search?output=chrome&hl=en&q=" + encoded_query)
    if response is None:
        # get_source() has already printed the error; return an empty payload
        # so one failed request does not abort a bulk run.
        return [query, [], [], [], {'google:suggestrelevance': []}]
    return json.loads(response.text)

In [5]:
#@title Type your Keyword
# Seed keyword; later cells pass `search_term` to the expansion helpers.
search_term = "Catan"
# Decoded JSON payload returned by get_results() for the seed keyword.
results = get_results(search_term)
results

['Catan',
 ['catania',
  'catanduanes',
  'catan online',
  'catan universe',
  'catan rules',
  'catanauan quezon',
  'catan strategy',
  'catanduanes map'],
 ['', '', '', '', '', '', '', ''],
 [],
 {'google:clientdata': {'bpc': False, 'tlw': False},
  'google:suggestrelevance': [800, 653, 652, 651, 650, 601, 600, 550],
  'google:suggestsubtypes': [[512],
   [512],
   [512],
   [512],
   [512],
   [512],
   [512],
   [512]],
  'google:suggesttype': ['QUERY',
   'QUERY',
   'QUERY',
   'QUERY',
   'QUERY',
   'QUERY',
   'QUERY',
   'QUERY'],
  'google:verbatimrelevance': 1300}]

In [6]:
#@title Format the Results
def format_results(results):
    """Pair each suggested term with its relevance score.

    Args:
        results: Payload as returned by ``get_results``: index 1 is the list
            of suggested terms and index 4 is a dict whose
            ``'google:suggestrelevance'`` list is aligned with those terms.

    Returns:
        A list of ``{'term': ..., 'relevance': ...}`` dicts, in the same order
        as the suggested terms.
    """
    # The relevance list is looked up per term, so a payload with no
    # suggestions never touches index 4.
    return [
        {'term': term, 'relevance': results[4]['google:suggestrelevance'][position]}
        for position, term in enumerate(results[1])
    ]
# Turn the payload fetched for `search_term` into term/relevance records and display them.
formatted_results = format_results(results)
formatted_results

[{'relevance': 800, 'term': 'catania'},
 {'relevance': 653, 'term': 'catanduanes'},
 {'relevance': 652, 'term': 'catan online'},
 {'relevance': 651, 'term': 'catan universe'},
 {'relevance': 650, 'term': 'catan rules'},
 {'relevance': 601, 'term': 'catanauan quezon'},
 {'relevance': 600, 'term': 'catan strategy'},
 {'relevance': 550, 'term': 'catanduanes map'}]

# Spice up the research by adding some term suffixes 🌶

In [7]:
def get_expanded_term_suffixes():
    """Return the single-letter suffixes ``'a'`` through ``'z'`` as a new list."""
    return list('abcdefghijklmnopqrstuvwxyz')

# Define your Content Angle

In [8]:
def get_expanded_term_prefixes():
    """Return the query prefixes (content angles) as a new list.

    Each entry is placed in front of the seed keyword by
    ``get_expanded_terms``; most entries contain a ``*`` character.
    """
    return [
        'what *',
        'where *',
        'how to *',
        'why *',
        'vs*',
        'or*',
        'buy*',
        'how much*',
        'best *',
        'tutorial *',
        'tips *',
        'ideas *',
        'review *',
        'guide *',
    ]

📔 You can extend this list of angles as you see fit

In [9]:
#@title Expand the Research
def get_expanded_terms(query):
    """Build every query variant for ``query``.

    Args:
        query: Seed keyword (a string).

    Returns:
        A list starting with ``query`` itself, followed by one
        ``"<prefix> <query>"`` entry per prefix and then one
        ``"<query> <suffix>"`` entry per suffix.
    """
    prefixes = get_expanded_term_prefixes()
    suffixes = get_expanded_term_suffixes()

    with_prefix = [prefix + ' ' + query for prefix in prefixes]
    with_suffix = [query + ' ' + suffix for suffix in suffixes]

    return [query] + with_prefix + with_suffix


In [10]:
# Preview the query variants generated from the seed keyword.
get_expanded_terms(search_term)


['Catan',
 'what * Catan',
 'where * Catan',
 'how to * Catan',
 'why * Catan',
 'vs* Catan',
 'or* Catan',
 'buy* Catan',
 'how much* Catan',
 'best * Catan',
 'tutorial * Catan',
 'tips * Catan',
 'ideas * Catan',
 'review * Catan',
 'guide * Catan',
 'Catan a',
 'Catan b',
 'Catan c',
 'Catan d',
 'Catan e',
 'Catan f',
 'Catan g',
 'Catan h',
 'Catan i',
 'Catan j',
 'Catan k',
 'Catan l',
 'Catan m',
 'Catan n',
 'Catan o',
 'Catan p',
 'Catan q',
 'Catan r',
 'Catan s',
 'Catan t',
 'Catan u',
 'Catan v',
 'Catan w',
 'Catan x',
 'Catan y',
 'Catan z']

In [11]:
#@title Get further suggestions 🤷
def get_expanded_suggestions(query):
    """Collect autosuggest results for every expanded variant of ``query``.

    One ``get_results`` call is made per term returned by
    ``get_expanded_terms``; each payload is converted with ``format_results``
    and the records are pooled.

    Args:
        query: Seed keyword to expand.

    Returns:
        A list of ``{'term': ..., 'relevance': ...}`` dicts sorted by
        ``'relevance'`` in descending order. Records with equal relevance keep
        the order in which they were collected. Duplicate terms coming from
        different variants are not removed.
    """
    all_results = []
    for term in get_expanded_terms(query):
        all_results.extend(format_results(get_results(term)))

    # Sort once after collecting everything. sorted() is stable, so the result
    # matches re-sorting the accumulated list after every request.
    return sorted(all_results, key=operator.itemgetter('relevance'), reverse=True)


In [12]:
# Query every expanded variant of the seed keyword and show the pooled,
# relevance-sorted suggestions.
expanded_results = get_expanded_suggestions(search_term)
expanded_results

[{'relevance': 1251, 'term': 'catan expansion'},
 {'relevance': 1250, 'term': 'catan board game'},
 {'relevance': 1250, 'term': 'catan cities and knights'},
 {'relevance': 1250, 'term': 'catan expansion pack'},
 {'relevance': 1250, 'term': 'catan game'},
 {'relevance': 1250, 'term': 'catan how to play'},
 {'relevance': 1250, 'term': 'catan io'},
 {'relevance': 1250, 'term': 'catan junior'},
 {'relevance': 1250, 'term': 'catan knights and cities'},
 {'relevance': 1250, 'term': 'catan longest road rules'},
 {'relevance': 1250, 'term': 'catan map generator'},
 {'relevance': 1250, 'term': 'catan online'},
 {'relevance': 1250, 'term': 'catan qualifiers 2022'},
 {'relevance': 1250, 'term': 'catan rules'},
 {'relevance': 1250, 'term': 'catan strategy'},
 {'relevance': 1250, 'term': 'catan universe'},
 {'relevance': 1250, 'term': 'catan vr'},
 {'relevance': 1250, 'term': 'catan xbox'},
 {'relevance': 1250, 'term': 'catan zaidimas'},
 {'relevance': 1200, 'term': 'catan pronunciation'},
 {'relev



---



```
# N.B.: The "relevance" indicator is an automated estimate based on how frequently a specific term appears in the search results page
```

