<a href="https://colab.research.google.com/github/purrvaja/Google-Trends-Analysis-with-Power-BI/blob/main/google_trends_api.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pytrends

In [2]:
import pandas as pd
import numpy as np

from pytrends.request import TrendReq

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import time
import math


In [78]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [67]:
keywords_list = [
    "Machine Learning",
    "Deep Learning",
    "Natural Language Processing",
    "Computer Vision",
    "Robotics",
    "Big Data",
    "Algorithm",
    "Neural Networks",
    "Artificial General Intelligence (AGI)",
    "Explainable AI (XAI)",
    "Reinforcement Learning",
    "Chatbots",
    "Generative AI",
    "Predictive Analytics",
    "AI Ethics"
]

In [5]:
# initialize a new Google Trends Request Object

pt = TrendReq(hl='en-US', tz=360)

In [53]:
# setting the time interval in between each api call

time_interval = 15

## Interest over time

In [69]:
def interest_over_time(keywords_list, pt):
    result_df = pd.DataFrame(columns=['keyword', 'date', 'volume'])

    # Loop through each keyword
    for index, keyword in enumerate(keywords_list):

        # print a progress message
        print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")


        # Set the keyword & timeframe for the current month
        try:
            pt.build_payload([keyword], cat=0, timeframe='all')

            # Get the interest over time
            iot = pt.interest_over_time()
            iot.reset_index(inplace=True)

            # Add the columns
            iot['keyword'] = keyword
            iot['volume'] = iot[keyword]

            # Appending the results to the main DataFrame
            result_df = pd.concat([result_df, iot[['date', 'keyword', 'volume']]], ignore_index=True)

        except Exception as e:
            print("Error when working on", keyword, "=>", e)


        # print progress message
        print("=" * 25, str(math.ceil(((index + 1) / len(keywords_list)) * 100))+"%", "Completed =>", keyword, "=" * 25, "\n")

        if index != len(keywords_list)-1:
            time.sleep(time_interval)

    # Convert the 'date' column to datetime format
    result_df['date'] = pd.to_datetime(result_df['date'])

    # Format the 'date' column to YYYY-MM
    result_df['date'] = result_df['date'].dt.strftime('%Y-%m')

    return result_df


# calling the function
iot_df = interest_over_time(keywords_list, pt)






Error when working on Natural Language Processing => The request failed: Google returned a response with code 429


Error when working on Computer Vision => The request failed: Google returned a response with code 429








Error when working on Neural Networks => The request failed: Google returned a response with code 429

















In [70]:
iot_df.head(10)

Unnamed: 0,keyword,date,volume
0,Machine Learning,2004-01,13
1,Machine Learning,2004-02,13
2,Machine Learning,2004-03,12
3,Machine Learning,2004-04,15
4,Machine Learning,2004-05,15
5,Machine Learning,2004-06,13
6,Machine Learning,2004-07,11
7,Machine Learning,2004-08,11
8,Machine Learning,2004-09,13
9,Machine Learning,2004-10,12


## Interest by region

In [71]:
def interest_by_region(keywords_list, pt):
    result_df = pd.DataFrame(columns=['country', 'keyword', 'volume'])

    # Loop through each keyword
    for index, keyword in enumerate(keywords_list):
        print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")

        # Set the keyword & timeframe for the current month
        try:
            pt.build_payload([keyword], cat=0, timeframe='all')

            # Get the interest by region
            ibr = pt.interest_by_region(resolution='COUNTRY')
            ibr.reset_index(inplace=True)

            # Adding the columns
            ibr['country'] = ibr['geoName']
            ibr['keyword'] = keyword
            ibr['volume'] = ibr[keyword]

            # Appending the results to the main DataFrame
            result_df = pd.concat([result_df, ibr[['country', 'keyword', 'volume']]], ignore_index=True)

        except Exception as e:
            print("Error when working on", keyword, "=>", e)


        # print progress message
        print("=" * 25, str(math.ceil(((index + 1) / len(keywords_list)) * 100))+"%", "Completed =>", keyword, "=" * 25, "\n")

        if index != len(keywords_list)-1:
            time.sleep(time_interval)

    return result_df


# calling the function
ibr_df = interest_by_region(keywords_list, pt)










Error when working on Robotics => The request failed: Google returned a response with code 429


Error when working on Big Data => The request failed: Google returned a response with code 429


Error when working on Algorithm => The request failed: Google returned a response with code 429






Error when working on Explainable AI (XAI) => The request failed: Google returned a response with code 429


Error when working on Reinforcement Learning => The request failed: Google returned a response with code 429




Error when working on Generative AI => The request failed: Google returned a response with code 429







In [72]:
ibr_df.head(10)

Unnamed: 0,country,keyword,volume
0,Afghanistan,Machine Learning,0
1,Albania,Machine Learning,0
2,Algeria,Machine Learning,13
3,American Samoa,Machine Learning,0
4,Andorra,Machine Learning,0
5,Angola,Machine Learning,0
6,Anguilla,Machine Learning,0
7,Antarctica,Machine Learning,0
8,Antigua & Barbuda,Machine Learning,0
9,Argentina,Machine Learning,7


## Related Topics

In [73]:
def related_topics(keywords_list, pt):
    result_df = pd.DataFrame(columns=['keyword', 'sub_keyword', 'topic_title', 'topic_type', 'value'])

    # Loop through each keyword
    for index, keyword in enumerate(keywords_list):
        print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")

        # Set the keyword & timeframe for the current month
        try:
            pt.build_payload([keyword], cat=0, timeframe='all')

            # Get the related topics
            rt = pt.related_topics()

            # since this returns a 3-level nested dictionary, it needs to be wrangled and converted to a dataframe to proceed
            level2_keys = list(rt.get(keyword, {}).keys())

            level2_df = pd.DataFrame(columns=['keyword', 'sub_keyword', 'topic_title', 'topic_type', 'value'])

            for l2_key in level2_keys:
                l2_data = rt.get(keyword, {}).get(l2_key, {})
                l2_data['keyword'] = keyword
                l2_data['sub_keyword'] = l2_key

                # Append to the level 2 dataframe
                level2_df = pd.concat([level2_df, pd.DataFrame([l2_data])], ignore_index=True)

            # Appending the results to the main DataFrame
            result_df = pd.concat([result_df, level2_df], ignore_index=True)

        except Exception as e:
            print("Error when working on", keyword, "=>", e)

        print("=" * 25, str(math.ceil(((index + 1) / len(keywords_list)) * 100)) + "%", "Completed =>", keyword, "=" * 25, "\n")

        if index != len(keywords_list) - 1:
            time.sleep(time_interval)

    return result_df



# calling the function
rt_df = related_topics(keywords_list, pt)


Error when working on Machine Learning => Must pass 2-d input. shape=(1, 23, 8)


Error when working on Deep Learning => Must pass 2-d input. shape=(1, 1, 8)


Error when working on Natural Language Processing => Must pass 2-d input. shape=(1, 1, 8)


Error when working on Computer Vision => Must pass 2-d input. shape=(1, 18, 8)



KeyboardInterrupt: 

## Saving the DataFrames


In [79]:
iot_df.to_csv('/content/gdrive/My Drive/interest_over_time.csv')
ibr_df.to_csv('/content/gdrive/My Drive/interest_by_region.csv')