<a href="https://colab.research.google.com/github/purrvaja/Google-Trends-Analysis-with-Power-BI/blob/main/google_trends_api.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pytrends

Collecting pytrends
  Downloading pytrends-4.9.2-py3-none-any.whl (15 kB)
Installing collected packages: pytrends
Successfully installed pytrends-4.9.2


In [2]:
import pandas as pd
import numpy as np

from pytrends.request import TrendReq

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import time
import math


In [3]:
# Mount Google Drive so the CSV exports at the end of the notebook can be written to it.
# NOTE(review): google.colab is presumably only importable inside Google Colab — confirm
# before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [4]:
# Search terms queried against Google Trends; every collection function below
# issues one request per entry.
# NOTE(review): the entries carrying a parenthesised acronym are sent verbatim as the
# search term — confirm they return data, or consider using the plain phrase.
keywords_list = [
    "Machine Learning",
    "Deep Learning",
    "Natural Language Processing",
    "Computer Vision",
    "Robotics",
    "Big Data",
    "Algorithm",
    "Neural Networks",
    "Artificial General Intelligence (AGI)",
    "Explainable AI (XAI)",
    "Reinforcement Learning",
    "Chatbots",
    "Generative AI",
    "Predictive Analytics",
    "AI Ethics"
]

In [5]:
# initialize a new Google Trends Request Object
# hl is the host language used for the requests; tz is the timezone offset in minutes
# (the pytrends README gives 360 as the value for US Central time).

pt = TrendReq(hl='en-US', tz=360)

In [7]:
# setting the time interval in between each api call
# (value is in seconds: the collection functions pass it straight to time.sleep)
# using a higher time interval to avoid the response code 429

time_interval = 60

## Interest over time

In [8]:
def interest_over_time(keywords_list, pt, sleep_seconds=None):
    """Collect the all-time interest-over-time series for every keyword.

    Each keyword is requested on its own (``pt.build_payload`` followed by
    ``pt.interest_over_time()``) and the per-keyword results are stacked into
    a single long-format table.

    Parameters
    ----------
    keywords_list : sequence of str
        Search terms to query, one request per term.
    pt : object
        Client exposing ``build_payload`` and ``interest_over_time``; the
        notebook passes a pytrends ``TrendReq`` instance.
    sleep_seconds : float, optional
        Pause between consecutive requests. When omitted, the notebook-level
        ``time_interval`` is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``keyword``, ``date`` (string formatted ``YYYY-MM``) and
        ``volume``. Keywords whose request raised or returned no rows are
        left out of the table and reported on stdout.
    """
    columns = ['keyword', 'date', 'volume']
    total = len(keywords_list)
    frames = []   # one tidy frame per successfully fetched keyword
    failed = []   # keywords that need a second attempt

    for index, keyword in enumerate(keywords_list):

        # print a progress message
        print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")

        try:
            # Request the full available history for this single keyword
            pt.build_payload([keyword], cat=0, timeframe='all')
            iot = pt.interest_over_time()

            # An empty response has neither a 'date' nor a keyword column;
            # fail with a readable message instead of a bare KeyError.
            if iot.empty:
                raise ValueError("no data returned")

            iot = iot.reset_index()
            frames.append(pd.DataFrame({
                'keyword': keyword,
                'date': iot['date'],
                'volume': iot[keyword],
            }))

        except Exception as e:
            # Best effort: keep going with the remaining keywords
            failed.append(keyword)
            print("Error when working on", keyword, "=>", e)

        # print progress message
        print("=" * 25, str(math.ceil(((index + 1) / total) * 100))+"%", "Completed =>", keyword, "=" * 25, "\n")

        # Throttle between requests (not after the last one)
        if index != total - 1:
            time.sleep(time_interval if sleep_seconds is None else sleep_seconds)

    if failed:
        print("Keywords not captured (re-run with these):", failed)

    # Concatenate once at the end: avoids quadratic re-copying and keeps
    # 'volume' numeric instead of degrading it to object dtype.
    if frames:
        result_df = pd.concat(frames, ignore_index=True)
    else:
        result_df = pd.DataFrame(columns=columns)

    # Normalise the timestamps to YYYY-MM strings
    result_df['date'] = pd.to_datetime(result_df['date']).dt.strftime('%Y-%m')

    return result_df


# calling the function
# NOTE: the function pauses time_interval seconds between keywords, so a full run
# takes at least (len(keywords_list) - 1) * time_interval seconds.
iot_df = interest_over_time(keywords_list, pt)

































In [9]:
# Preview the first 10 rows of the interest-over-time table
iot_df.head(10)

Unnamed: 0,keyword,date,volume
0,Machine Learning,2004-01,13
1,Machine Learning,2004-02,13
2,Machine Learning,2004-03,13
3,Machine Learning,2004-04,14
4,Machine Learning,2004-05,13
5,Machine Learning,2004-06,13
6,Machine Learning,2004-07,10
7,Machine Learning,2004-08,11
8,Machine Learning,2004-09,13
9,Machine Learning,2004-10,12


In [None]:
# TODO: identify the keywords that weren't captured in the first run, create a new list with those, run again, and append the resulting DataFrames.

## Interest by region

In [10]:
def interest_by_region(keywords_list, pt, sleep_seconds=None):
    """Collect the all-time, country-level interest for every keyword.

    Each keyword is requested on its own (``pt.build_payload`` followed by
    ``pt.interest_by_region(resolution='COUNTRY')``) and the per-keyword
    results are stacked into a single long-format table.

    Parameters
    ----------
    keywords_list : sequence of str
        Search terms to query, one request per term.
    pt : object
        Client exposing ``build_payload`` and ``interest_by_region``; the
        notebook passes a pytrends ``TrendReq`` instance.
    sleep_seconds : float, optional
        Pause between consecutive requests. When omitted, the notebook-level
        ``time_interval`` is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``country`` (taken from the response's ``geoName``),
        ``keyword`` and ``volume``. Keywords whose request raised or returned
        no rows are left out of the table and reported on stdout.
    """
    columns = ['country', 'keyword', 'volume']
    total = len(keywords_list)
    frames = []   # one tidy frame per successfully fetched keyword
    failed = []   # keywords that need a second attempt

    for index, keyword in enumerate(keywords_list):
        print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")

        try:
            # Request the full available history for this single keyword
            pt.build_payload([keyword], cat=0, timeframe='all')
            ibr = pt.interest_by_region(resolution='COUNTRY')

            # An empty response has neither 'geoName' nor a keyword column;
            # fail with a readable message instead of a bare KeyError.
            if ibr.empty:
                raise ValueError("no data returned")

            ibr = ibr.reset_index()
            frames.append(pd.DataFrame({
                'country': ibr['geoName'],
                'keyword': keyword,
                'volume': ibr[keyword],
            }))

        except Exception as e:
            # Best effort: keep going with the remaining keywords
            failed.append(keyword)
            print("Error when working on", keyword, "=>", e)

        # print progress message
        print("=" * 25, str(math.ceil(((index + 1) / total) * 100))+"%", "Completed =>", keyword, "=" * 25, "\n")

        # Throttle between requests (not after the last one)
        if index != total - 1:
            time.sleep(time_interval if sleep_seconds is None else sleep_seconds)

    if failed:
        print("Keywords not captured (re-run with these):", failed)

    # Concatenate once at the end: avoids quadratic re-copying and keeps
    # 'volume' numeric instead of degrading it to object dtype.
    if frames:
        return pd.concat(frames, ignore_index=True)
    return pd.DataFrame(columns=columns)


# calling the function
# NOTE: the function pauses time_interval seconds between keywords, so a full run
# takes at least (len(keywords_list) - 1) * time_interval seconds.
ibr_df = interest_by_region(keywords_list, pt)

































In [11]:
# Preview the first 10 rows of the interest-by-region table
ibr_df.head(10)

Unnamed: 0,country,keyword,volume
0,Afghanistan,Machine Learning,0
1,Albania,Machine Learning,0
2,Algeria,Machine Learning,13
3,American Samoa,Machine Learning,0
4,Andorra,Machine Learning,0
5,Angola,Machine Learning,0
6,Anguilla,Machine Learning,0
7,Antarctica,Machine Learning,0
8,Antigua & Barbuda,Machine Learning,0
9,Argentina,Machine Learning,8


## Related Topics

In [15]:
# Exploratory single-keyword run used to work out how the nested result of
# pt.related_topics() is flattened into one table.
test_keywords_list = ['Deep Learning']
base_columns = ['keyword', 'sub_keyword', 'topic_title', 'topic_type', 'value']
topic_frames = []   # one frame per (keyword, sub_keyword) table that has rows

# Loop through each keyword
for index, keyword in enumerate(test_keywords_list):
    print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")

    try:
        # Request the full available history for this single keyword
        pt.build_payload([keyword], cat=0, timeframe='all')

        # Get the related topics: {keyword: {sub_keyword: table}}
        rt = pt.related_topics()

        for l2_key, l2_df in (rt.get(keyword) or {}).items():
            # A sub-table can be missing (None) or empty; skip just that one
            # so the keyword's other tables are still kept.
            if l2_df is None or l2_df.empty:
                continue
            # assign() returns a copy, leaving the client's frame untouched
            topic_frames.append(l2_df.assign(keyword=keyword, sub_keyword=l2_key))

    except Exception as e:
        print("Error when working on", keyword, "=>", e)

    print("=" * 25, str(math.ceil(((index + 1) / len(test_keywords_list)) * 100))+"%", "Completed =>", keyword, "=" * 25, "\n")

    if index != len(test_keywords_list)-1:
        time.sleep(time_interval)

# Build the table once; identifying columns first, remaining columns in the
# order they were received.
if topic_frames:
    result_df = pd.concat(topic_frames, ignore_index=True)
    result_df = result_df.reindex(
        columns=base_columns + [c for c in result_df.columns if c not in base_columns]
    )
else:
    result_df = pd.DataFrame(columns=base_columns)





In [16]:
# Inspect the flattened related-topics table for the test keyword
result_df

Unnamed: 0,keyword,sub_keyword,topic_title,topic_type,value,formattedValue,link,topic_mid,hasData
0,Deep Learning,rising,deep,Topic,241000,Breakout,/trends/explore?q=/g/120z94sp_&date=all,/g/120z94sp_,
1,Deep Learning,rising,Keras,Software,166000,Breakout,/trends/explore?q=/g/11c1r2rvnp&date=all,/g/11c1r2rvnp,
2,Deep Learning,top,Neuron,Topic,100,100,/trends/explore?q=/m/059bs&date=all,/m/059bs,True
3,Deep Learning,top,deep,Topic,67,67,/trends/explore?q=/g/120z94sp_&date=all,/g/120z94sp_,True
4,Deep Learning,top,Keras,Software,46,46,/trends/explore?q=/g/11c1r2rvnp&date=all,/g/11c1r2rvnp,True


In [14]:
# NOTE(review): this cell's execution count (14) is lower than the cells above it, and its
# saved output lists 'Machine Learning' rows although the test keyword is 'Deep Learning' —
# the output is from an earlier run. Re-run top to bottom or delete this cell.
result_df

Unnamed: 0,keyword,sub_keyword,topic_title,topic_type,value,formattedValue,link,topic_mid,hasData
0,Machine Learning,rising,Deep learning,Topic,381450,Breakout,/trends/explore?q=/m/0h1fn8h&date=all,/m/0h1fn8h,
1,Machine Learning,rising,Data science,Field of study,304650,Breakout,/trends/explore?q=/m/0jt3_q3&date=all,/m/0jt3_q3,
2,Machine Learning,rising,Engineer,Degree,213850,Breakout,/trends/explore?q=/m/09j9h&date=all,/m/09j9h,
3,Machine Learning,rising,Coursera,Corporation,166750,Breakout,/trends/explore?q=/m/0j9kbbz&date=all,/m/0j9kbbz,
4,Machine Learning,rising,Microsoft Azure,Computer application,146800,Breakout,/trends/explore?q=/m/04y7lrx&date=all,/m/04y7lrx,
5,Machine Learning,rising,TensorFlow,Software,137700,Breakout,/trends/explore?q=/g/11bwp1s2k3&date=all,/g/11bwp1s2k3,
6,Machine Learning,rising,GitHub,Software company,133400,Breakout,/trends/explore?q=/m/0ryppmg&date=all,/m/0ryppmg,
7,Machine Learning,rising,Analytics,Topic,130050,Breakout,/trends/explore?q=/m/02gcn9&date=all,/m/02gcn9,
8,Machine Learning,rising,scikit-learn,Computer program,127300,Breakout,/trends/explore?q=/m/0h97pvq&date=all,/m/0h97pvq,
9,Machine Learning,rising,Cloud computing,Topic,109250,Breakout,/trends/explore?q=/m/02y_9m3&date=all,/m/02y_9m3,


In [17]:
def related_topics(keywords_list, pt, sleep_seconds=None):
    """Collect the related-topics tables for every keyword into one frame.

    For each keyword, ``pt.related_topics()`` is expected to return a mapping
    ``{keyword: {sub_keyword: table}}`` (the notebook's outputs show the
    sub-keywords ``rising`` and ``top``). Every table is tagged with its
    keyword and sub-keyword and the tables are stacked.

    Parameters
    ----------
    keywords_list : sequence of str
        Search terms to query, one request per term.
    pt : object
        Client exposing ``build_payload`` and ``related_topics``; the
        notebook passes a pytrends ``TrendReq`` instance.
    sleep_seconds : float, optional
        Pause between consecutive requests. When omitted, the notebook-level
        ``time_interval`` is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Starts with the columns ``keyword``, ``sub_keyword``,
        ``topic_title``, ``topic_type`` and ``value``, followed by any other
        columns the client returned, in the order received. A sub-table that
        is ``None`` or empty is skipped without discarding the keyword's
        other sub-tables.
    """
    base_columns = ['keyword', 'sub_keyword', 'topic_title', 'topic_type', 'value']
    total = len(keywords_list)
    frames = []   # one frame per (keyword, sub_keyword) table that has rows
    failed = []   # keywords that produced no rows at all

    for index, keyword in enumerate(keywords_list):
        print("=" * 25, str(index+1) + '.', "Processing =>", keyword, "=" * 25, "\n")

        captured = False
        try:
            # Request the full available history for this single keyword
            pt.build_payload([keyword], cat=0, timeframe='all')

            # Get the related topics: {keyword: {sub_keyword: table}}
            rt = pt.related_topics()

            for l2_key, l2_data in (rt.get(keyword) or {}).items():
                # Previously a None sub-table raised on item assignment and
                # the whole keyword was lost; now only that table is skipped.
                if l2_data is None:
                    continue
                table = pd.DataFrame(l2_data)
                if table.empty:
                    continue
                # assign() returns a copy, leaving the client's frame untouched
                frames.append(table.assign(keyword=keyword, sub_keyword=l2_key))
                captured = True

        except Exception as e:
            # Best effort: keep going with the remaining keywords
            print("Error when working on", keyword, "=>", e)

        if not captured:
            failed.append(keyword)

        print("=" * 25, str(math.ceil(((index + 1) / total) * 100)) + "%", "Completed =>", keyword, "=" * 25, "\n")

        # Throttle between requests (not after the last one)
        if index != total - 1:
            time.sleep(time_interval if sleep_seconds is None else sleep_seconds)

    if failed:
        print("Keywords not captured (re-run with these):", failed)

    if not frames:
        return pd.DataFrame(columns=base_columns)

    # Concatenate once, then put the identifying columns first so the layout
    # matches what downstream consumers already receive.
    result_df = pd.concat(frames, ignore_index=True)
    extra_columns = [c for c in result_df.columns if c not in base_columns]
    return result_df.reindex(columns=base_columns + extra_columns)



# calling the function
# NOTE: the function pauses time_interval seconds between keywords, so a full run
# takes at least (len(keywords_list) - 1) * time_interval seconds.
rt_df = related_topics(keywords_list, pt)

































In [18]:
# Preview the first 10 rows of the related-topics table
rt_df.head(10)

Unnamed: 0,keyword,sub_keyword,topic_title,topic_type,value,formattedValue,link,topic_mid,hasData
0,Machine Learning,rising,Deep learning,Topic,381450,Breakout,/trends/explore?q=/m/0h1fn8h&date=all,/m/0h1fn8h,
1,Machine Learning,rising,Data science,Field of study,304650,Breakout,/trends/explore?q=/m/0jt3_q3&date=all,/m/0jt3_q3,
2,Machine Learning,rising,Engineer,Degree,213850,Breakout,/trends/explore?q=/m/09j9h&date=all,/m/09j9h,
3,Machine Learning,rising,Coursera,Corporation,166750,Breakout,/trends/explore?q=/m/0j9kbbz&date=all,/m/0j9kbbz,
4,Machine Learning,rising,Microsoft Azure,Computer application,146800,Breakout,/trends/explore?q=/m/04y7lrx&date=all,/m/04y7lrx,
5,Machine Learning,rising,TensorFlow,Software,137700,Breakout,/trends/explore?q=/g/11bwp1s2k3&date=all,/g/11bwp1s2k3,
6,Machine Learning,rising,GitHub,Software company,133400,Breakout,/trends/explore?q=/m/0ryppmg&date=all,/m/0ryppmg,
7,Machine Learning,rising,Analytics,Topic,130050,Breakout,/trends/explore?q=/m/02gcn9&date=all,/m/02gcn9,
8,Machine Learning,rising,scikit-learn,Computer program,127300,Breakout,/trends/explore?q=/m/0h97pvq&date=all,/m/0h97pvq,
9,Machine Learning,rising,Cloud computing,Topic,109250,Breakout,/trends/explore?q=/m/02y_9m3&date=all,/m/02y_9m3,


## Saving the DataFrames


In [19]:
# Write the three result tables as CSV files to the Google Drive mounted earlier.
# NOTE(review): to_csv is called without index=False, so each file also carries the
# DataFrame's row index as an unnamed first column — confirm the downstream report wants it.
# NOTE(review): 'releated_topics.csv' looks like a typo for 'related_topics.csv'; it is left
# unchanged here because anything already reading the file depends on the current name.
iot_df.to_csv('/content/gdrive/My Drive/interest_over_time.csv')
ibr_df.to_csv('/content/gdrive/My Drive/interest_by_region.csv')
rt_df.to_csv('/content/gdrive/My Drive/releated_topics.csv')