In [6]:
import pandas as pd
import plotly.express as px
from pytrends.request import TrendReq

In [7]:
# Shared Google Trends client; every request in this notebook goes through it.
# NOTE(review): tz=360 is presumably the offset from UTC in minutes (UTC-6) — confirm against the pytrends docs.
pytrend = TrendReq(tz=360) # US central time

Source: https://lazarinastoy.com/the-ultimate-guide-to-pytrends-google-trends-api-with-python/#frequently-asked-questions

According to Google Trends, the values are calculated on a scale from 0 to 100, where 100 is the location with the most popularity as a fraction of total searches in that location. A value of 50 indicates a location that is half as popular, and a value of 0 indicates a location where there was not enough data for this term.


NOTE TO SELF: combining this relative-interest data with data that shows absolute (non-relative) stats might be helpful.

In [8]:
# Load the topic lookup table. Each row pairs a human-readable `topic` with the
# `mid_code` that is passed to Google Trends as the search keyword further down.
internal_codes = pd.read_csv("internal_codes.csv")
# Display the table so the row numbers used in later cells can be looked up.
internal_codes

Unnamed: 0,topic,mid_code
0,Coronavirus disease 2019,/g/11j2cc_qll
1,COVID-19 vaccine,/g/11j8_9sv06
2,Side effect,/m/0119nml8
3,Mental health,/m/03x69g
4,Housing,/m/04dn__2
5,Economy,/m/0gfps3
6,Unemployment,/m/07s_c
7,Mask,/m/01kr41
8,Social distancing,/g/11c7s5skh1
9,Economic impact of the COVID-19 pandemic,/g/11hfncsjc_


# Creating function to get heatmap of U.S.

This function is used to get the relative regional interest in a topic.

In [22]:
def get_usa_heatmap(index, timeframe='2020-01-07 2022-01-07'):
    """Show a U.S. state choropleth of relative Google Trends interest in one topic.

    Parameters
    ----------
    index : int
        Target row of the `internal_codes` dataframe; selects both the topic
        name (used in the title) and the `mid_code` sent to Google Trends.
    timeframe : str, optional
        Date range handed to `pytrend.build_payload`. The default is the
        window this notebook uses for its pandemic-period maps.

    Raises
    ------
    ValueError
        If Google Trends returns no regional data for the topic.

    Prints the topic name and displays the figure; nothing is returned.
    """
    mid = internal_codes.mid_code[index]
    topic = internal_codes.topic[index]
    pytrend.build_payload([mid], geo='US', timeframe=timeframe)

    # Ask for each region's geo code so values are paired with states by the
    # response itself. A hand-written abbreviation list matched rows only by
    # position; it listed DC before DE, while the regions come back ordered by
    # full name (Delaware before District of Columbia), swapping those two.
    df = pytrend.interest_by_region(inc_geo_code=True)
    if df.empty:
        raise ValueError("No regional interest data returned for topic: {}".format(topic))

    # geoCode values look like 'US-AL'; plotly's "USA-states" mode wants 'AL'.
    states = df['geoCode'].str.split('-').str[-1].tolist()
    fig = px.choropleth(locations=states, locationmode="USA-states", color=list(df[mid]), scope="usa",
                        title="Search topic: {}".format(topic))
    print(topic)
    fig.show()

In [23]:
# Row 1 of internal_codes: "COVID-19 vaccine".
get_usa_heatmap(1)

COVID-19 vaccine


In [24]:
# Row 2 of internal_codes: "Side effect".
get_usa_heatmap(2)

Side effect


In [25]:
# Row 3 of internal_codes: "Mental health".
get_usa_heatmap(3)

Mental health


In [26]:
# Row 4 of internal_codes: "Housing".
get_usa_heatmap(4)

Housing


In [27]:
# Row 5 of internal_codes: "Economy".
get_usa_heatmap(5)

Economy


In [28]:
# Row 6 of internal_codes: "Unemployment".
get_usa_heatmap(6)

Unemployment


#### pre-covid unemployment searches:

In [33]:
    # Same map as the "Unemployment" heatmap, but for a two-year window before the pandemic.
    index = 6
    mid = internal_codes.mid_code[index]
    pytrend.build_payload([mid], geo='US', timeframe = '2017-01-07 2019-01-07')
    # Request each region's geo code so values are paired with states by the
    # response itself. The earlier hand-written abbreviation list matched rows
    # only by position and had DC before DE, while regions come back ordered by
    # full name (Delaware before District of Columbia), swapping those two.
    df = pytrend.interest_by_region(inc_geo_code=True)

    # geoCode values look like 'US-AL'; plotly's "USA-states" mode wants 'AL'.
    states = df['geoCode'].str.split('-').str[-1].tolist()
    fig = px.choropleth(locations=states, locationmode="USA-states", color=list(df[mid]), scope="usa", \
                       title = "Search topic: {} (2017-2019)".format(internal_codes.topic[index]))
    print(internal_codes.topic[index])
    fig.show()

Unemployment


In [29]:
# Row 7 of internal_codes: "Mask".
get_usa_heatmap(7)

Mask


In [30]:
# Row 8 of internal_codes: "Social distancing".
get_usa_heatmap(8)

Social distancing


In [31]:
# Row 9 of internal_codes: "Economic impact of the COVID-19 pandemic".
get_usa_heatmap(9)

Economic impact of the COVID-19 pandemic


# Getting table of regional data for all topics

In [19]:
# Fetch one interest-by-region frame per topic, in the same order as the rows
# of internal_codes. Each topic is requested on its own (single-keyword payload).
topic_dfs = []
for mid_code in internal_codes.mid_code:
    pytrend.build_payload([mid_code], geo='US', timeframe='2020-01-07 2022-01-07')
    region_interest = pytrend.interest_by_region()
    topic_dfs.append(region_interest)

In [20]:
# Lookup from each Trends mid code to its human-readable topic name.
column_dict = dict(zip(internal_codes.mid_code, internal_codes.topic))
# Put the per-topic frames side by side and relabel the mid-code columns by topic.
df = pd.concat(topic_dfs, axis=1).rename(columns=column_dict)
df.head()

Unnamed: 0_level_0,Coronavirus disease 2019,COVID-19 vaccine,Side effect,Mental health,Housing,Economy,Unemployment,Mask,Social distancing,Economic impact of the COVID-19 pandemic,Social anxiety disorder
geoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alabama,60,56,88,68,36,49,65,70,38,0,70
Alaska,90,69,72,100,57,38,34,76,65,0,65
Arizona,84,76,73,74,35,56,44,77,49,0,81
Arkansas,66,51,86,70,38,48,37,68,51,0,65
California,78,78,69,68,47,64,25,80,65,69,84


In [21]:
# Save the combined per-region, per-topic table for use outside this notebook.
df.to_csv("interest_by_region_in_covid_topics.csv")