## Search Volumes

In [46]:
# imports
import pandas as pd
import os
from dotenv import load_dotenv
from serpapi import GoogleSearch
from datetime import date


### Importing Data

Importing search terms csv as python dataframe.

In [47]:
# Getting serpapi key
load_dotenv()
api_key = os.getenv("SERPAI_KEY")

# Read the CSV file, skipping the first two rows
search_terms_df = pd.read_csv("Search_terms.csv", skiprows=2)

  search_terms_df = pd.read_csv("Search_terms.csv", skiprows=2)


In [48]:
search_terms_df.head()

Unnamed: 0,Search term,Match type,Added/Excluded,Campaign,Ad group,Currency code,Avg. CPM,Impr.,Interactions,Interaction rate,Avg. cost,Cost,Campaign type,Conv. rate,Conversions,Cost / conv.
0,mental health support line,Exact match (close variant),,Nebo - Helmsley Grant - Nonbrand - Crisis Support,Helpline / Hotline,USD,280.0,40,5,12.50%,2.24,11.2,Search,180.00%,9.0,1.24
1,depression support group,Broad match,,Nebo - Helmsley Grant - Nonbrand - Crisis Support,Helpline / Hotline,USD,187.06,17,1,5.88%,3.18,3.18,Search,100.00%,1.0,3.18
2,teen help hotline,Broad match,,Nebo - Helmsley Grant - Nonbrand - Crisis Support,Suicide Prevention,USD,456.25,16,4,25.00%,1.83,7.3,Search,100.00%,4.0,1.83
3,family acceptance project lds,Broad match,,Nebo - Helmsley Grant - Nonbrand - Crisis Support,Helpline / Hotline,USD,840.0,1,1,100.00%,0.84,0.84,Search,100.00%,1.0,0.84
4,hope line,Broad match,,Nebo - Helmsley Grant - Nonbrand - Crisis Support,Suicide Prevention,USD,605.0,6,1,16.67%,3.63,3.63,Search,200.00%,2.0,1.82


In [51]:
len(search_terms_df["Search term"].value_counts())
search_terms_df.size


2515920

In [53]:
# Get value counts
counts = search_terms_df["Search term"].value_counts()

# Keep only terms that appeared exactly once
unique_terms = counts[counts == 1].index

# Filter dataframe to only keep those unique terms
search_terms_df = search_terms_df[search_terms_df["Search term"].isin(unique_terms)]

search_terms_df.size

2005296

### Filtering Data

Removing duplicates and removing keywords that don't have interest over time data.

In [None]:
# Removing duplciates from the dataframe
def remove_duplicates(df):
    return df.drop_duplicates(subset=[df.columns[0]])

search_terms_df = remove_duplicates(search_terms_df)

print("dataframe size: " + str(search_terms_df.size))

dataframe size: 160


In [44]:
# Checking if keyword has interest_over_time data and removing that keyword if it doesn't
def check_interest(keyword, api_key):
    """Return True if keyword has Google Trends interest, False otherwise."""
    params = {
        "engine": "google_trends",
        "q": keyword,
        "api_key": api_key
    }

    search = GoogleSearch(params)
    results = search.get_dict()

    # Safely get timeline_data
    timeline_data = results.get("interest_over_time", {}).get("timeline_data", [])

    return bool(timeline_data)  # True if non-empty, False if empty

# Apply to dataframe and filter rows
search_terms_df["Has Interest"] = search_terms_df["Search term"].apply(lambda x: check_interest(x, api_key))
filtered_df = search_terms_df[search_terms_df["Has Interest"]]
filtered_df = filtered_df.drop(columns=["Has Interest"])

print("dataframe size: " + str(filtered_df.size))

dataframe size: 0


### Finding Percent Change

In [None]:
def percent_change_from_timeline(timeline_data):
    """Calculate percent change from timeline data (assumes data exists)."""
    first_value = timeline_data[0]['values'][0]['extracted_value']
    last_value = timeline_data[-1]['values'][0]['extracted_value']

    if first_value == 0:
        return None  # Avoid divide by zero

    return ((last_value - first_value) / first_value) * 100

In [None]:
# Apply percent change calculation to each search term
filtered_df["Growth Percentage"] = filtered_df["Search term"].apply(
    lambda keyword: percent_change_from_timeline(get_interest_data(keyword, api_key))
)

sorted_search_terms_df = filtered_df.sort('Growth Percentage', descending = True)

# Return top 10 keywords
top_10_keywords = sorted_search_terms_df["Search term"].head(10).tolist()
top_10_keywords

KeyboardInterrupt: 

interest_over_time in SerpApi’s Google Trends engine refers to how popular a search term is over a specified period, Google doesn't publicly release information about search volume. 

However, there is publicly released information about Google Ad search volume. If we want to be more accurate, we can try to benchmark some of our search words against that in the future.