In [4]:
# 📦 Automation Notebook: Import libraries
import pandas as pd
from pytrends.request import TrendReq
import os
from datetime import datetime
import time
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
# 🔑 Search terms tracked by this notebook.
# The same list is handed to pytrends as one payload and later used to select
# the per-keyword columns of the region frame, so it stays a list of strings.
KEYWORDS = [
    "meditation", "mindfulness", "breathwork",
    "yoga nidra", "guided meditation",
]

In [6]:
# 🌍 Initialize pytrends connection
pytrends = TrendReq(hl='en-US', tz=360)

# 📈 Build payload for weekly interest over time (last 5 years, no geo filter)
pytrends.build_payload(KEYWORDS, timeframe='today 5-y', geo='')

# 📊 Fetch interest over time (indexed by date)
df_trend_raw = pytrends.interest_over_time()

# 🛑 Fail fast on an empty response: without this the notebook only breaks
# later, when melt() cannot find the "date" column.
if df_trend_raw.empty:
    raise ValueError(
        f"Google Trends returned no interest-over-time data for {KEYWORDS!r}"
    )

# 🧹 Move the date index into a column and drop 'isPartial' if it exists
df_trend = df_trend_raw.reset_index().drop(columns=['isPartial'], errors='ignore')

# ✅ Display preview
display(df_trend.head())

Unnamed: 0,date,meditation,mindfulness,breathwork,yoga nidra,guided meditation
0,2020-07-26,92,21,1,3,6
1,2020-08-02,88,21,1,3,6
2,2020-08-09,90,20,1,2,6
3,2020-08-16,91,24,1,3,6
4,2020-08-23,89,23,1,3,5


In [11]:
# 💾 Write the wide interest-over-time frame to the automated trend folder,
# creating the folder first when it is missing.
raw_trend_csv = "../data/automated/trend/trend_interest_over_time.csv"
os.makedirs(os.path.dirname(raw_trend_csv), exist_ok=True)
df_trend.to_csv(raw_trend_csv, index=False)

In [8]:
# 🔁 Melt into long format (one row per date/keyword pair) for easier calculation
df_long = df_trend.melt(id_vars="date", var_name="keyword", value_name="search_interest")

# 📊 Take each keyword's value on the earliest and latest date
df_pct_change = (
    df_long.sort_values("date")
    .groupby("keyword")
    .agg(first_value=("search_interest", "first"), last_value=("search_interest", "last"))
    .reset_index()
)

# 🛡️ Percent change is undefined when the starting value is 0. Mask that
# baseline to NaN so the result is NaN rather than +/-inf, which would
# otherwise land in the CSV and float to the top of the ranking.
first_nonzero = df_pct_change["first_value"].where(df_pct_change["first_value"] != 0)

df_pct_change["percent_change"] = (
    (df_pct_change["last_value"] - first_nonzero) / first_nonzero * 100
).round(2)

# 🔁 Clean and reorder columns (largest increase first; NaN rows sort last)
df_pct_change = df_pct_change[["keyword", "percent_change"]].sort_values(by="percent_change", ascending=False)

# 💾 Save (create the folder here too, so this cell does not depend on
# another cell having made it)
os.makedirs("../data/automated/trend", exist_ok=True)
df_pct_change.to_csv("../data/automated/trend/trend_percent_change.csv", index=False)

# ✅ Preview
display(df_pct_change)

Unnamed: 0,keyword,percent_change
0,breathwork,300.0
4,yoga nidra,0.0
3,mindfulness,-4.76
1,guided meditation,-16.67
2,meditation,-25.0


In [9]:
# 📊 Top 3 peaks per keyword: order rows by keyword (A→Z) and interest
# (high→low), keep the first three rows of every keyword group, then apply
# the same ordering once more and renumber the index.
peak_order = {"by": ["keyword", "search_interest"], "ascending": [True, False]}

ranked_interest = df_long.sort_values(**peak_order)
top_three_each = ranked_interest.groupby("keyword").head(3)
df_top_peaks = top_three_each.sort_values(**peak_order).reset_index(drop=True)

# 💾 Write the peaks table, making sure the target folder exists
peaks_csv = "../data/automated/trend/trend_top_peaks.csv"
os.makedirs("../data/automated/trend", exist_ok=True)
df_top_peaks.to_csv(peaks_csv, index=False)

# ✅ Show the result
display(df_top_peaks)

Unnamed: 0,date,keyword,search_interest
0,2025-07-27,breathwork,4
1,2024-01-07,breathwork,3
2,2024-01-14,breathwork,3
3,2020-07-26,guided meditation,6
4,2020-08-02,guided meditation,6
5,2020-08-09,guided meditation,6
6,2021-01-17,meditation,100
7,2021-01-03,meditation,98
8,2020-09-13,meditation,97
9,2020-09-13,mindfulness,26


In [10]:
# 🗓️ Add month column
# NOTE: this writes a "month" column onto df_long itself, so any cell that is
# re-run after this one sees the extra column.
df_long["month"] = df_long["date"].dt.month_name()

# 📊 Group by month and keyword, then average search interest
df_heatmap = (
    df_long.groupby(["month", "keyword"])["search_interest"]
    .mean()
    .reset_index()
)

# 🔄 Ensure calendar order (plain strings would sort alphabetically)
month_order = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
]
df_heatmap["month"] = pd.Categorical(df_heatmap["month"], categories=month_order, ordered=True)
df_heatmap = df_heatmap.sort_values(["month", "keyword"])

# 💾 Save to file (create the folder here too, matching the top-peaks cell,
# so this cell does not depend on another cell having made it)
os.makedirs("../data/automated/trend", exist_ok=True)
df_heatmap.to_csv("../data/automated/trend/trend_monthly_heatmap.csv", index=False)

# ✅ Preview
display(df_heatmap.head())

Unnamed: 0,month,keyword,search_interest
20,January,breathwork,1.913043
21,January,guided meditation,4.043478
22,January,meditation,81.782609
23,January,mindfulness,19.73913
24,January,yoga nidra,2.913043


In [12]:
# 👀 Country-level interest for the payload built above; the result is only
# displayed as the cell output, not stored in a variable.
pytrends.interest_by_region(
    resolution='COUNTRY',
    inc_low_vol=True,
    inc_geo_code=False,
)

Unnamed: 0_level_0,meditation,mindfulness,breathwork,yoga nidra,guided meditation
geoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,77,17,0,3,3
Albania,80,14,2,1,3
Algeria,79,16,2,1,2
American Samoa,100,0,0,0,0
Andorra,45,50,0,5,0
...,...,...,...,...,...
Western Sahara,100,0,0,0,0
Yemen,75,25,0,0,0
Zambia,89,9,0,0,2
Zimbabwe,85,12,0,0,3


In [13]:
# 🌍 Request country-level interest for the existing payload and turn the
# region index into a regular "geoName" column
region_frame = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=True, inc_geo_code=False)
df_country = region_frame.reset_index()

# ✅ Discard countries whose interest sums to zero across all keywords
has_interest = df_country[KEYWORDS].sum(axis=1) > 0
df_country = df_country.loc[has_interest]

# 🧼 Reshape to one row per (country, keyword) and rename the id column
df_country_long = (
    df_country
    .melt(id_vars='geoName', var_name='keyword', value_name='search_interest')
    .rename(columns={"geoName": "country"})
)

# ✅ Preview
display(df_country_long.head())

Unnamed: 0,country,keyword,search_interest
0,Afghanistan,meditation,77
1,Albania,meditation,80
2,Algeria,meditation,79
3,American Samoa,meditation,100
4,Andorra,meditation,45


In [14]:
# 📅 Today's date (YYYY-MM-DD) makes each snapshot file name unique per day
timestamp = f"{datetime.today():%Y-%m-%d}"
filename = f"../data/automated/country_interest_{timestamp}.csv"

# 🗂️ Make sure the destination folder exists, then 💾 write the long-format
# country interest table for this run
os.makedirs(os.path.dirname(filename), exist_ok=True)
df_country_long.to_csv(filename, index=False)

print(f"✅ Saved: {filename}")

✅ Saved: ../data/automated/country_interest_2025-07-31.csv
