# 🌍 Notebook 3: Geographic Trends & Tableau Prep

In this notebook, we explore **country-level search interest** in meditation-related keywords over the past 5 years. Our goal is to identify **geographic patterns**, compare keyword popularity by region, and prepare the data for visual storytelling in Tableau.

We’ll be working with:
- `df_country_cleaned`: Country-wise interest scores for each keyword
- `df_country_pivoted`: Pivoted wide-format dataset for heatmaps and bar charts
- `df_related_cleaned`: Popular and rising related queries to understand context

This notebook is part of the **Meditation Trend Pulse** project, aimed at tracking global trends in meditation and mindfulness practices.

In [2]:
import pandas as pd

In [4]:
# 📦 Load cleaned datasets for geographic and query analysis
df_country_cleaned = pd.read_csv("../data/processed/country_interest_long.csv")
df_country_pivoted = pd.read_csv("../data/processed/country_interest_pivot.csv")
df_related_cleaned = pd.read_csv("../data/processed/related_queries_cleaned.csv")

# 🔍 Preview
print("✅ df_country_cleaned:", df_country_cleaned.shape)
print("✅ df_country_pivoted:", df_country_pivoted.shape)
print("✅ df_related_cleaned:", df_related_cleaned.shape)

df_country_cleaned.head()

✅ df_country_cleaned: (258, 3)
✅ df_country_pivoted: (70, 6)
✅ df_related_cleaned: (250, 4)


Unnamed: 0,country,keyword,search_interest
0,Argentina,meditation,2
1,Australia,meditation,98
2,Austria,meditation,49
3,Bangladesh,meditation,10
4,Belgium,meditation,25


In [6]:
# 📊 Count how many countries show non-zero interest for each keyword
keyword_country_coverage = (
    df_country_cleaned[df_country_cleaned["search_interest"] > 0]
    .groupby("keyword")["country"]
    .nunique()
    .reset_index(name="num_countries_with_interest")
    .sort_values(by="num_countries_with_interest", ascending=False)
)

# Display coverage
display(keyword_country_coverage)

Unnamed: 0,keyword,num_countries_with_interest
3,mindfulness,64
2,meditation,55
0,breathwork,54
4,yoga nidra,50
1,guided meditation,35


In [7]:
# 🔢 Total search interest per keyword across all countries
keyword_global_interest = (
    df_country_cleaned
    .groupby("keyword")["search_interest"]
    .sum()
    .reset_index(name="total_search_interest")
    .sort_values(by="total_search_interest", ascending=False)
)

display(keyword_global_interest)

Unnamed: 0,keyword,total_search_interest
3,mindfulness,1959
2,meditation,1290
0,breathwork,1006
4,yoga nidra,636
1,guided meditation,540


In [8]:
# 📌 Group by keyword and country, then rank by interest
top_countries_by_keyword = (
    df_country_cleaned.sort_values(["keyword", "search_interest"], ascending=[True, False])
    .groupby("keyword")
    .head(10)
    .reset_index(drop=True)
)

display(top_countries_by_keyword)

Unnamed: 0,country,keyword,search_interest
0,Australia,breathwork,100
1,Ireland,breathwork,71
2,New Zealand,breathwork,70
3,Switzerland,breathwork,56
4,Netherlands,breathwork,48
5,Canada,breathwork,45
6,United Kingdom,breathwork,45
7,Sweden,breathwork,40
8,Denmark,breathwork,39
9,Austria,breathwork,34


In [11]:
# 🌍 Total interest per keyword (denominator)
total_interest_by_keyword = df_country_cleaned.groupby("keyword")["search_interest"].sum()

# 🌍 Merge global total into original DataFrame
df_country_share = df_country_cleaned.copy()
df_country_share["share_of_global_interest"] = (
    df_country_share
    .apply(lambda row: row["search_interest"] / total_interest_by_keyword[row["keyword"]], axis=1)
)

# 🔍 Preview
display(df_country_share.head())

Unnamed: 0,country,keyword,search_interest,share_of_global_interest
0,Argentina,meditation,2,0.00155
1,Australia,meditation,98,0.075969
2,Austria,meditation,49,0.037984
3,Bangladesh,meditation,10,0.007752
4,Belgium,meditation,25,0.01938


In [12]:
# 🗺️ Top 5 countries by share for each keyword
df_top_countries_per_keyword = (
    df_country_share
    .sort_values(["keyword", "share_of_global_interest"], ascending=[True, False])
    .groupby("keyword")
    .head(5)
    .reset_index(drop=True)
)

# 💡 Optional formatting for readability
df_top_countries_per_keyword["share_of_global_interest"] = (
    df_top_countries_per_keyword["share_of_global_interest"] * 100
).round(2)

# 🖼️ Preview
display(df_top_countries_per_keyword)

Unnamed: 0,country,keyword,search_interest,share_of_global_interest
0,Australia,breathwork,100,9.94
1,Ireland,breathwork,71,7.06
2,New Zealand,breathwork,70,6.96
3,Switzerland,breathwork,56,5.57
4,Netherlands,breathwork,48,4.77
5,Ireland,guided meditation,100,18.52
6,Australia,guided meditation,95,17.59
7,Canada,guided meditation,61,11.3
8,New Zealand,guided meditation,56,10.37
9,United Kingdom,guided meditation,45,8.33


In [13]:
# 🗺️ Count how often each country appears in top 5 across keywords
country_appearance_counts = (
    df_top_countries_per_keyword["country"]
    .value_counts()
    .reset_index()
    .rename(columns={"index": "country", "country": "num_keywords_in_top5"})
)

display(country_appearance_counts.head(10))

Unnamed: 0,num_keywords_in_top5,count
0,Ireland,5
1,Australia,4
2,New Zealand,3
3,Switzerland,3
4,Netherlands,3
5,Canada,2
6,United Kingdom,1
7,Denmark,1
8,Spain,1
9,Iceland,1


In [15]:
# 1. Country count per keyword where interest > 0
keyword_country_coverage.to_csv("../data/processed/country_num_countries_with_interest.csv", index=False)

# 2. Total global interest per keyword
keyword_global_interest.to_csv("../data/processed/country_total_interest_by_keyword.csv", index=False)

# 3. Top 10 countries per keyword (raw interest score)
top_countries_by_keyword.to_csv("../data/processed/country_top10_by_keyword.csv", index=False)

# 4. Country-level share of each keyword’s global interest
df_country_share.to_csv("../data/processed/country_share_of_interest.csv", index=False)

# 5. Top 5 countries per keyword based on share
df_top_countries_per_keyword.to_csv("../data/processed/country_top5_by_keyword.csv", index=False)

# 6. Frequency of countries appearing in top 5 lists
country_appearance_counts.to_csv("../data/processed/country_top5_appearance_counts.csv", index=False)