<a href="https://colab.research.google.com/github/kzumreen/FoodTrendsPrediction/blob/main/CSV_PreparingGoogleTrends.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pytrends pandas --quiet


In [None]:
"""
Collect Google Trends data (worldwide + selected countries)
for multiple food-related keywords and save to one CSV file.
Requires: pip install pytrends pandas
"""

from pytrends.request import TrendReq
import pandas as pd
import time
from requests.exceptions import RequestException

# ----------------------------------
# CONFIGURATION
# ----------------------------------
# Search terms; fetch_all() requests each one separately, per region.
KEYWORDS = ["matcha", "dubai chocolate", "baked feta cheese pasta", "air fryer"]
# Geo codes queried in addition to the worldwide (geo="") request.
COUNTRY_CODES = ["AE", "GB", "IN", "US", "KR"]
# Timeframe string handed to build_payload() for every request.
TIMEFRAME = "2015-01-01 2025-10-01"   # or "today 5-y" for last 5 years
# Seconds passed to time.sleep() between fetches in fetch_all().
SLEEP_SECONDS = 3
# Path of the CSV file written by fetch_all().
OUTPUT_CSV = "pytrends_worldwide_and_countries.csv"
# Maximum number of attempts per request in safe_build_and_get().
RETRIES = 3
# ----------------------------------

def init_pytrends():
    """Create and return a new TrendReq client configured with hl="en-US" and tz=360."""
    client = TrendReq(hl="en-US", tz=360)
    return client

def safe_build_and_get(pytrends, kw_list, timeframe, geo):
    """Fetch interest-over-time data for ``kw_list``, retrying on failure.

    Args:
        pytrends: Client object exposing ``build_payload`` and
            ``interest_over_time``.
        kw_list: Keywords forwarded to ``build_payload``.
        timeframe: Timeframe string forwarded to ``build_payload``.
        geo: Geo code forwarded to ``build_payload``.

    Returns:
        The DataFrame produced by ``interest_over_time`` with the
        ``isPartial`` column removed when present, or an empty DataFrame
        once all ``RETRIES`` attempts have failed.
    """
    for attempt in range(RETRIES):
        try:
            pytrends.build_payload(kw_list, timeframe=timeframe, geo=geo)
            df = pytrends.interest_over_time()
            # errors="ignore" makes the drop a no-op when the column is absent.
            return df.drop(columns=["isPartial"], errors="ignore")
        except RequestException as e:
            print(f"Request failed (attempt {attempt+1}/{RETRIES}) for {kw_list} in {geo}: {e}")
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch, so any failure
            # is reported and retried instead of aborting the whole run.
            print(f"Error fetching {kw_list} ({geo}): {e}")
        # Back off a little longer each time, but do not wait after the
        # last attempt: there is nothing left to retry.
        if attempt < RETRIES - 1:
            time.sleep(3 + attempt)
    print(f"⚠️ Skipped {kw_list} for {geo} after {RETRIES} retries.")
    return pd.DataFrame()

def _trend_column_name(kw, suffix):
    """Return the output column name for keyword ``kw`` and region ``suffix``."""
    return f"{kw.replace(' ', '_')}_{suffix}"


def fetch_all():
    """Fetch worldwide + country-level data for all keywords.

    Every keyword in ``KEYWORDS`` is requested once worldwide (geo="") and
    once per entry in ``COUNTRY_CODES``. Each non-empty result is renamed to
    ``<keyword_with_underscores>_<WORLD|code>`` and outer-joined into a single
    DataFrame, which is then sorted by its datetime index and written to
    ``OUTPUT_CSV``.

    Returns:
        The combined DataFrame. If no request returned any data, an empty
        DataFrame is returned and no CSV file is written.
    """
    pytrends = init_pytrends()

    # (geo code, column suffix) in output order: worldwide first, then countries.
    regions = [("", "WORLD")] + [(code, code) for code in COUNTRY_CODES]

    combined = None
    for geo, suffix in regions:
        for kw in KEYWORDS:
            if geo:
                print(f"🌎 Fetching {geo} data for '{kw}'")
            else:
                print(f"\n🌍 Fetching WORLD data for '{kw}'")
            df = safe_build_and_get(pytrends, [kw], TIMEFRAME, geo=geo)
            # Pause after every request, including ones that came back empty,
            # so the next request is never fired immediately.
            time.sleep(SLEEP_SECONDS)
            if df.empty:
                continue
            df = df.rename(columns={kw: _trend_column_name(kw, suffix)})
            combined = df if combined is None else combined.join(df, how="outer")

    # Nothing was collected: return an empty frame rather than failing on None.
    if combined is None:
        print("\n⚠️ No data was collected; nothing saved.")
        return pd.DataFrame()

    # ---- Clean up ----
    combined.index = pd.to_datetime(combined.index, errors="coerce")
    combined = combined.sort_index()
    print("\n✅ Data collection complete.")
    print("Shape:", combined.shape)
    print("Columns:", combined.columns.tolist())

    combined.to_csv(OUTPUT_CSV)
    print(f"💾 Saved to {OUTPUT_CSV}")
    return combined

# Run the full collection when this code is the entry point; the combined
# DataFrame returned by fetch_all() is kept in the module-level name `data`.
if __name__ == "__main__":
    data = fetch_all()



🌍 Fetching WORLD data for 'matcha'


  df = df.fillna(False)



🌍 Fetching WORLD data for 'dubai chocolate'


  df = df.fillna(False)



🌍 Fetching WORLD data for 'baked feta cheese pasta'


  df = df.fillna(False)



🌍 Fetching WORLD data for 'air fryer'


  df = df.fillna(False)


🌎 Fetching AE data for 'matcha'


  df = df.fillna(False)


🌎 Fetching AE data for 'dubai chocolate'


  df = df.fillna(False)


🌎 Fetching AE data for 'baked feta cheese pasta'
🌎 Fetching AE data for 'air fryer'


  df = df.fillna(False)


🌎 Fetching GB data for 'matcha'


  df = df.fillna(False)


🌎 Fetching GB data for 'dubai chocolate'


  df = df.fillna(False)


🌎 Fetching GB data for 'baked feta cheese pasta'


  df = df.fillna(False)


🌎 Fetching GB data for 'air fryer'


  df = df.fillna(False)


🌎 Fetching IN data for 'matcha'


  df = df.fillna(False)


🌎 Fetching IN data for 'dubai chocolate'


  df = df.fillna(False)


🌎 Fetching IN data for 'baked feta cheese pasta'


  df = df.fillna(False)


🌎 Fetching IN data for 'air fryer'


  df = df.fillna(False)


🌎 Fetching US data for 'matcha'


  df = df.fillna(False)


🌎 Fetching US data for 'dubai chocolate'


  df = df.fillna(False)


🌎 Fetching US data for 'baked feta cheese pasta'


  df = df.fillna(False)


🌎 Fetching US data for 'air fryer'


  df = df.fillna(False)


🌎 Fetching KR data for 'matcha'


  df = df.fillna(False)


🌎 Fetching KR data for 'dubai chocolate'


  df = df.fillna(False)


🌎 Fetching KR data for 'baked feta cheese pasta'
🌎 Fetching KR data for 'air fryer'


  df = df.fillna(False)



✅ Data collection complete.
Shape: (130, 22)
Columns: ['matcha_WORLD', 'dubai_chocolate_WORLD', 'baked_feta_cheese_pasta_WORLD', 'air_fryer_WORLD', 'matcha_AE', 'dubai_chocolate_AE', 'air_fryer_AE', 'matcha_GB', 'dubai_chocolate_GB', 'baked_feta_cheese_pasta_GB', 'air_fryer_GB', 'matcha_IN', 'dubai_chocolate_IN', 'baked_feta_cheese_pasta_IN', 'air_fryer_IN', 'matcha_US', 'dubai_chocolate_US', 'baked_feta_cheese_pasta_US', 'air_fryer_US', 'matcha_KR', 'dubai_chocolate_KR', 'air_fryer_KR']
💾 Saved to pytrends_worldwide_and_countries.csv


In [None]:
!ls -lh pytrends_worldwide_and_countries.csv


-rw-r--r-- 1 root root 8.8K Oct 20 02:32 pytrends_worldwide_and_countries.csv


In [None]:
# Colab-only cell: requests a download of the CSV file named in OUTPUT_CSV.
# NOTE(review): google.colab is presumably unavailable outside Colab — confirm before running elsewhere.
from google.colab import files
files.download("pytrends_worldwide_and_countries.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>