In [2]:
import time
import pandas as pd
from pytrends.request import TrendReq
from pytrends.exceptions import TooManyRequestsError
from requests.exceptions import RequestException

In [17]:
# Function to fetch interest by region, retrying failed requests
def fetch_interest_by_region(keyword, geo='europe', timeframe='2021-06-01 2024-06-01', retries=None, delay=60):
    """Return Google Trends interest-by-region data for a single keyword.

    Parameters
    ----------
    keyword : str
        Search term; sent to pytrends as a one-element keyword list.
    geo : str
        Geography passed straight to ``build_payload``.
        NOTE(review): 'europe' does not look like a Google Trends geo code
        (the rest of this notebook passes two-letter country codes) --
        confirm the default is usable before relying on it.
    timeframe : str
        Date range, written as ``'<start date> <end date>'``.
    retries : int or None
        Maximum number of attempts. ``None`` (the default) keeps retrying
        until a request succeeds.
    delay : float
        Seconds to wait between attempts.

    Returns
    -------
    pandas.DataFrame
        Whatever ``interest_by_region`` returns for the COUNTRY resolution
        (low-volume regions and geo codes included), or an empty DataFrame
        when ``retries`` was given and every attempt failed.

    Only ``RequestException`` is retried; any other exception propagates
    to the caller.
    """
    pytrends = TrendReq(hl='en-GB', tz=0)
    attempt = 0
    while True:
        attempt += 1
        try:
            # build_payload lives inside the try so that a failure while
            # preparing the query is retried as well, not just a failure
            # while downloading the result.
            pytrends.build_payload([keyword], geo=geo, timeframe=timeframe)
            return pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=True, inc_geo_code=True)
        except RequestException as e:
            if retries is not None and attempt >= retries:
                print(f"Failed to fetch regional data for {geo} after {attempt} attempts: {e}")
                return pd.DataFrame()  # Caller opted into a retry limit and it was reached
            print(f"Request failed: {e}. Waiting for a while before retrying...")
            time.sleep(delay)  # Pause before the next attempt

# Function to fetch interest over time for one geography
def fetch_weekly_interest(keyword, geo, timeframe='2021-06-01 2024-06-01', retries=5, delay=60):
    """Return the interest-over-time series for one keyword in one geography.

    Parameters
    ----------
    keyword : str
        Search term; sent to pytrends as a one-element keyword list.
    geo : str
        Geography passed to ``build_payload`` (this notebook passes
        two-letter country codes).
    timeframe : str
        Date range, written as ``'<start date> <end date>'``.
    retries : int
        Maximum number of attempts before giving up.
    delay : float
        Seconds to wait between attempts.

    Returns
    -------
    pandas.DataFrame
        Result of ``interest_over_time`` with the ``isPartial`` column
        removed when present, or an empty DataFrame if every attempt
        raised ``RequestException``.

    Only ``RequestException`` is handled here. ``TooManyRequestsError`` is
    not caught by this function and reaches the caller, which is why the
    calling loop in this notebook handles it separately.
    """
    pytrends = TrendReq(hl='en-GB', tz=0)
    for attempt in range(1, retries + 1):
        try:
            pytrends.build_payload([keyword], geo=geo, timeframe=timeframe)
            interest_over_time = pytrends.interest_over_time()
            # errors='ignore' makes this a no-op when the column is absent
            return interest_over_time.drop(columns=['isPartial'], errors='ignore')
        except RequestException as e:
            if attempt < retries:
                print(f"Request failed (attempt {attempt}/{retries}): {e}. Retrying after a delay...")
                time.sleep(delay)  # Pause before the next attempt
            else:
                # No further attempt follows, so waiting here would only waste time
                print(f"Request failed (attempt {attempt}/{retries}): {e}.")
    print(f"Failed to fetch data for {geo} after {retries} attempts.")
    return pd.DataFrame()  # Return an empty DataFrame if all retries fail

# Country codes this notebook treats as its European set
def fetch_european_countries():
    """Return the hard-coded list of two-letter country codes to query.

    Nothing is downloaded; the codes are spelled out below and a fresh
    list is built on every call, so callers may modify the result freely.
    """
    codes = (
        "AL AD AM AT AZ BY BE BA BG HR CY CZ DK "
        "EE FO FI FR GE DE GI GR GL HU IS IE IT "
        "KZ LV LI LT LU MK MT MD MC ME NL NO PL "
        "PT RO RU RS SK SI ES SE CH TR UA GB VA"
    )
    return codes.split()

# Search term and date range shared by every per-country request
keyword = "iPhone"
timeframe = "2021-06-01 2024-06-01"  # Specific date range

# Country codes to query
european_countries = fetch_european_countries()

# Maps country code -> interest-over-time DataFrame; only non-empty results are stored
country_data_dict = {}

# Query each country in turn. A rate-limit error pauses for a minute and then
# repeats the same country; there is no upper bound on those repeats.
for country_code in european_countries:
    while True:
        try:
            country_data = fetch_weekly_interest(keyword, geo=country_code, timeframe=timeframe)
        except TooManyRequestsError:
            print("Too many requests. Retrying after a delay...")
            time.sleep(60)  # Sleep for 60 seconds before retrying
            continue
        if country_data.empty:
            print(f"No data for {country_code}")
        else:
            country_data_dict[country_code] = country_data
            print(f"Successfully fetched data for {country_code}")
        break


Successfully fetched data for AL
Too many requests. Retrying after a delay...
Successfully fetched data for AD
Successfully fetched data for AM
Successfully fetched data for AT
Successfully fetched data for AZ
Successfully fetched data for BY
Successfully fetched data for BE
Successfully fetched data for BA
Successfully fetched data for BG
Successfully fetched data for HR
Successfully fetched data for CY
Successfully fetched data for CZ
Successfully fetched data for DK
Too many requests. Retrying after a delay...
Successfully fetched data for EE
Successfully fetched data for FO
Successfully fetched data for FI
Successfully fetched data for FR
Successfully fetched data for GE
Too many requests. Retrying after a delay...
Successfully fetched data for DE
Successfully fetched data for GI
Successfully fetched data for GR
Successfully fetched data for GL
Successfully fetched data for HU
Too many requests. Retrying after a delay...
Too many requests. Retrying after a delay...
Successfully fet

In [18]:
# Lay the per-country frames side by side (the dict keys become the outer
# column level), then turn the 'date' index into an ordinary column
combined_data = pd.concat(country_data_dict, axis=1).reset_index()

# Flatten the two-level column labels down to their first level, which
# leaves 'date' plus one column per country code
combined_data.columns = combined_data.columns.get_level_values(0).tolist()

# Display the resulting DataFrame
combined_data

Unnamed: 0,date,AL,AD,AM,AT,AZ,BY,BE,BA,BG,...,RU,RS,SK,SI,ES,SE,CH,TR,UA,GB
0,2021-05-30,42,50,33,37,37,47,37,45,38,...,46,41,39,35,33,48,35,42,41,53
1,2021-06-06,47,40,40,42,41,46,34,38,32,...,47,41,40,36,34,53,34,42,48,52
2,2021-06-13,39,29,43,37,42,46,35,42,35,...,50,40,43,35,34,52,34,46,44,50
3,2021-06-20,45,45,39,41,37,43,37,40,33,...,46,41,42,40,37,52,34,45,44,51
4,2021-06-27,46,34,36,43,38,48,42,44,34,...,47,41,43,41,38,55,36,43,45,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,2024-04-28,46,28,46,47,43,52,46,61,44,...,39,54,49,46,44,47,37,44,49,51
153,2024-05-05,39,43,41,46,44,57,44,67,42,...,39,61,53,45,44,47,40,46,51,57
154,2024-05-12,39,45,44,45,46,57,43,57,38,...,37,59,50,45,45,47,38,46,48,51
155,2024-05-19,40,26,43,47,45,54,47,59,41,...,38,58,53,47,44,52,42,46,49,51


In [19]:
# Summary statistics (count, mean, min, quartiles, max, std) for each column of combined_data
combined_data.describe()

Unnamed: 0,date,AL,AD,AM,AT,AZ,BY,BE,BA,BG,...,RU,RS,SK,SI,ES,SE,CH,TR,UA,GB
count,157,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0,...,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0
mean,2022-11-27 00:00:00,50.847134,56.401274,50.152866,49.146497,52.159236,60.808917,49.687898,59.961783,46.388535,...,50.025478,61.407643,58.394904,49.751592,50.22293,58.961783,45.242038,53.88535,52.299363,57.942675
min,2021-05-30 00:00:00,38.0,25.0,33.0,37.0,37.0,43.0,34.0,38.0,32.0,...,36.0,40.0,39.0,35.0,33.0,46.0,34.0,40.0,35.0,48.0
25%,2022-02-27 00:00:00,45.0,45.0,44.0,43.0,47.0,53.0,44.0,54.0,41.0,...,43.0,55.0,51.0,45.0,45.0,53.0,40.0,48.0,47.0,53.0
50%,2022-11-27 00:00:00,49.0,54.0,48.0,47.0,51.0,59.0,47.0,60.0,44.0,...,47.0,61.0,57.0,48.0,48.0,57.0,43.0,52.0,51.0,55.0
75%,2023-08-27 00:00:00,54.0,68.0,54.0,51.0,55.0,67.0,52.0,65.0,49.0,...,56.0,66.0,62.0,52.0,53.0,61.0,47.0,58.0,54.0,61.0
max,2024-05-26 00:00:00,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
std,,9.859374,16.328306,9.529418,9.40391,8.920956,10.140762,9.553395,9.374601,9.277523,...,10.83498,9.74808,10.802871,9.065047,9.280251,9.153874,9.727967,8.309686,9.385248,8.702521


In [20]:
# Column dtypes and non-null counts, as a quick check for gaps in any country's series
combined_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 51 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    157 non-null    datetime64[ns]
 1   AL      157 non-null    int32         
 2   AD      157 non-null    int32         
 3   AM      157 non-null    int32         
 4   AT      157 non-null    int32         
 5   AZ      157 non-null    int32         
 6   BY      157 non-null    int32         
 7   BE      157 non-null    int32         
 8   BA      157 non-null    int32         
 9   BG      157 non-null    int32         
 10  HR      157 non-null    int32         
 11  CY      157 non-null    int32         
 12  CZ      157 non-null    int32         
 13  DK      157 non-null    int32         
 14  EE      157 non-null    int32         
 15  FO      157 non-null    int32         
 16  FI      157 non-null    int32         
 17  FR      157 non-null    int32         
 18  GE      15

In [21]:
# Save the combined frame as combined_data.csv (relative path); index=False leaves out the row index
combined_data.to_csv('combined_data.csv', index=False)