In [None]:
!pip install pytrends

In [None]:
from pytrends.request import TrendReq
import pandas as pd
import logging
from datetime import datetime

from itertools import product
from tenacity import retry, stop_after_attempt, wait_exponential, after_log

# Configure logging: INFO and above goes to pytrendslog.log, which is
# truncated at the start of every run (filemode='w').
# The format now includes %(asctime)s so that `datefmt` actually takes
# effect; without it the date format was never used and log lines carried no
# timestamp, which made the retry/backoff timing impossible to reconstruct.
logging.basicConfig(
    datefmt='%d-%m-%Y:%H:%M:%S',
    filename="pytrendslog.log",
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger('pytrendslogger')

# Category table; its `id` column supplies the category ids that are queried.
cats_df = pd.read_csv("/kaggle/input/categories/trendscategories_distinct.csv")


# Initialize pytrends. Its own retry handling is switched off
# (retries=0, backoff_factor=0); retrying is done by the tenacity decorator
# defined below instead.
pytrends = TrendReq(hl='en-US', tz=360, retries=0, backoff_factor=0)

# Retry decorator shared by the request functions:
#   * at most 8 attempts,
#   * exponential wait between attempts, bounded to [2, 1800] seconds,
#   * attempts are reported through `logger` at INFO level,
#   * reraise=True surfaces the last underlying exception to the caller.
retry_deco = retry(
    reraise=True,
    stop=stop_after_attempt(8),
    wait=wait_exponential(multiplier=1, min=2, max=1800),
    after=after_log(logger, logging.INFO),
)



  

# Parameters shared by every request. An empty keyword is used together with
# a category id for each query.
keyword = ""
cat = cats_df['id']
start_date = '2018-01-01'
end_date = '2023-10-30'
geo = ['US', "GB"]

# One row per (category id, country) pair, with the shared parameters
# repeated on every row.
cross_product = pd.DataFrame(product(cat, geo), columns=['cat', 'geo'])
cross_product = cross_product.assign(
    keyword=keyword,
    start_date=start_date,
    end_date=end_date,
)

# Keep only the US rows and number them 1..N so progress can be reported as
# "iteration of total".
_us_rows = cross_product[cross_product['geo'] == "US"].reset_index(drop=True)
cat_arg_df = _us_rows.assign(
    iteration=range(1, len(_us_rows) + 1),
    total=len(_us_rows),
)
                      


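# Alternative: build the argument dicts directly, without the intermediate
# DataFrame (note: as written it keeps both geos and adds no iteration/total).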

# keyword = ""
# start_date = '2018-01-01'
# end_date = '2023-10-30'
# geo = ['US', "GB"]
# cat_arg_dicts = [
#     {'keyword': keyword, 'cat': cat_id, 'start_date': start_date, 'end_date': end_date, 'geo': g}
#     for cat_id, g in product(cats_df['id'], geo)]
# 


# One dict per request (column name -> value), so each can be splatted into a
# request function as keyword arguments.
cat_arg_dicts = cat_arg_df.to_dict(orient="records")

# Quick sanity peek at what was prepared. A slice is used rather than
# `cat_arg_dicts[1]` so this cannot raise IndexError when fewer than two
# records were produced.
len(cat_arg_dicts)
cat_arg_dicts[:2]





####  Enter function HERE  ########################


@retry_deco
def get_interestovertime(keyword="", cat=0, start_date='2018-01-01', end_date='2023-05-30', geo='', gprop='', iteration=0, total=0, **kwargs):
    """Fetch the interest-over-time table for one keyword/category pair.

    Builds a pytrends payload for ``keyword`` in category ``cat`` over
    ``"<start_date> <end_date>"`` and requests interest over time.

    Args:
        keyword: Search term; also the name of the column renamed to ``value``.
        cat: Category id passed to pytrends and stored in ``category_df``.
        start_date, end_date: Joined with a space to form the timeframe.
        geo, gprop: Forwarded unchanged to ``build_payload``.
        iteration, total: Only used in the progress log message.
        **kwargs: Accepted and ignored, so a wider argument dict can be
            splatted into the call.

    Returns:
        The frame returned by pytrends untouched when it is empty; otherwise
        that frame with its index reset, the keyword column renamed to
        ``value``, ``query`` and ``category_df`` columns added, and any
        ``isPartial`` / ``index`` columns removed.
    """
    timeframe = f"{start_date} {end_date}"
    pytrends.build_payload(kw_list=[keyword], cat=cat, timeframe=timeframe, geo=geo, gprop=gprop)

    iot_df = pytrends.interest_over_time()

    # An empty response is passed through as-is; only real data is reshaped.
    if not iot_df.empty:
        tidy = iot_df.reset_index()
        tidy = tidy.rename(columns={keyword: "value"})
        tidy = tidy.assign(query=keyword, category_df=cat)

        # Drop bookkeeping columns when they are present.
        for helper_col in ('isPartial', 'index'):
            if helper_col in tidy.columns:
                tidy = tidy.drop(columns=helper_col)

        iot_df = tidy

    logger.info(f'\n logger: got IOT {len(iot_df)} rows .....for seed term : "{keyword}" and category: "{cat}" ......\n progress: {iteration} of  {total}\n')

    return iot_df


# get_top_rising: returns the combined "top" and "rising" tables only when
# both are available; otherwise it returns an empty frame (kept simple on purpose).
# Status: works.
@retry_deco
def get_top_rising(keyword="", cat=0, start_date='2018-01-01', end_date='2023-05-30', geo='', gprop='', iteration=0, total=0, **kwargs): 
    """Fetch the "top" and "rising" related queries for one keyword/category.

    Args:
        keyword: Search term; also the key used to look up the result in the
            dict returned by ``pytrends.related_queries()``.
        cat: Category id passed to pytrends and stored in ``category_df``.
        start_date, end_date: Joined with a space to form the timeframe.
        geo, gprop: Forwarded unchanged to ``build_payload``.
        iteration, total: Only used in the progress log message.
        **kwargs: Accepted and ignored.

    Returns:
        When both tables are present: their row-wise concatenation with a
        ``type`` column ("top" / "rising") and a ``category_df`` column.
        When either table is ``None``: an empty ``DataFrame`` with no
        columns (a lone "top" or lone "rising" table is discarded).
    """
    logger.info(f'\n logger: fetching top & rising for seed term : "{keyword}" and category: "{cat}"......\n')

    timeframe = f"{start_date} {end_date}"
    pytrends.build_payload(kw_list=[keyword], cat=cat, timeframe=timeframe, geo=geo, gprop=gprop)

    related = pytrends.related_queries()[keyword]
    top_part = related["top"]
    rising_part = related["rising"]

    if top_part is None or rising_part is None:
        # All-or-nothing: if either table is missing, return nothing at all.
        top_rising_df = pd.DataFrame()
    else:
        labelled = [top_part.assign(type="top"), rising_part.assign(type="rising")]
        top_rising_df = pd.concat(labelled, axis=0, ignore_index=True)
        top_rising_df = top_rising_df.assign(category_df=cat)

    logger.info(f'\n logger: got top_rising {len(top_rising_df)} rows .....for seed term : "{keyword}" and category: "{cat}" ......\n progress: {iteration} of  {total}\n')

    return top_rising_df
 






import traceback

# Per-request bookkeeping (one dict per request) and the non-empty result
# frames gathered along the way.
summary_info = []
results = []

for idx, call_args in enumerate(cat_arg_dicts):
    try:
        logger.info(f"Starting iteration: {idx}")
        frame = get_top_rising(**call_args)

        # Success: record the row count and leave the error fields blank.
        outcome = {
            'Number of Results': len(frame),
            'Exception': '',
            'Stack Trace': '',
        }

        # Only frames that actually contain rows are kept for concatenation.
        if not frame.empty:
            results.append(frame)

    except Exception as exc:
        # Failure (after the retries inside get_top_rising are exhausted):
        # log it, record the details, and carry on with the next request.
        message = str(exc)
        trace_text = traceback.format_exc()
        logger.exception(f"Exception for {call_args}: {message}")

        outcome = {
            'Number of Results': 0,
            'Exception': message,
            'Stack Trace': trace_text,
        }

    # Summary row = the request arguments followed by the outcome fields.
    summary_info.append({**call_args, **outcome})

# One row per request: its arguments plus result count / error details.
summary_df = pd.DataFrame(summary_info)

if not results:
    # Nothing came back: keep an empty frame with the expected columns
    # (nothing is written to disk in this case).
    df_response = pd.DataFrame(columns=['query', 'value', 'type', 'category_df'])
else:
    # Stack all collected frames and persist them in both formats.
    df_response = pd.concat(results)
    df_response.to_pickle("df_response_v3.pkl")
    df_response.to_csv("df_response_v3.csv")

# Now you have `summary_df` with all the information and `df_response` with the concatenated results




In [None]:
# Persist the per-request summary (request arguments, number of results,
# exception message and stack trace) as both a pickle and a CSV file.
summary_df.to_pickle("df_summary_v3.pkl")
summary_df.to_csv("df_summary_v3.csv")

In [None]:
# Number of rows in the combined results frame.
len(df_response)

In [None]:
# Display the combined results frame.
df_response