In [2]:
from trendspy import Trends, BatchPeriod
import logging
import pandas as pd


class TrendsWrapper:
    """
    A wrapper class for accessing Google Trends data using the TrendsPy library.
    
    Provides reusable methods for popular Google Trends features:
    - Interest over time
    - Interest by region
    - Related queries and topics
    - Real-time trending searches and news
    - Geographic analysis
    
    Usage:
        tw = TrendsWrapper(proxy={"http": "http://proxy.example.com:3128"})
        df = tw.get_interest_over_time(['python', 'javascript'])
    """
    
    def __init__(self, proxy=None):
        """
        Initialize the TrendsWrapper with optional proxy configuration.

        Args:
            proxy (dict or str, optional): Proxy configuration in requests format.
        """
        self.proxy = proxy
        self.trends = Trends(proxy=proxy)
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def get_interest_over_time(self, keywords, timeframe='today 12-m', geo=None, category=None):
        """
        Retrieve interest over time for specified keywords.

        Args:
            keywords (list): List of keywords to analyze.
            timeframe (str): Timeframe for the analysis (default: 'today 12-m').
            geo (str, optional): Geographic region code (default: None).
            category (str, optional): Category ID for the analysis (default: None).
        
        Returns:
            pandas.DataFrame: DataFrame containing interest over time.
        """
        logging.info(f"Fetching interest over time for keywords: {keywords}")
        return self.trends.interest_over_time(
            keywords, timeframe=timeframe, geo=geo, cat=category
        )

    def get_interest_by_region(self, keyword, geo=None, resolution='COUNTRY'):
        """
        Analyze geographic distribution of interest.

        Args:
            keyword (str): Keyword to analyze.
            geo (str, optional): Geographic region code (default: None).
            resolution (str, optional): Geographic resolution ('COUNTRY', 'REGION', 'CITY').
        
        Returns:
            pandas.DataFrame: DataFrame containing interest by region.
        """
        logging.info(f"Fetching interest by region for keyword: {keyword}")
        return self.trends.interest_by_region(
            keyword, geo=geo, resolution=resolution
        )

    def get_related_queries(self, keyword):
        """
        Retrieve related queries for a keyword.

        Args:
            keyword (str): Keyword to analyze.

        Returns:
            dict: Dictionary of related queries (rising and top).
        """
        logging.info(f"Fetching related queries for keyword: {keyword}")
        return self.trends.related_queries(keyword, 
                                           headers={'referer': 'https://www.google.com/'})

    def get_related_topics(self, keyword):
        """
        Retrieve related topics for a keyword.

        Args:
            keyword (str): Keyword to analyze.

        Returns:
            dict: Dictionary of related topics (rising and top).
        """
        logging.info(f"Fetching related topics for keyword: {keyword}")
        return self.trends.related_topics(keyword)

    def get_trending_now(self, geo='US'):
        """
        Retrieve currently trending searches.

        Args:
            geo (str): Geographic region code (default: 'US').

        Returns:
            list: List of trending topics.
        """
        logging.info(f"Fetching trending searches for geo: {geo}")
        return self.trends.trending_now(geo=geo)

    def get_trending_news(self, geo='US'):
        """
        Retrieve trending searches with associated news articles.

        Args:
            geo (str): Geographic region code (default: 'US').

        Returns:
            list: List of trending topics with associated news articles.
        """
        logging.info(f"Fetching trending searches with news for geo: {geo}")
        return self.trends.trending_now_by_rss(geo=geo)

    def get_historical_trends(self, keywords, timeframe=BatchPeriod.Past24H):
        """
        Retrieve independent historical data for multiple keywords.

        Args:
            keywords (list): List of keywords to analyze.
            timeframe (BatchPeriod): Timeframe for the analysis (default: Past24H).

        Returns:
            pandas.DataFrame: DataFrame containing historical trends.
        """
        logging.info(f"Fetching historical trends for keywords: {keywords}")
        return self.trends.trending_now_showcase_timeline(keywords, timeframe=timeframe)

    def search_categories(self, query):
        """
        Search for category IDs based on a query string.

        Args:
            query (str): Query string to search for categories.

        Returns:
            list: List of matching categories with their IDs.
        """
        logging.info(f"Searching for categories matching: {query}")
        return self.trends.categories(find=query)

    def search_locations(self, query):
        """
        Search for location codes based on a query string.

        Args:
            query (str): Query string to search for locations.

        Returns:
            list: List of matching locations with their IDs.
        """
        logging.info(f"Searching for locations matching: {query}")
        return self.trends.geo(find=query)

    def set_proxy(self, proxy):
        """
        Update the proxy configuration.

        Args:
            proxy (dict or str): Proxy configuration in requests format.
        """
        logging.info("Updating proxy configuration.")
        self.trends.set_proxy(proxy)


def parse_related_queries(keyword, trends_wrapper, output_file=None):
    """
    Fetch, parse, and save related queries for a given keyword using TrendsPy.

    Args:
        keyword (str): The search keyword for which related queries are fetched.
        trends_wrapper (TrendsWrapper): An instance of the TrendsWrapper class.
        output_file (str, optional): Path to save the resulting CSV file. Defaults to None.

    Returns:
        pandas.DataFrame: DataFrame containing the parsed related queries.
    
    Columns in DataFrame:
        - query: The related search query text.
        - value: Popularity or growth score of the related query.
        - category: Indicates whether the query is 'top' or 'rising'.
        - keyword: The original keyword used in the search.
    """
    # Fetch related queries
    related_queries = trends_wrapper.get_related_queries(keyword)
    
    # Initialize a list to store parsed data
    data = []

    # Process each category ('top' and 'rising') in the related queries
    for category, queries in related_queries.items():
        if queries is not None:  # Ensure there are queries in the category
            for _, query_data in queries.iterrows():  # Iterate over the DataFrame rows
                data.append({
                    'query': query_data['query'],  # Query text
                    'value': query_data['value'],  # Popularity or growth score
                    'category': category,  # 'top' or 'rising'
                    'keyword': keyword  # The original keyword for traceability
                })

    # Convert the collected data into a pandas DataFrame
    df_related_queries = pd.DataFrame(data)

    # Save the DataFrame to a CSV file if an output file path is provided
    if output_file:
        df_related_queries.to_csv(output_file, index=False)
        print(f"Related queries saved to {output_file}")

    # Return the DataFrame for further analysis
    return df_related_queries

In [8]:
# Initialize the TrendsWrapper
from trendspy import Trends

tr = Trends()

# Get trending searches for geo='US'
trending_now_us = tr.trending_now(geo='US')

# Print the results
print("Trending Now in the US:")
for topic in trending_now_us:
    print(topic)

# Initialize the API client
tr = Trends()
trends_tech = list(filter(lambda item: 18 in item.topics, trending_now_us))
news = tr.trending_now_news_by_ids(trends_tech[0].news_tokens, max_news=3)

for news_item in news:
    print(news_item, '\n')

Trending Now in the US:
Keyword        : linda mcmahon
Geo            : US
Volume         : 500000 (1000%)
Timeframe      : 2024-11-19 20:40:00 - now
Trend keywords : 21 keywords (linda mcmahon,linda mcmahon education,vince mcm...)
News tokens    : 27 tokens
Keyword        : dr oz
Geo            : US
Volume         : 500000 (1000%)
Timeframe      : 2024-11-19 20:50:00 - now
Trend keywords : 23 keywords (dr oz,dr. oz,dr oz trump,mehmet oz,dr oz medica...)
News tokens    : 27 tokens
Keyword        : pse outage map
Geo            : US
Volume         : 500000 (1000%)
Timeframe      : 2024-11-19 23:00:00 - now
Trend keywords : 72 keywords (pse outage map,bay,puget sound energy,power out...)
News tokens    : 143 tokens
Keyword        : dalton knecht
Geo            : US
Volume         : 200000 (1000%)
Timeframe      : 2024-11-20 04:30:00 - now
Trend keywords : 15 keywords (dalton knecht,dalton knecht stats,lakers,los an...)
News tokens    : 44 tokens
Keyword        : jay leno
Geo            :

In [6]:
from trendspy import Trends

# Initialize the API client
tr = Trends()
trends_tech = list(filter(lambda item: 18 in item.topics, trending_now_us))
news = tr.trending_now_news_by_ids(trends_tech[0].news_tokens, max_news=3)

for news_item in news:
    print(news_item, '\n')

Title   : The PlayStation Portal remote player experience to evolve with new system 
update
URL     : https://blog.playstation.com/2024/11/19/the-playstation-portal-remote-player-experience-to-evolve-with-new-system-update/
Source  : PlayStation.Blog
Picture : https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSryCVqCkH3eSK4ZBMfRHGGYV0T5BzMAtb3nZPPc3r8z-qZZbuvL29C4gJxi2w
Time    : 2024-11-19 22:00:03 

Title   : Sony Finally Turns the PlayStation Portal Into the Handheld Streamer We 
Always Wanted
URL     : https://gizmodo.com/playstation-portal-handheld-streamer-2000526956
Source  : Gizmodo
Picture : https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSUl30RWz_prVA6Jj1Aui9-b6R3fvFwo-8_NpbN7sA5eZ7rC29E4vu0eh_S5k4
Time    : 2024-11-20 15:50:30 

Title   : Sony’s new PlayStation Portal update lets you stream PS5 games from the 
cloud
URL     : https://www.theverge.com/2024/11/19/24300991/playstation-portal-cloud-streaming-update-beta-ps5-games
Source  : The Verge
Picture : https://e

### Testing unified fetch_trending_data function

In [31]:
import time 

@time_it
def fetch_trending_data():
    """
    Fetch trending data and related news articles, merge the results into a DataFrame.
    
    This function:
    1. Retrieves trending topics using the `trending_now` method.
    2. Limits the size of the resulting DataFrame to 100 entries.
    3. Fetches news articles for each trending topic using the `trending_now_news_by_ids` method.
    4. Combines the news data with the trending topics.
    
    Returns:
        pandas.DataFrame: DataFrame containing the merged trending data, with columns:
            - Keyword: The trending keyword.
            - Volume: The search volume for the keyword.
            - Timeframe: The time range during which the keyword has been trending.
            - Trend keywords: The first 10 related keywords.
            - News articles: Titles, URLs, and other details of related news articles.
    """
    try:
        # Fetch trending topics
        trending_now_data = tr.trending_now(geo='US')

        # Limit the size of the trending topics to 100
        trending_now_data = trending_now_data[:3]

        # Log the raw data structure for debugging
        # logging.info(f"Raw trending data: {trending_now_data}")

        # Initialize a list to store data
        data = []

        # Process each TrendKeyword object
        for trend in trending_now_data:
            # Extract basic information about the trend
            keyword = trend.keyword
            volume = trend.volume
            timeframe = f"{trend._convert_to_datetime(trend.started_timestamp[0]).strftime('%Y-%m-%d %H:%M:%S')} - " \
                        f"{trend._convert_to_datetime(trend.ended_timestamp[0]).strftime('%Y-%m-%d %H:%M:%S') if trend.is_trend_finished else 'now'}"
            trend_keywords = ', '.join(trend.trend_keywords[:10]) if trend.trend_keywords else 'No related keywords'

            # Fetch news articles related to the trend using `trending_now_news_by_ids`
            news_articles_data = tr.trending_now_news_by_ids(trend.news_tokens, max_news=5)
            news_articles = []
            for article in news_articles_data:
                news_articles.append(f"{article.title} ({article.source}) - {article.url}")
                time.sleep(0.5)

            # Combine all news articles into a single string
            news_articles_str = '; '.join(news_articles) if news_articles else 'No articles'

            # Append the extracted data to the list
            data.append({
                'Keyword': keyword,
                'Volume': volume,
                'Timeframe': timeframe,
                'Trend keywords': trend_keywords,
                'News articles': news_articles_str
            })

        # Convert the list of data into a pandas DataFrame
        trending_df = pd.DataFrame(data)

        return trending_df

    except Exception as e:
        # Log the detailed error for debugging
        logging.error("An error occurred while fetching trending data.", exc_info=True)
        return pd.DataFrame()  # Return an empty DataFrame on error


In [32]:
import time
from functools import wraps

def time_it(func):
    """
    A decorator that measures and prints the time taken by a function to complete.
    
    Args:
        func (callable): The function to be decorated.
    
    Returns:
        callable: The wrapped function with timing functionality.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()  # Record the start time
        result = func(*args, **kwargs)  # Call the wrapped function
        end_time = time.time()  # Record the end time
        duration = end_time - start_time  # Calculate the duration
        print(f"Function '{func.__name__}' completed in {duration:.2f} seconds.")
        return result  # Return the result of the function
    return wrapper

In [33]:
#execution time for 3 results
data = fetch_trending_data()

Function 'fetch_trending_data' completed in 7.88 seconds.


In [34]:
data

Unnamed: 0,Keyword,Volume,Timeframe,Trend keywords,News articles
0,linda mcmahon,500000,2024-11-19 20:40:00 - now,"linda mcmahon, linda mcmahon education, vince ...",Trump names Linda McMahon as his pick for Educ...
1,dr oz,500000,2024-11-19 20:50:00 - now,"dr oz, dr. oz, dr oz trump, mehmet oz, dr oz m...",Fetterman would be willing to confirm Dr. Oz —...
2,pse outage map,500000,2024-11-19 23:00:00 - now,"pse outage map, bay, puget sound energy, power...",A new threat arrives from a deadly bomb cyclon...


In [36]:
data['News articles'].loc[0]

"Trump names Linda McMahon as his pick for Education secretary (CNN) - https://www.cnn.com/2024/11/19/politics/linda-mcmahon-education-secretary-trump/index.html; Trump’s Latest Cabinet Pick Is Also Mired in a Sexual Abuse Scandal (The New Republic) - https://newrepublic.com/post/188630/linda-mcmahon-trump-cabinet-sexual-abuse-scandal; Former W.W.E. Boss Tapped to Run Education, and Trans Rights on Capitol Hill (The New York Times) - https://www.nytimes.com/2024/11/20/podcasts/trump-education-trans-rights-capitol-hill.html; Linda McMahon made a fortune with WWE. Wrestling scandals now shadow her \nrise. (The Washington Post) - https://www.washingtonpost.com/politics/2024/11/19/linda-mcmahon-trump-administration-wwe-allegations/; Betsy DeVos praises Linda McMahon as 'great appointment' to lead Trump's \nEducation Department (Fox News) - https://www.foxnews.com/video/6364953486112"

In [13]:
data = fetch_trending_data()

2024-11-20 17:32:18,990 - INFO - Raw trending data: [TrendKeyword(keyword='linda mcmahon', geo='US', volume=500000, started_timestamp=[1732048800], ended_timestamp=None), TrendKeyword(keyword='dr oz', geo='US', volume=500000, started_timestamp=[1732049400], ended_timestamp=None), TrendKeyword(keyword='pse outage map', geo='US', volume=500000, started_timestamp=[1732057200], ended_timestamp=None), TrendKeyword(keyword='dalton knecht', geo='US', volume=200000, started_timestamp=[1732077000], ended_timestamp=None), TrendKeyword(keyword='jay leno', geo='US', volume=50000, started_timestamp=[1732047000], ended_timestamp=None), TrendKeyword(keyword='méxico - honduras', geo='US', volume=500000, started_timestamp=[1732067400], ended_timestamp=None), TrendKeyword(keyword='liam payne funeral', geo='US', volume=20000, started_timestamp=[1732092000], ended_timestamp=None), TrendKeyword(keyword='brazil vs uruguay', geo='US', volume=200000, started_timestamp=[1732050600], ended_timestamp=None), Tren

In [14]:
print(data)

Empty DataFrame
Columns: []
Index: []
