In [15]:
from datetime import datetime, timedelta
from pytrends.request import TrendReq
import pandas as pd
import os
filename = 'trend_output.txt'

In [16]:
# The maximum for a timeframe for which we get daily data is 270.
# Therefore we could go back 269 days. However, since there might
# be issues when rescaling, e.g. zero entries, we should have an
# overlap that does not consist of only one period. Therefore,
# I limit the step size to 250. This leaves 19 periods for overlap.

# maxstep = 269
# overlap = 3
maxstep = 20
overlap = 5

step    = maxstep - overlap + 1
kw_list = ["BTC USD","buy bitcoin"]
start_date = datetime(2015, 2, 1).date()


In [22]:
#First I want to test that I get the same results as: https://trends.google.com/trends/explore?date=2018-01-04%202018-09-30&geo=US&q=buy%20bitcoin
start_date = datetime(2018,1,4).date()
end_date = datetime(2018,9,30).date()
# Create new timeframe for which we download data
timeframe = start_date.strftime('%Y-%m-%d')+' '+end_date.strftime('%Y-%m-%d')
pytrend.build_payload(kw_list=kw_list, timeframe = timeframe)
interest_over_time_df = pytrend.interest_over_time()
# Save dataset
interest_over_time_df.to_csv(filename)

            BTC USD  buy bitcoin  isPartial
date                                       
2018-01-04       68           34      False
2018-01-05       71           30      False
2018-01-06       69           26      False
2018-01-07       61           29      False
2018-01-08       60           26      False
2018-01-09       62           24      False
2018-01-10       59           23      False
2018-01-11       61           23      False
2018-01-12       53           20      False
2018-01-13       49           20      False
2018-01-14       41           18      False
2018-01-15       47           17      False
2018-01-16       71           20      False
2018-01-17      100           27      False
2018-01-18       81           20      False
2018-01-19       54           15      False
2018-01-20       50           15      False
2018-01-21       42           13      False
2018-01-22       48           13      False
2018-01-23       49           14      False
2018-01-24       43           12

In [17]:
## FIRST RUN ##

# Login to Google. Only need to run this once, the rest of requests will use the same session.
pytrend = TrendReq()

# Run the first time (if we want to start from today, otherwise we need to ask for an end_date as well
today = datetime.today().date()
old_date = today

# Go back in time
new_date = today - timedelta(days=step)

# Create new timeframe for which we download data
timeframe = new_date.strftime('%Y-%m-%d')+' '+old_date.strftime('%Y-%m-%d')
pytrend.build_payload(kw_list=kw_list, timeframe = timeframe)
interest_over_time_df = pytrend.interest_over_time()


In [18]:
print(interest_over_time_df)

            BTC USD  buy bitcoin  isPartial
date                                       
2018-11-01       71           31      False
2018-11-02       65           30      False
2018-11-03       55           34      False
2018-11-04       62           25      False
2018-11-05       66           31      False
2018-11-06       68           25      False
2018-11-07       61           24      False
2018-11-08       73           33      False
2018-11-09       59           33      False
2018-11-10       60           26      False
2018-11-11       63           22      False
2018-11-12       66           30      False
2018-11-13       61           21      False
2018-11-14      100           30      False


In [19]:
while new_date>start_date:
    
    ### Save the new date from the previous iteration.
    # Overlap == 1 would mean that we start where we
    # stopped on the iteration before, which gives us
    # indeed overlap == 1.
    old_date = new_date + timedelta(days=overlap-1)
    
    ### Update the new date to take a step into the past
    # Since the timeframe that we can apply for daily data
    # is limited, we use step = maxstep - overlap instead of
    # maxstep.
    new_date = new_date - timedelta(days=step)
    # If we went past our start_date, use it instead
    if new_date < start_date:
        new_date = start_date
        
    # New timeframe
    timeframe = new_date.strftime('%Y-%m-%d')+' '+old_date.strftime('%Y-%m-%d')
    print(timeframe)

    # Download data
    pytrend.build_payload(kw_list=kw_list, timeframe = timeframe)
    temp_df = pytrend.interest_over_time()
    if (temp_df.empty):
        raise ValueError('Google sent back an empty dataframe. Possibly there were no searches at all during the this period! Set start_date to a later date.')
    # Renormalize the dataset and drop last line
    for kw in kw_list:
        beg = new_date
        end = old_date - timedelta(days=1)
        
        # Since we might encounter zeros, we loop over the
        # overlap until we find a non-zero element
        for t in range(1,overlap+1):
            #print('t = ',t)
            #print(temp_df[kw].iloc[-t])
            if temp_df[kw].iloc[-t] != 0:
                scaling = interest_over_time_df[kw].iloc[t-1]/temp_df[kw].iloc[-t]
                #print('Found non-zero overlap!')
                break
            elif t == overlap:
                print('Did not find non-zero overlap, set scaling to zero! Increase Overlap!')
                scaling = 0
        # Apply scaling
        temp_df.loc[beg:end,kw]=temp_df.loc[beg:end,kw]*scaling
    interest_over_time_df = pd.concat([temp_df[:-overlap],interest_over_time_df])


2018-10-16 2018-11-05
               BTC USD  buy bitcoin  isPartial
date                                          
2018-10-16  104.411765         38.0      False
2018-10-17   90.838235         32.0      False
2018-10-18   86.661765         28.0      False
2018-10-19   87.705882         36.0      False
2018-10-20   83.529412         31.0      False
2018-10-21   87.705882         32.0      False
2018-10-22   78.308824         25.0      False
2018-10-23   84.573529         28.0      False
2018-10-24   88.750000         32.0      False
2018-10-25   72.044118         27.0      False
2018-10-26   77.264706         27.0      False
2018-10-27   72.044118         34.0      False
2018-10-28   72.044118         27.0      False
2018-10-29   84.573529         30.0      False
2018-10-30   75.176471         24.0      False
2018-10-31   82.485294         34.0      False
2018-11-01   71.000000         31.0      False
2018-11-02   65.000000         30.0      False
2018-11-03   55.000000         34.0   