In [17]:
import pandas as pd
from pytrends.request import TrendReq


def get_monthly_trends(kw_list, timeframe='2003-01-01 2024-01-01', geo='', batch_size=5):
    """Fetch Google Trends interest-over-time data for a list of keywords.

    Google Trends compares at most five search terms per request, so longer
    keyword lists are queried in batches of ``batch_size`` and the per-batch
    results are joined column-wise on their shared date index.

    Note: values from different batches come from separate requests, so they
    are presumably scaled independently and only directly comparable within
    a batch -- verify against the Google Trends documentation before
    comparing magnitudes across batches.

    Parameters
    ----------
    kw_list : list of str
        Search terms to query.
    timeframe : str, optional
        Date range in ``'YYYY-MM-DD YYYY-MM-DD'`` form. The default matches
        the range that was previously hard-coded (2003-01-01 to 2024-01-01).
    geo : str, optional
        Region code passed to pytrends; the empty string is the previous
        hard-coded value.
    batch_size : int, optional
        Number of keywords sent per request. Should not exceed 5.

    Returns
    -------
    pandas.DataFrame
        The frame(s) returned by ``interest_over_time()``. When several
        batches are needed, their keyword columns are concatenated and a
        single ``isPartial`` column (taken from the first batch that has
        one) is kept.
    """
    pytrends = TrendReq(hl='en-US', tz=360)

    # An empty keyword list is still sent as one request so that the
    # behaviour for that input is the same as before batching was added.
    batches = [
        kw_list[start:start + batch_size]
        for start in range(0, len(kw_list), batch_size)
    ] or [kw_list]

    frames = []
    for batch in batches:
        pytrends.build_payload(batch, cat=0, timeframe=timeframe, geo=geo, gprop='')
        frames.append(pytrends.interest_over_time())

    # Single request: return exactly what pytrends returned.
    if len(frames) == 1:
        return frames[0]

    # Every batch carries its own isPartial flag; keep only one copy so the
    # combined frame does not end up with duplicate column names.
    partial_flags = [frame['isPartial'] for frame in frames if 'isPartial' in frame.columns]
    trends = pd.concat(
        [frame.drop(columns='isPartial', errors='ignore') for frame in frames],
        axis=1,
    )
    if partial_flags:
        trends['isPartial'] = partial_flags[0]
    return trends


In [19]:

# Search terms to request from Google Trends.
kw = [
    "Inflation",
    "GDP",
    "CPI",
    "Recession",
    "Economic forecast",
    "Unemployment",
    "Interest rate",
    "Stock market",
    "Federal Reserve",
    "Real estate prices",
    "Job openings",
    "Wage growth",
    "FED",
    "Bank of England",
    "Economic stimulus",
]
data = get_monthly_trends(kw)

# Preview the first rows with the notebook's rich display instead of
# printing the entire frame as plain text.
data.head()


In [3]:
from statsmodels.tsa.stattools import adfuller

# Include enough lags to detect non-stationarity
ADF_MAX_LAG = 60
# p-values above this threshold are treated as "non-stationary"
ADF_ALPHA = 0.05

# Remove the isPartial flag column so only keyword series are tested.
# errors='ignore' keeps this cell re-runnable: the original in-place drop
# raised KeyError on a second execution because the column was already gone.
data = data.drop(columns='isPartial', errors='ignore')

for column in data.columns:
    # Perform the Augmented Dickey-Fuller test; element 1 of the result is
    # the p-value.
    p_value = adfuller(data[column], maxlag=ADF_MAX_LAG)[1]

    if p_value > ADF_ALPHA:
        # If non-stationary, apply first-order differencing
        data[column] = data[column].diff()
        print(column, "is not stationary and has been differenced.")
    else:
        # If stationary, leave as is
        print(column, "is stationary.")

# Drop the NaN values generated by the differencing process
data = data.dropna()



Inflation rate is not stationary and has been differenced.
GDP growth is not stationary and has been differenced.
Consumer price index is stationary.
Recession is stationary.
Economic forecast is stationary.


In [53]:
# Columns that are presumed non-seasonal and are carried through without
# seasonal differencing.
non_seasonal_columns = ["Consumer price index", "Recession","Economic forecast"]

# Only names that actually exist in `data` can be dropped/selected. Names
# that do not match a fetched keyword are reported and skipped instead of
# raising KeyError.
missing_columns = [name for name in non_seasonal_columns if name not in data.columns]
if missing_columns:
    print("Non-seasonal columns not found in data and skipped:", missing_columns)
present_non_seasonal = [name for name in non_seasonal_columns if name in data.columns]

data_s = data.drop(columns=present_non_seasonal)

# Choose the seasonal lag from the spacing of the index rather than
# hard-coding 52: one year is 52 steps for weekly (or finer) data and 12
# steps for monthly data.
# NOTE(review): assumes `data` has a DatetimeIndex, as pytrends is expected
# to return -- confirm.
step_days = data.index.to_series().diff().dt.days.median()
seasonal_lag = 52 if step_days <= 7 else 12

# Apply seasonal differencing to highlight seasonality in the data, then
# drop the NaN rows it introduces at the start.
data_s = data_s.diff(seasonal_lag).dropna()

# Isolate the non-seasonal columns in a separate DataFrame, restricted to the
# rows that survived seasonal differencing.
# This is necessary because differencing has reduced the length of data_s.
data_ns = data.loc[data_s.index, present_non_seasonal].copy()

# Merge the non-seasonal and seasonally differenced data
data_merged = data_ns.merge(data_s, left_index=True, right_index=True)

In [4]:
# Module-level Google Trends client (host language 'en-US', tz offset 360).
pytrends = TrendReq(
    hl='en-US',
    tz=360,
)

In [54]:
# Last five rows of `data`, shown before the index is shifted.
data.tail(5)

Unnamed: 0_level_0,Inflation rate,GDP growth,Consumer price index,Recession,Economic forecast,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-21,-15.0,0.0,2,12,0,False
2024-01-28,0.0,0.0,2,13,0,False
2024-02-04,-2.0,0.0,2,11,0,False
2024-02-11,1.0,0.0,2,19,0,False
2024-02-18,14.0,0.0,0,17,0,True


In [55]:
# Data after shifting
# Move every timestamp forward by seven days. pd.Timedelta is used because
# pandas is already imported, whereas `datetime` is never imported in this
# notebook and would raise NameError on a fresh kernel.
# Note: this reassigns the index of `data`, so re-running the cell shifts
# the dates by another week each time.
data.index = data.index + pd.Timedelta(days=7)
data.tail()

Unnamed: 0_level_0,Inflation rate,GDP growth,Consumer price index,Recession,Economic forecast,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-28,-15.0,0.0,2,12,0,False
2024-02-04,0.0,0.0,2,13,0,False
2024-02-11,-2.0,0.0,2,11,0,False
2024-02-18,1.0,0.0,2,19,0,False
2024-02-25,14.0,0.0,0,17,0,True
