In [1]:
# Import appropriate modules
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [2]:
# Read the google_trends_df.csv file from the Resources folder into a Pandas DataFrame
google_trends_df = pd.read_csv(Path("./Resources/google_trends_df.csv")
)
# Review the DataFrame
google_trends_df


Unnamed: 0.1,Unnamed: 0,Leading_Indicators,PMI,CCI,Jobless_Claims,GDP,war,Impeachment,Catastrophe,Natural_disaster,...,Debt_ceiling,retail_spending,Consumer_spending,Consumer,Earnings,Economic_contraction,Depression,Shock,Monetary_policy,VIX
0,Category: Finance,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,Month,,,,,,,,,,...,,,,,,,,,,
3,2004-01,0.0,0.0,0.0,0.0,31.0,50.0,0,34.0,0.0,...,0,0.0,0.0,100.0,45.0,0.0,37.0,34.0,0.0,0.0
4,2004-02,0.0,0.0,0.0,0.0,61.0,41.0,0,51.0,0.0,...,0,0.0,0.0,78.0,51.0,0.0,0.0,0.0,74.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,2021-06,7.0,0.0,0.0,0.0,15.0,37.0,1,16.0,7.0,...,0,0.0,5.0,43.0,33.0,0.0,17.0,21.0,5.0,18.0
213,2021-07,0.0,0.0,0.0,5.0,14.0,52.0,1,18.0,6.0,...,1,0.0,5.0,46.0,47.0,0.0,27.0,31.0,10.0,17.0
214,2021-08,3.0,0.0,0.0,10.0,15.0,38.0,13,15.0,5.0,...,1,0.0,0.0,45.0,48.0,0.0,17.0,24.0,14.0,17.0
215,2021-09,0.0,0.0,0.0,5.0,23.0,41.0,86,31.0,7.0,...,12,0.0,5.0,45.0,65.0,0.0,28.0,20.0,14.0,19.0


In [3]:
# Review the data types associated with the columns
google_trends_df.dtypes

Unnamed: 0               object
Leading_Indicators      float64
PMI                     float64
CCI                     float64
Jobless_Claims          float64
GDP                     float64
war                     float64
Impeachment              object
Catastrophe             float64
Natural_disaster        float64
Inflation               float64
unemployment            float64
Market_crash             object
Covid                    object
virus                    object
Pandemic                 object
vaccination             float64
Delta                   float64
Covid_mutation          float64
FED                     float64
Tapering                float64
Liquidity               float64
Banking                 float64
Correction_of_market    float64
coup                    float64
Debt_ceiling             object
retail_spending         float64
Consumer_spending       float64
Consumer                float64
Earnings                float64
Economic_contraction    float64
Depressi

In [5]:
google_trends_df = google_trends_df.drop(["Unnamed: 0"],axis=1)
google_trends_df

Unnamed: 0,Leading_Indicators,PMI,CCI,Jobless_Claims,GDP,war,Impeachment,Catastrophe,Natural_disaster,Inflation,...,Debt_ceiling,retail_spending,Consumer_spending,Consumer,Earnings,Economic_contraction,Depression,Shock,Monetary_policy,VIX
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,0.0,0.0,0.0,0.0,31.0,50.0,0,34.0,0.0,84.0,...,0,0.0,0.0,100.0,45.0,0.0,37.0,34.0,0.0,0.0
4,0.0,0.0,0.0,0.0,61.0,41.0,0,51.0,0.0,55.0,...,0,0.0,0.0,78.0,51.0,0.0,0.0,0.0,74.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,7.0,0.0,0.0,0.0,15.0,37.0,1,16.0,7.0,80.0,...,0,0.0,5.0,43.0,33.0,0.0,17.0,21.0,5.0,18.0
213,0.0,0.0,0.0,5.0,14.0,52.0,1,18.0,6.0,60.0,...,1,0.0,5.0,46.0,47.0,0.0,27.0,31.0,10.0,17.0
214,3.0,0.0,0.0,10.0,15.0,38.0,13,15.0,5.0,53.0,...,1,0.0,0.0,45.0,48.0,0.0,17.0,24.0,14.0,17.0
215,0.0,0.0,0.0,5.0,23.0,41.0,86,31.0,7.0,48.0,...,12,0.0,5.0,45.0,65.0,0.0,28.0,20.0,14.0,19.0


In [6]:
# Create a list of categorical variables 
categorical_variables = list(google_trends_df.dtypes[google_trends_df.dtypes=="object"].index)
# Display the categorical variables list
print(categorical_variables)


['Impeachment', 'Market_crash', 'Covid', 'virus', 'Pandemic', 'Debt_ceiling']


In [7]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)


In [12]:
# Encode the categorcal variables using OneHotEncoder
encoded_data =enc.fit_transform(google_trends_df[categorical_variables])
encoded_data

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [14]:
# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(encoded_data,columns=enc.get_feature_names(categorical_variables))
encoded_df

Unnamed: 0,Impeachment_0,Impeachment_1,Impeachment_100,Impeachment_13,Impeachment_18,Impeachment_2,Impeachment_29,Impeachment_3,Impeachment_37,Impeachment_4,...,Debt_ceiling_2,Debt_ceiling_3,Debt_ceiling_38,Debt_ceiling_4,Debt_ceiling_5,Debt_ceiling_52,Debt_ceiling_6,Debt_ceiling_73,Debt_ceiling_<1,Debt_ceiling_nan
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
213,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
214,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
215,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
encoded_df = pd.concat([encoded_df, google_trends_df.drop(columns=categorical_variables)], axis=1)
encoded_df


Unnamed: 0,Impeachment_0,Impeachment_1,Impeachment_100,Impeachment_13,Impeachment_18,Impeachment_2,Impeachment_29,Impeachment_3,Impeachment_37,Impeachment_4,...,coup,retail_spending,Consumer_spending,Consumer,Earnings,Economic_contraction,Depression,Shock,Monetary_policy,VIX
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,100.0,45.0,0.0,37.0,34.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,78.0,51.0,0.0,0.0,0.0,74.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,5.0,43.0,33.0,0.0,17.0,21.0,5.0,18.0
213,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.0,0.0,5.0,46.0,47.0,0.0,27.0,31.0,10.0,17.0
214,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,13.0,0.0,0.0,45.0,48.0,0.0,17.0,24.0,14.0,17.0
215,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14.0,0.0,5.0,45.0,65.0,0.0,28.0,20.0,14.0,19.0
