# Build Market Movement Classifier. 

In [1]:
# Python module. 
import re, os, talib 
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import yfinance as yf
import xgboost as xgb
import shap 
import optuna 
from optuna import visualization as opt_viz 
from sklearn.model_selection import train_test_split, TimeSeriesSplit 
from sklearn.pipeline import Pipeline 
from sklearn.linear_model import ElasticNet 
from sklearn.metrics import mean_squared_error 
from feature_engine.encoding import OneHotEncoder 

# Change the current directory from (./notebook) to root directory. 
# Walk up one level at a time until the working directory path ends in 
# "MADS-CAP", so the project-relative imports and paths below resolve. 
while not re.match(r".+MADS-CAP$", os.getcwd()): 
	dir_before = os.getcwd() 
	os.chdir("..") 

	# At the filesystem root ".." resolves to the root itself. Without this 
	# guard the loop would spin forever when the notebook is started outside 
	# of a MADS-CAP checkout. 
	if os.getcwd() == dir_before: 
		raise FileNotFoundError(
			"Could not locate the (MADS-CAP) project root above the notebook directory."
		) 

print(f"Current directory: ({os.getcwd()})") 

# For clearing safe warnings. Not important. 
from IPython.display import clear_output

# Custom modules. 
from source.modules.processor_features import (
	compile_features_each_ticker,
	concat_eventdates, concat_eachyear, 
	add_eventflags, merge_with_ticker, 
)
from source.modules.processor_ticker import (
	get_ticker_yfinance, compute_forward_return
)
from source.modules.processor_technical import get_candlesticks 

# Custom configs. 
from source.config_py.config import (
	DIR_DATASET_CONSOLIDATED, DIR_DATASET_TICKER, 
	DIR_DATASET_SENTIMENT, DIR_DATASET_TECH_IND, DIR_MLMODEL_MLESTIM, 
	TICKER_DATE_COLLECT, TICKER_TO_COLLECT, TICKER_TO_EXCLUDE, 
	PARAM_SEED, EXPERIMENT_TRIAL 
) 

  from .autonotebook import tqdm as notebook_tqdm


Current directory: (/Users/lioneltay/Dropbox/Courses/michigan_mads/SIADS_697_/submission/MADS-CAP)


## Configurations (general). 

In [2]:
# Matplotlib setting. 
%matplotlib inline 

# Pandas DF config. 
pd.set_option("display.max_rows", 50, "display.max_columns", 100, "display.max_colwidth", 50)

# Ensure reproducibility. 
np.random.seed(PARAM_SEED) 

# List of ticker to collect data. 
ticker_to_collect = TICKER_TO_COLLECT.difference(TICKER_TO_EXCLUDE) 

# Date range. 
date_beg, date_end = TICKER_DATE_COLLECT 

# Whether to load the cache file for the fundamental data. 
load_cache = True 

# For clearing the output. Not important. 
clear_output()

## Get ticker data. 

In [3]:
# Per the original author's notes: 
#   - set (load_cache) to (False) to overwrite existing data for all tickers; 
#     otherwise unavailable data is appended to the existing dataset. 
#   - collecting the entire S&P ticker list takes about 30 minutes. 

filepath = os.path.join(DIR_DATASET_TICKER, "ticker_dailydata.csv") 

# The date range is forwarded as plain keyword arguments. 
df_tickers = compile_features_each_ticker(
	get_ticker_yfinance, 
	filepath, 
	ticker_to_collect, 
	load_cache=load_cache, 
	date_beg=date_beg, 
	date_end=date_end, 
) 

# Preview. 
df_tickers 

Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL
...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN


## Create target labels. 

In [4]:
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "df_feature_w_label.parquet") 

if load_cache and os.path.isfile(filepath): 
	# Reuse the consolidated dataset cached by an earlier run. 
	df_feature_w_label = pd.read_parquet(filepath) 
else: 
	# Return lags. 
	returns_lags = [1, 5, 10, 21, 126, 252] 

	# For trimming outliers. 
	trim_out = 0.0001 

	# Define the rolling window and min period for computing 
	# the mean reversion. 252 == 1-year. 
	window = 252 

	# Define market movement scale. 
	volt_lo, volt_hi = 0.25, 1.0 

	# Create numerical labels. Pass (window) itself rather than a repeated 
	# literal so that editing the setting above actually takes effect. 
	df_feature_w_label = compute_forward_return(
		df_tickers.copy(), returns_lags, trim_out=trim_out, 
		window=window, volt_range=(volt_lo, volt_hi) 
	) 

	# Clear output. Not important. 
	clear_output() 

	# Cache the processed dataset. 
	df_feature_w_label.to_parquet(filepath, index=False) 

# Preview. 
df_feature_w_label 

Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,tscore_c2c_lag1,return_c2c_lag5,tscore_c2c_lag5,return_c2c_lag10,tscore_c2c_lag10,return_c2c_lag21,tscore_c2c_lag21,return_c2c_lag126,tscore_c2c_lag126,return_c2c_lag252,tscore_c2c_lag252
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL,,,,,,,,,,,,
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL,0.080043,,,,,,,,,,,
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL,0.037740,,,,,,,,,,,
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL,-0.037740,,,,,,,,,,,
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL,-0.039221,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN,-0.013344,0.711105,-0.000904,0.073084,-0.003298,0.545528,0.000290,0.219061,-0.000385,0.736227,-0.000168,1.243252
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN,-0.015852,0.841002,-0.006597,0.788386,-0.005142,0.880533,0.002401,0.822236,-0.000692,1.183027,-0.000252,1.375354
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN,-0.036407,1.929873,-0.016017,1.960147,-0.011407,2.003722,0.000095,0.155895,-0.001124,1.802706,-0.000361,1.547022
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN,0.044100,2.350245,-0.008869,1.077387,-0.006478,1.113115,0.003587,1.152612,-0.000714,1.204090,-0.000039,0.997307


## Merge with VIX data. 

In [6]:
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "df_feature_w_label.parquet") 

# The label-creation cell caches to this same parquet file, so the file 
# existing does not prove that the VIX merge has been applied: on a fresh run 
# it was written moments ago without VIX data. Only treat the cache as a hit 
# when it already carries the "vix_"-prefixed columns produced by this merge. 
df_cached = pd.read_parquet(filepath) if load_cache and os.path.isfile(filepath) else None 
has_vix = df_cached is not None and any(
	str(col).startswith("vix_") for col in df_cached.columns
) 

if has_vix: 
	df_feature_w_label = df_cached 
else: 
	# Get VIX features. 
	df_vix = get_ticker_yfinance("^vix", date_beg, date_end) 

	# Make a copy of the dataframe to avoid error related to pandas (SettingWarnings). 
	usecols = ["date", "open", "close"] 
	df_feature_w_label = merge_with_ticker(
		df_feature_w_label.copy(), df_vix[usecols].copy(), merge_suffix="vix"
	) 

	# Cache the processed dataset. 
	df_feature_w_label.to_parquet(filepath, index=False) 

# Preview. 
df_feature_w_label 

Getting ticker data from (Yahoo Finance) for (^vix).


Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,tscore_c2c_lag1,return_c2c_lag5,tscore_c2c_lag5,return_c2c_lag10,tscore_c2c_lag10,return_c2c_lag21,tscore_c2c_lag21,return_c2c_lag126,tscore_c2c_lag126,return_c2c_lag252,tscore_c2c_lag252,vix_date,vix_open,vix_close
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL,,,,,,,,,,,,,1998-11-30,22.79,26.01
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL,0.080043,,,,,,,,,,,,1998-12-01,27.38,24.97
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL,0.037740,,,,,,,,,,,,1998-12-02,25.63,25.43
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL,-0.037740,,,,,,,,,,,,1998-12-03,25.53,28.70
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL,-0.039221,,,,,,,,,,,,1998-12-04,26.28,25.31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN,-0.013344,0.711105,-0.000904,0.073084,-0.003298,0.545528,0.000290,0.219061,-0.000385,0.736227,-0.000168,1.243252,2022-02-18,26.66,27.75
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN,-0.015852,0.841002,-0.006597,0.788386,-0.005142,0.880533,0.002401,0.822236,-0.000692,1.183027,-0.000252,1.375354,2022-02-22,31.80,28.81
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN,-0.036407,1.929873,-0.016017,1.960147,-0.011407,2.003722,0.000095,0.155895,-0.001124,1.802706,-0.000361,1.547022,2022-02-23,28.04,31.02
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN,0.044100,2.350245,-0.008869,1.077387,-0.006478,1.113115,0.003587,1.152612,-0.000714,1.204090,-0.000039,0.997307,2022-02-24,37.50,30.32


## Merge with event date occurrence. 

In [7]:
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "df_feature_w_label.parquet") 

# Earlier cells cache to this same parquet file, so its existence alone does 
# not prove that the event flags were added. Only treat the cache as a hit 
# when it already contains event-flag columns (names as they appear in this 
# cell's output). 
eventcols = {"firsttrdrday_ofmonth", "santa_rally", "tww_trdrday"} 
df_cached = pd.read_parquet(filepath) if load_cache and os.path.isfile(filepath) else None 

if df_cached is not None and eventcols.issubset(df_cached.columns): 
	df_feature_w_label = df_cached 
else: 
	# Get the event date flags. 
	df_eventdates = concat_eventdates() 
	df_feature_w_label = add_eventflags(df_feature_w_label.copy(), df_eventdates) 

	# Cache the processed dataset. 
	df_feature_w_label.to_parquet(filepath, index=False) 
	
# Preview. 
df_feature_w_label 

Read from (economic_reported_date.csv)
Read from (firsttrdrday_ofmonth.csv)
Read from (observance_dates_ext.csv)
Read from (santa_rally.csv)
Read from (triple_witching_week.csv)


Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,tscore_c2c_lag1,return_c2c_lag5,tscore_c2c_lag5,return_c2c_lag10,tscore_c2c_lag10,return_c2c_lag21,tscore_c2c_lag21,return_c2c_lag126,tscore_c2c_lag126,return_c2c_lag252,tscore_c2c_lag252,vix_date,vix_open,vix_close,jobs_opening_labor_turnover,non_farm_employment_adp_mom,non_farm_employment_mom,unemployment_claims,unemployment_rate,avg_hourly_earnings_mom,personal_dispensable_income_mom,personal_consumption_mom,ism_pmi_manufacturer,ism_pmi_services,chicago_pmi,industry_production_mom,phil_fed_manufacturer,capacity_utilisation,manufacturer_new_order_mom,manufacturer_new_order_ex_trans_mom,retail_sales_ex_auto_mom,retail_sales_mom,uom_consumer_sentiment,producer_ppi_mom,producer_ppi_ex_food_energy_mom,consumer_cpi_mom,consumer_cpi_ex_food_energy_mom,pce_ex_food_energy_mom,housing_hpi_mom,housing_hpi_cs_yoy,building_permit,housing_starts,exist_home_sales,pending_home_sales,new_home_sales,gdp_advance_us,gdp_deflator_advance_us,gdp_us,gdp_deflator,crude_oil_inventory,natural_gas_inventory,fomc_presscf,fomc_minutes,opec,opec_jmmc,firsttrdrday_ofmonth,black_friday,christmas,columbus,cyber_monday,good_friday,labor,martin_lut_king,new_year,thanksgiving,us_event_sep11,us_independence,us_memorial,us_president,us_veterans,valentine,santa_rally,tww_trdrday
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL,,,,,,,,,,,,,1998-11-30,22.79,26.01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL,0.080043,,,,,,,,,,,,1998-12-01,27.38,24.97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL,0.037740,,,,,,,,,,,,1998-12-02,25.63,25.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL,-0.037740,,,,,,,,,,,,1998-12-03,25.53,28.70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL,-0.039221,,,,,,,,,,,,1998-12-04,26.28,25.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN,-0.013344,0.711105,-0.000904,0.073084,-0.003298,0.545528,0.000290,0.219061,-0.000385,0.736227,-0.000168,1.243252,2022-02-18,26.66,27.75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN,-0.015852,0.841002,-0.006597,0.788386,-0.005142,0.880533,0.002401,0.822236,-0.000692,1.183027,-0.000252,1.375354,2022-02-22,31.80,28.81,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN,-0.036407,1.929873,-0.016017,1.960147,-0.011407,2.003722,0.000095,0.155895,-0.001124,1.802706,-0.000361,1.547022,2022-02-23,28.04,31.02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN,0.044100,2.350245,-0.008869,1.077387,-0.006478,1.113115,0.003587,1.152612,-0.000714,1.204090,-0.000039,0.997307,2022-02-24,37.50,30.32,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


## Merge with economic data. 

In [8]:
# Placeholder cell: the economic-data merge is not implemented yet. The bare 
# string below is the cell's only expression, so its repr is echoed as output. 
'''
!!! WIP. TBD. 
''' 



'\n!!! WIP. TBD. \n'

## Merge with sentiment data. 

### Concat sentiment data. 

In [9]:
# Identifying / descriptive columns. 
infcols = [
	"ticker", 
	"rpna_date_utc", 
	"timestamp_utc", 
	"news_type", 
	"source", 
	"position_name", 
] 

# Categorical feature columns. 
catcols = ["topic", "group", "type", "sub_type", "category"] 

# Numerical feature columns. 
numcols = [
	"relevance", "ess", "aes", "aev", 
	"ens", "ens_similarity_gap", 
	"css", "nip", "peq", "bee", "bmq", "bam", "bca", "ber", 
	"anl_chg", "mcq", 
] 

# Load the RavenPack sentiment data for the selected tickers and expose the 
# (rpna_date_utc) column under the name (date). 
# NOTE(review): the preview below shows (date) rendered both as "2011-01-01" 
# and as "2020-10-21 00:00:00" -- confirm the column has one consistent 
# type/format before grouping or merging on it. 
df_sentiment = concat_eachyear(
	DIR_DATASET_SENTIMENT, 
	keep_tickers=ticker_to_collect, 
	keep_cols=infcols + catcols + numcols, 
	yearrange=(2010,2022), 
).rename(columns={"rpna_date_utc": "date"}) 

# Not important. Clear mixed types warning when reading CSV into dataframe. 
clear_output() 

# Preview. 
df_sentiment 

Unnamed: 0,ticker,date,timestamp_utc,news_type,source,position_name,topic,group,type,sub_type,category,relevance,ess,aes,aev,ens,ens_similarity_gap,css,nip,peq,bee,bmq,bam,bca,ber,anl_chg,mcq
0,C,2011-01-01,2011-01-01 00:00:42.496,TABULAR-MATERIAL,B5569E,,,,,,,37.0,,78.0,208.0,,,52.0,41.0,50.0,50.0,100.0,50.0,50.0,50.0,50.0,50.0
1,C,2011-01-01,2011-01-01 05:01:43.246,FULL-ARTICLE,18A55F,,,,,,,3.0,,78.0,208.0,,,50.0,44.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
2,C,2011-01-01,2011-01-01 05:04:12.297,FULL-ARTICLE,18A55F,,,,,,,23.0,,78.0,208.0,,,50.0,40.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
3,C,2011-01-01,2011-01-01 05:04:52.345,FULL-ARTICLE,18A55F,,,,,,,4.0,,78.0,208.0,,,50.0,47.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
4,C,2011-01-02,2011-01-02 03:57:38.333,FULL-ARTICLE,B5569E,,,,,,,43.0,,78.0,208.0,,,50.0,41.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267685,GOOGL,2020-10-21 00:00:00,2020-10-21 14:21:54.693000,FULL-ARTICLE,B5569E,,,,,,,3.0,,66.0,145.0,,,52.0,25.0,50.0,50.0,100.0,50.0,50.0,50.0,50.0,50.0
267686,GOOGL,2020-10-21 00:00:00,2020-10-21 14:24:54.265000,PRESS-RELEASE,B5569E,,,,,,,4.0,,66.0,145.0,,,52.0,41.0,50.0,50.0,100.0,50.0,50.0,50.0,50.0,50.0
267687,GOOGL,2020-10-21 00:00:00,2020-10-21 14:29:41.502000,FULL-ARTICLE,B5569E,,,,,,,3.0,,66.0,145.0,,,52.0,34.0,50.0,50.0,100.0,50.0,50.0,50.0,50.0,50.0
267688,GOOGL,2020-10-21 00:00:00,2020-10-21 14:40:20.023000,FULL-ARTICLE,B5569E,,,,,,,45.0,,66.0,145.0,,,50.0,51.0,50.0,100.0,0.0,50.0,50.0,0.0,50.0,50.0


In [10]:
# Preview the rows that carry at least one categorical value 
# (news topics, types, and categories). 
df_sentiment[df_sentiment[catcols].notna().any(axis="columns")] 

Unnamed: 0,ticker,date,timestamp_utc,news_type,source,position_name,topic,group,type,sub_type,category,relevance,ess,aes,aev,ens,ens_similarity_gap,css,nip,peq,bee,bmq,bam,bca,ber,anl_chg,mcq
10,C,2011-01-03,2011-01-03 00:31:00.121,NEWS-FLASH,B5569E,,business,price-targets,price-target,upgrade,price-target-upgrade-rater,20.0,50.0,78.0,208.0,100.0,100.00000,55.0,72.0,100.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
11,C,2011-01-03,2011-01-03 00:32:10.321,NEWS-FLASH,B5569E,,business,price-targets,price-target,upgrade,price-target-upgrade-rater,20.0,50.0,78.0,208.0,100.0,100.00000,55.0,72.0,100.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
15,C,2011-01-03,2011-01-03 01:21:49.902,FULL-ARTICLE,B5569E,,business,price-targets,price-target,upgrade,price-target-upgrade-rater,20.0,50.0,78.0,208.0,75.0,0.03529,56.0,60.0,100.0,100.0,100.0,50.0,100.0,100.0,50.0,50.0
17,C,2011-01-03,2011-01-03 01:34:37.367,FULL-ARTICLE,B5569E,,business,price-targets,price-target,upgrade,price-target-upgrade-rater,20.0,50.0,78.0,208.0,75.0,0.04337,56.0,48.0,100.0,100.0,100.0,50.0,100.0,100.0,50.0,50.0
24,C,2011-01-03,2011-01-03 03:47:26.398,NEWS-FLASH,B5569E,,business,price-targets,price-target,upgrade,price-target-upgrade-rater,20.0,50.0,78.0,208.0,100.0,100.00000,55.0,64.0,100.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267575,GOOGL,2020-10-21 00:00:00,2020-10-21 06:12:12.549000,FULL-ARTICLE,B5569E,,society,legal,legal-issues,,legal-issues-defendant,100.0,22.0,68.0,142.0,75.0,0.00058,55.0,42.0,100.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
267584,GOOGL,2020-10-21 00:00:00,2020-10-21 06:32:30.934000,FULL-ARTICLE,AA6E89,,society,legal,legal-issues,,legal-issues-defendant,100.0,22.0,67.0,143.0,56.0,0.01410,55.0,41.0,100.0,50.0,50.0,50.0,50.0,0.0,50.0,50.0
267585,GOOGL,2020-10-21 00:00:00,2020-10-21 06:32:30.940000,FULL-ARTICLE,AA6E89,,society,legal,legal-issues,,legal-issues-defendant,100.0,22.0,67.0,144.0,42.0,0.00000,50.0,41.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
267631,GOOGL,2020-10-21 00:00:00,2020-10-21 10:29:31.204000,FULL-ARTICLE,B5569E,,society,legal,legal-issues,,legal-issues-defendant,100.0,22.0,66.0,145.0,100.0,100.00000,53.0,52.0,100.0,50.0,100.0,50.0,50.0,50.0,50.0,100.0


### Aggregate sentiment data. 

In [11]:
# TODO (WIP): every column is currently aggregated with "max". 
# Some features need a different aggregation method. 

# Process the sentiment data before merging with the ticker data on date. 
# Sentiment data contains multiple rows of information on each date. 
# Ensure that you aggregate them first so that the dates are unique. 

groupcols = ["ticker", "date"] 

# Columns that the "max" aggregation was silently discarding (they are absent 
# from the recorded output and triggered the pandas deprecation warning 
# "Dropping invalid columns in DataFrameGroupBy.max"). Remove them explicitly 
# so the result stays the same and the cell does not turn into a TypeError on 
# newer pandas. 
# NOTE(review): confirm these are the only columns "max" cannot handle if the 
# input data or pandas version changes. 
nonaggcols = ["position_name", "sub_type"] 

df_sentiment_agg = df_sentiment \
	.dropna(axis="index", how="all", subset=catcols) \
	.drop(columns=nonaggcols) \
	.set_index(groupcols) \
	.groupby(groupcols) \
	.agg("max") \
	.reset_index(drop=False) 

# Preview. 
df_sentiment_agg 

Dropping invalid columns in DataFrameGroupBy.max is deprecated. In a future version, a TypeError will be raised. Before calling .max, select only columns which should be valid for the function.


Unnamed: 0,ticker,date,timestamp_utc,news_type,source,topic,group,type,category,relevance,ess,aes,aev,ens,ens_similarity_gap,css,nip,peq,bee,bmq,bam,bca,ber,anl_chg,mcq
0,AAPL,2010-01-04,2010-01-04 21:23:57.424,NEWS-FLASH,B5569E,business,products-services,product-release,product-release,100.0,67.0,57.0,77.0,100.0,27.42061,50.0,45.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
1,AAPL,2010-01-05,2010-01-05 16:10:05.565,NEWS-FLASH,B5569E,business,products-services,unit-acquisition,unit-acquisition-acquirer,100.0,67.0,58.0,82.0,100.0,100.00000,52.0,45.0,50.0,50.0,100.0,50.0,50.0,50.0,50.0,100.0
2,AAPL,2010-01-06,2010-01-06 22:30:28.051,FULL-ARTICLE,B5569E,business,insider-trading,insider-sell,insider-sell,100.0,40.0,56.0,81.0,100.0,33.95867,47.0,36.0,50.0,50.0,0.0,50.0,50.0,50.0,50.0,0.0
3,AAPL,2010-01-07,2010-01-07 19:59:36.534,FULL-ARTICLE,B5569E,business,assets,patent,patent-filing,100.0,64.0,56.0,82.0,100.0,97.03524,50.0,34.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
4,AAPL,2010-01-08,2010-01-08 14:28:03.016,NEWS-FLASH,B5569E,business,order-imbalances,buy-moo,mkt-open-buy-imbalance,100.0,67.0,56.0,82.0,100.0,3.00000,50.0,45.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54842,VZ,2022-04-25,2022-04-25 21:27:46.493,NEWS-FLASH,B5569E,business,stock-prices,stock,stock-loss,100.0,40.0,66.0,108.0,100.0,100.00000,39.0,76.0,0.0,50.0,0.0,50.0,0.0,50.0,50.0,0.0
54843,VZ,2022-04-26,2022-04-26 21:27:38.471,FULL-ARTICLE,1E5E35,business,stock-prices,stock,stock-loss,100.0,40.0,61.0,92.0,75.0,0.99991,50.0,43.0,0.0,100.0,50.0,50.0,50.0,0.0,50.0,100.0
54844,VZ,2022-04-27,2022-04-27 21:27:51.946,FULL-ARTICLE,1E5E35,business,stock-prices,stock,stock-loss,100.0,40.0,62.0,86.0,100.0,1.00015,39.0,43.0,0.0,50.0,0.0,50.0,0.0,0.0,50.0,0.0
54845,VZ,2022-04-28,2022-04-28 12:04:51.078,TABULAR-MATERIAL,B5569E,business,insider-trading,insider-sell,insider-sell,100.0,40.0,62.0,89.0,100.0,21.99604,47.0,39.0,50.0,50.0,0.0,50.0,50.0,50.0,50.0,0.0


### Merge with ticker data. 

In [12]:
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "df_feature_w_label.parquet") 

# Earlier cells cache to this same parquet file, so its existence alone does 
# not prove that the sentiment merge was applied. Only treat the cache as a 
# hit when it already carries the "rp_"-prefixed columns produced by this merge. 
df_cached = pd.read_parquet(filepath) if load_cache and os.path.isfile(filepath) else None 
has_sentiment = df_cached is not None and any(
	str(col).startswith("rp_") for col in df_cached.columns
) 

if has_sentiment: 
	df_feature_w_label = df_cached 
else: 
	# Make a copy of the dataframe to avoid error related to pandas (SettingWarnings). 
	df_feature_w_label = merge_with_ticker(
		df_feature_w_label.copy(), df_sentiment_agg.copy(), 
		merge_suffix="rp", merge_on=["ticker", "date"], relation="one_to_many"
	) 

	# Cache the processed dataset. 
	df_feature_w_label.to_parquet(filepath, index=False) 

# Preview. 
df_feature_w_label 

Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,tscore_c2c_lag1,return_c2c_lag5,tscore_c2c_lag5,return_c2c_lag10,tscore_c2c_lag10,return_c2c_lag21,tscore_c2c_lag21,return_c2c_lag126,tscore_c2c_lag126,return_c2c_lag252,tscore_c2c_lag252,vix_date,vix_open,vix_close,jobs_opening_labor_turnover,non_farm_employment_adp_mom,non_farm_employment_mom,unemployment_claims,unemployment_rate,avg_hourly_earnings_mom,personal_dispensable_income_mom,personal_consumption_mom,ism_pmi_manufacturer,ism_pmi_services,chicago_pmi,industry_production_mom,phil_fed_manufacturer,capacity_utilisation,manufacturer_new_order_mom,manufacturer_new_order_ex_trans_mom,retail_sales_ex_auto_mom,retail_sales_mom,uom_consumer_sentiment,producer_ppi_mom,producer_ppi_ex_food_energy_mom,consumer_cpi_mom,consumer_cpi_ex_food_energy_mom,pce_ex_food_energy_mom,housing_hpi_mom,housing_hpi_cs_yoy,...,gdp_deflator,crude_oil_inventory,natural_gas_inventory,fomc_presscf,fomc_minutes,opec,opec_jmmc,firsttrdrday_ofmonth,black_friday,christmas,columbus,cyber_monday,good_friday,labor,martin_lut_king,new_year,thanksgiving,us_event_sep11,us_independence,us_memorial,us_president,us_veterans,valentine,santa_rally,tww_trdrday,rp_ticker,rp_date,rp_timestamp_utc,rp_news_type,rp_source,rp_topic,rp_group,rp_type,rp_category,rp_relevance,rp_ess,rp_aes,rp_aev,rp_ens,rp_ens_similarity_gap,rp_css,rp_nip,rp_peq,rp_bee,rp_bmq,rp_bam,rp_bca,rp_ber,rp_anl_chg,rp_mcq
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL,,,,,,,,,,,,,1998-11-30,22.79,26.01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL,0.080043,,,,,,,,,,,,1998-12-01,27.38,24.97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL,0.037740,,,,,,,,,,,,1998-12-02,25.63,25.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL,-0.037740,,,,,,,,,,,,1998-12-03,25.53,28.70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL,-0.039221,,,,,,,,,,,,1998-12-04,26.28,25.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN,-0.013344,0.711105,-0.000904,0.073084,-0.003298,0.545528,0.000290,0.219061,-0.000385,0.736227,-0.000168,1.243252,2022-02-18,26.66,27.75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-18,2022-02-18 21:30:38.146,NEWS-FLASH,B5569E,society,stock-prices,stock,stock-loss,100.0,78.0,73.0,230.0,100.0,100.00000,55.0,57.0,100.0,50.0,100.0,50.0,50.0,50.0,50.0,100.0
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN,-0.015852,0.841002,-0.006597,0.788386,-0.005142,0.880533,0.002401,0.822236,-0.000692,1.183027,-0.000252,1.375354,2022-02-22,31.80,28.81,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-22,2022-02-22 17:51:21.515,FULL-ARTICLE,B5569E,society,legal,legal-issues,legal-issues-plaintiff,100.0,44.0,73.0,225.0,100.0,100.00000,38.0,35.0,50.0,50.0,50.0,50.0,0.0,50.0,50.0,0.0
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN,-0.036407,1.929873,-0.016017,1.960147,-0.011407,2.003722,0.000095,0.155895,-0.001124,1.802706,-0.000361,1.547022,2022-02-23,28.04,31.02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-23,2022-02-23 21:57:24.876,TABULAR-MATERIAL,B5569E,business,partnerships,partnership,partnership,100.0,61.0,73.0,226.0,100.0,5.97622,52.0,41.0,50.0,50.0,100.0,50.0,100.0,100.0,50.0,100.0
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN,0.044100,2.350245,-0.008869,1.077387,-0.006478,1.113115,0.003587,1.152612,-0.000714,1.204090,-0.000039,0.997307,2022-02-24,37.50,30.32,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-24,2022-02-24 16:27:07.793,PRESS-RELEASE,B5569E,business,stock-prices,stock,stock-gain,100.0,69.0,73.0,230.0,100.0,100.00000,52.0,59.0,50.0,100.0,100.0,50.0,50.0,50.0,50.0,100.0


## Get technical indicator data. 

In [13]:
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "df_feature_w_label.parquet") 

# Earlier cells cache to this same parquet file, so its existence alone does 
# not prove that the technical indicators were merged. Only treat the cache 
# as a hit when it already carries the "techind_"-prefixed columns produced 
# by this merge. 
df_cached = pd.read_parquet(filepath) if load_cache and os.path.isfile(filepath) else None 
has_techind = df_cached is not None and any(
	str(col).startswith("techind_") for col in df_cached.columns
) 

if has_techind: 
	df_feature_w_label = df_cached 
else: 
	# Load the cached technical indicator. 
	filepath_feat = os.path.join(DIR_DATASET_TECH_IND, "technical_indicator.csv") 
	df_techind = pd.read_csv(filepath_feat) 

	# Make a copy of the dataframe to avoid error related to pandas (SettingWarnings). 
	df_feature_w_label = merge_with_ticker(
		df_feature_w_label.copy(), df_techind.copy(), 
		merge_suffix="techind", merge_on=["ticker", "date"], relation="one_to_many"
	) 

	# Cache the processed dataset. 
	df_feature_w_label.to_parquet(filepath, index=False) 

# Preview. 
df_feature_w_label 

Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,tscore_c2c_lag1,return_c2c_lag5,tscore_c2c_lag5,return_c2c_lag10,tscore_c2c_lag10,return_c2c_lag21,tscore_c2c_lag21,return_c2c_lag126,tscore_c2c_lag126,return_c2c_lag252,tscore_c2c_lag252,vix_date,vix_open,vix_close,jobs_opening_labor_turnover,non_farm_employment_adp_mom,non_farm_employment_mom,unemployment_claims,unemployment_rate,avg_hourly_earnings_mom,personal_dispensable_income_mom,personal_consumption_mom,ism_pmi_manufacturer,ism_pmi_services,chicago_pmi,industry_production_mom,phil_fed_manufacturer,capacity_utilisation,manufacturer_new_order_mom,manufacturer_new_order_ex_trans_mom,retail_sales_ex_auto_mom,retail_sales_mom,uom_consumer_sentiment,producer_ppi_mom,producer_ppi_ex_food_energy_mom,consumer_cpi_mom,consumer_cpi_ex_food_energy_mom,pce_ex_food_energy_mom,housing_hpi_mom,housing_hpi_cs_yoy,...,firsttrdrday_ofmonth,black_friday,christmas,columbus,cyber_monday,good_friday,labor,martin_lut_king,new_year,thanksgiving,us_event_sep11,us_independence,us_memorial,us_president,us_veterans,valentine,santa_rally,tww_trdrday,rp_ticker,rp_date,rp_timestamp_utc,rp_news_type,rp_source,rp_topic,rp_group,rp_type,rp_category,rp_relevance,rp_ess,rp_aes,rp_aev,rp_ens,rp_ens_similarity_gap,rp_css,rp_nip,rp_peq,rp_bee,rp_bmq,rp_bam,rp_bca,rp_ber,rp_anl_chg,rp_mcq,techind_ticker,techind_date,techind_macd_MACD_Hist,techind_macd_MACD,techind_macd_MACD_Signal,techind_stoch_SlowK,techind_stoch_SlowD
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL,,,,,,,,,,,,,1998-11-30,22.79,26.01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL,0.080043,,,,,,,,,,,,1998-12-01,27.38,24.97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-01,,,,,
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL,0.037740,,,,,,,,,,,,1998-12-02,25.63,25.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-02,,,,,
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL,-0.037740,,,,,,,,,,,,1998-12-03,25.53,28.70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-03,,,,,
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL,-0.039221,,,,,,,,,,,,1998-12-04,26.28,25.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-04,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN,-0.013344,0.711105,-0.000904,0.073084,-0.003298,0.545528,0.000290,0.219061,-0.000385,0.736227,-0.000168,1.243252,2022-02-18,26.66,27.75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-18,2022-02-18 21:30:38.146,NEWS-FLASH,B5569E,society,stock-prices,stock,stock-loss,100.0,78.0,73.0,230.0,100.0,100.00000,55.0,57.0,100.0,50.0,100.0,50.0,50.0,50.0,50.0,100.0,AMZN,2022-02-18,0.8078,-0.6473,-1.4551,41.2438,45.4652
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN,-0.015852,0.841002,-0.006597,0.788386,-0.005142,0.880533,0.002401,0.822236,-0.000692,1.183027,-0.000252,1.375354,2022-02-22,31.80,28.81,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-22,2022-02-22 17:51:21.515,FULL-ARTICLE,B5569E,society,legal,legal-issues,legal-issues-plaintiff,100.0,44.0,73.0,225.0,100.0,100.00000,38.0,35.0,50.0,50.0,50.0,50.0,0.0,50.0,50.0,0.0,AMZN,2022-02-22,0.3925,-0.9645,-1.3570,22.3386,37.3668
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN,-0.036407,1.929873,-0.016017,1.960147,-0.011407,2.003722,0.000095,0.155895,-0.001124,1.802706,-0.000361,1.547022,2022-02-23,28.04,31.02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-23,2022-02-23 21:57:24.876,TABULAR-MATERIAL,B5569E,business,partnerships,partnership,partnership,100.0,61.0,73.0,226.0,100.0,5.97622,52.0,41.0,50.0,50.0,100.0,50.0,100.0,100.0,50.0,100.0,AMZN,2022-02-23,-0.2188,-1.6305,-1.4117,11.2069,24.9298
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN,0.044100,2.350245,-0.008869,1.077387,-0.006478,1.113115,0.003587,1.152612,-0.000714,1.204090,-0.000039,0.997307,2022-02-24,37.50,30.32,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,AMZN,2022-02-24,2022-02-24 16:27:07.793,PRESS-RELEASE,B5569E,business,stock-prices,stock,stock-gain,100.0,69.0,73.0,230.0,100.0,100.00000,52.0,59.0,50.0,100.0,100.0,50.0,50.0,50.0,50.0,100.0,AMZN,2022-02-24,-0.1608,-1.6127,-1.4519,24.1429,19.2295


## Get candlestick data. 

In [14]:
# Location of the cached feature table that includes the candlestick columns. 
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "df_feature_w_label.parquet") 

# The cache is only reused when caching is enabled AND the file is on disk; 
# in every other case the table is recomputed and the cache is (re)written. 
if not (load_cache and os.path.isfile(filepath)): 
	# Run get_candlesticks on a copy of the current table, then persist the result. 
	df_feature_w_label = get_candlesticks(df_feature_w_label.copy()) 
	df_feature_w_label.to_parquet(filepath, index=False) 
else: 
	# Cached result available: load it instead of recomputing. 
	df_feature_w_label = pd.read_parquet(filepath) 

# Preview. 
df_feature_w_label 

Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,tscore_c2c_lag1,return_c2c_lag5,tscore_c2c_lag5,return_c2c_lag10,tscore_c2c_lag10,return_c2c_lag21,tscore_c2c_lag21,return_c2c_lag126,tscore_c2c_lag126,return_c2c_lag252,tscore_c2c_lag252,vix_date,vix_open,vix_close,jobs_opening_labor_turnover,non_farm_employment_adp_mom,non_farm_employment_mom,unemployment_claims,unemployment_rate,avg_hourly_earnings_mom,personal_dispensable_income_mom,personal_consumption_mom,ism_pmi_manufacturer,ism_pmi_services,chicago_pmi,industry_production_mom,phil_fed_manufacturer,capacity_utilisation,manufacturer_new_order_mom,manufacturer_new_order_ex_trans_mom,retail_sales_ex_auto_mom,retail_sales_mom,uom_consumer_sentiment,producer_ppi_mom,producer_ppi_ex_food_energy_mom,consumer_cpi_mom,consumer_cpi_ex_food_energy_mom,pce_ex_food_energy_mom,housing_hpi_mom,housing_hpi_cs_yoy,...,tww_trdrday,rp_ticker,rp_date,rp_timestamp_utc,rp_news_type,rp_source,rp_topic,rp_group,rp_type,rp_category,rp_relevance,rp_ess,rp_aes,rp_aev,rp_ens,rp_ens_similarity_gap,rp_css,rp_nip,rp_peq,rp_bee,rp_bmq,rp_bam,rp_bca,rp_ber,rp_anl_chg,rp_mcq,techind_ticker,techind_date,techind_macd_MACD_Hist,techind_macd_MACD,techind_macd_MACD_Signal,techind_stoch_SlowK,techind_stoch_SlowD,cdl3blackcrows,cdldarkcloudcover,cdldoji,cdldojistar,cdldragonflydoji,cdlengulfing,cdleveningdojistar,cdleveningstar,cdlhammer,cdlhangingman,cdlharami,cdlinvertedhammer,cdlmorningdojistar,cdlmorningstar,cdlrickshawman,cdlshootingstar,cdltristar
0,1998-11-30,0.26,0.27,0.24,0.24,561489600.0,0.0,0.0,AAPL,,,,,,,,,,,,,1998-11-30,22.79,26.01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1998-12-01,0.24,0.27,0.24,0.26,865737600.0,0.0,0.0,AAPL,0.080043,,,,,,,,,,,,1998-12-01,27.38,24.97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-01,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1998-12-02,0.26,0.28,0.26,0.27,962483200.0,0.0,0.0,AAPL,0.037740,,,,,,,,,,,,1998-12-02,25.63,25.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-02,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1998-12-03,0.28,0.28,0.26,0.26,626046400.0,0.0,0.0,AAPL,-0.037740,,,,,,,,,,,,1998-12-03,25.53,28.70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-03,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1998-12-04,0.26,0.26,0.24,0.25,721369600.0,0.0,0.0,AAPL,-0.039221,,,,,,,,,,,,1998-12-04,26.28,25.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,,,,,,,,,,,,,,,,,,,,,,,,,,AAPL,1998-12-04,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,155.50,155.50,150.89,152.60,63604000.0,0.0,0.0,AMZN,-0.013344,0.711105,-0.000904,0.073084,-0.003298,0.545528,0.000290,0.219061,-0.000385,0.736227,-0.000168,1.243252,2022-02-18,26.66,27.75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,AMZN,2022-02-18,2022-02-18 21:30:38.146,NEWS-FLASH,B5569E,society,stock-prices,stock,stock-loss,100.0,78.0,73.0,230.0,100.0,100.00000,55.0,57.0,100.0,50.0,100.0,50.0,50.0,50.0,50.0,100.0,AMZN,2022-02-18,0.8078,-0.6473,-1.4551,41.2438,45.4652,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
174255,2022-02-22,150.48,152.98,148.49,150.20,66128000.0,0.0,0.0,AMZN,-0.015852,0.841002,-0.006597,0.788386,-0.005142,0.880533,0.002401,0.822236,-0.000692,1.183027,-0.000252,1.375354,2022-02-22,31.80,28.81,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,...,0,AMZN,2022-02-22,2022-02-22 17:51:21.515,FULL-ARTICLE,B5569E,society,legal,legal-issues,legal-issues-plaintiff,100.0,44.0,73.0,225.0,100.0,100.00000,38.0,35.0,50.0,50.0,50.0,50.0,0.0,50.0,50.0,0.0,AMZN,2022-02-22,0.3925,-0.9645,-1.3570,22.3386,37.3668,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
174256,2022-02-23,151.65,151.76,144.65,144.83,64244000.0,0.0,0.0,AMZN,-0.036407,1.929873,-0.016017,1.960147,-0.011407,2.003722,0.000095,0.155895,-0.001124,1.802706,-0.000361,1.547022,2022-02-23,28.04,31.02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,AMZN,2022-02-23,2022-02-23 21:57:24.876,TABULAR-MATERIAL,B5569E,business,partnerships,partnership,partnership,100.0,61.0,73.0,226.0,100.0,5.97622,52.0,41.0,50.0,50.0,100.0,50.0,100.0,100.0,50.0,100.0,AMZN,2022-02-23,-0.2188,-1.6305,-1.4117,11.2069,24.9298,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
174257,2022-02-24,139.84,151.75,139.50,151.36,100786000.0,0.0,0.0,AMZN,0.044100,2.350245,-0.008869,1.077387,-0.006478,1.113115,0.003587,1.152612,-0.000714,1.204090,-0.000039,0.997307,2022-02-24,37.50,30.32,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,AMZN,2022-02-24,2022-02-24 16:27:07.793,PRESS-RELEASE,B5569E,business,stock-prices,stock,stock-gain,100.0,69.0,73.0,230.0,100.0,100.00000,52.0,59.0,50.0,100.0,100.0,50.0,50.0,50.0,50.0,100.0,AMZN,2022-02-24,-0.1608,-1.6127,-1.4519,24.1429,19.2295,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
