In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import GridSearchCV
import data_engineering as de
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
import simulation

## Data Wrangling

1. Load the Data
2. Create related features (e.g. article counts, open price rolling averages)
3. split by stock

In [2]:
df = pd.read_csv('../data/complete_next_open.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69433 entries, 0 to 69432
Data columns (total 19 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Publishing Time  63703 non-null  object 
 1   Market Date      69433 non-null  object 
 2   Ticker           69433 non-null  object 
 3   Sector           69433 non-null  object 
 4   finvader_neg     63703 non-null  float64
 5   finvader_neu     63703 non-null  float64
 6   finvader_pos     63703 non-null  float64
 7   finvader_tot     63703 non-null  float64
 8   Source           63703 non-null  object 
 9   Headline         63703 non-null  object 
 10  Text             63703 non-null  object 
 11  URL              63703 non-null  object 
 12  Open             69433 non-null  float64
 13  High             69433 non-null  float64
 14  Low              69433 non-null  float64
 15  Close            69433 non-null  float64
 16  Volume           69433 non-null  int64  
 17  Dividends   

In [3]:
def overall_sentiment(x:int):
    threshold = .1
    if x > threshold:
        return 'pos'
    elif x < -threshold:
        return 'neg'
    else:
        return 'neu'

In [4]:
df['overall_sen'] = df['finvader_tot'].apply(overall_sentiment)
df['overall_sen'] = df['overall_sen'].astype('category')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69433 entries, 0 to 69432
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   Publishing Time  63703 non-null  object  
 1   Market Date      69433 non-null  object  
 2   Ticker           69433 non-null  object  
 3   Sector           69433 non-null  object  
 4   finvader_neg     63703 non-null  float64 
 5   finvader_neu     63703 non-null  float64 
 6   finvader_pos     63703 non-null  float64 
 7   finvader_tot     63703 non-null  float64 
 8   Source           63703 non-null  object  
 9   Headline         63703 non-null  object  
 10  Text             63703 non-null  object  
 11  URL              63703 non-null  object  
 12  Open             69433 non-null  float64 
 13  High             69433 non-null  float64 
 14  Low              69433 non-null  float64 
 15  Close            69433 non-null  float64 
 16  Volume           69433 non-null  int64  

In [5]:
counts = df.groupby(['Market Date', 'Ticker'])['overall_sen'].value_counts()
counts.loc['2019-03-15', 'AAPL']['pos']


0

In [6]:
features = ['finvader_neg',
            'finvader_neu',
            'finvader_pos',
            'finvader_tot',
            'Open',
            'High',
            'Low',
            'Close',
            'Volume',
            'Dividends',
            'Stock Splits']
df_mean = df.groupby(['Market Date', 'Ticker'])[features].mean().reset_index()
df_mean

Unnamed: 0,Market Date,Ticker,finvader_neg,finvader_neu,finvader_pos,finvader_tot,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2019-03-01,AAPL,,,,,41.887973,42.097075,41.553888,42.053814,103544800.0,0.0,0.0
1,2019-03-01,ABBV,,,,,62.740368,63.589807,62.354977,62.999920,8567900.0,0.0,0.0
2,2019-03-01,AMZN,,,,,82.756500,83.712997,82.550003,83.586502,99498000.0,0.0,0.0
3,2019-03-01,BAC,,,,,25.918994,26.201778,25.812949,25.901320,45771500.0,0.0,0.0
4,2019-03-01,GOOGL,,,,,56.549999,57.500000,56.549999,57.425999,34086000.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19180,2024-03-28,MSFT,,,,,420.202052,421.110417,418.365369,419.962494,21871200.0,0.0,0.0
19181,2024-03-28,NVDA,,,,,900.000000,913.000000,891.929993,903.559998,43521200.0,0.0,0.0
19182,2024-03-28,UNH,,,,,495.000000,495.869995,489.299988,494.700012,3820000.0,0.0,0.0
19183,2024-03-28,V,,,,,277.975547,279.283124,276.608082,278.564453,5844400.0,0.0,0.0


In [7]:
labels = {'pos_art_count':'pos', 'neg_art_count':'neg', 'neu_art_count':'neu'}
for l in labels:
    df_mean[l] = df_mean.apply(lambda x: counts.loc[x['Market Date'], x['Ticker']][labels[l]], axis = 1)
df_mean.loc[df_mean['finvader_tot'].isna(), 'neu_art_count'] = 0
df_mean['total_articles'] = df_mean['pos_art_count'] + df_mean['neg_art_count'] + df_mean['neu_art_count']


In [8]:
df_mean['Market Date'] = pd.to_datetime(df_mean['Market Date'])
df_mean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19185 entries, 0 to 19184
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Market Date     19185 non-null  datetime64[ns]
 1   Ticker          19185 non-null  object        
 2   finvader_neg    13455 non-null  float64       
 3   finvader_neu    13455 non-null  float64       
 4   finvader_pos    13455 non-null  float64       
 5   finvader_tot    13455 non-null  float64       
 6   Open            19185 non-null  float64       
 7   High            19185 non-null  float64       
 8   Low             19185 non-null  float64       
 9   Close           19185 non-null  float64       
 10  Volume          19185 non-null  float64       
 11  Dividends       19185 non-null  float64       
 12  Stock Splits    19185 non-null  float64       
 13  pos_art_count   19185 non-null  int64         
 14  neg_art_count   19185 non-null  int64         
 15  ne

In [9]:
tickers = df_mean['Ticker'].unique()
ticker_frames = {}
for tick in tickers:
    ticker_frames[tick] = df_mean[df_mean['Ticker'] == tick].set_index('Market Date').drop(columns  = ['Ticker', 'Dividends'])
ticker_frames['AAPL']

Unnamed: 0_level_0,finvader_neg,finvader_neu,finvader_pos,finvader_tot,Open,High,Low,Close,Volume,Stock Splits,pos_art_count,neg_art_count,neu_art_count,total_articles
Market Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2019-03-01,,,,,41.887973,42.097075,41.553888,42.053814,103544800.0,0.0,0,0,0,0
2019-03-04,,,,,42.226871,42.721989,41.813471,42.265327,109744800.0,0.0,0,0,0,0
2019-03-05,,,,,42.286956,42.301376,41.950465,42.188412,78949600.0,0.0,0,0,0,0
2019-03-06,,,,,41.981718,42.178806,41.806265,41.945667,83241600.0,0.0,0,0,0,0
2019-03-07,,,,,41.789428,41.926429,41.344785,41.460152,99185600.0,0.0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-22,,,,,171.527346,172.815607,169.829652,172.046646,71106600.0,0.0,0,0,0,0
2024-03-25,,,,,170.338972,171.707111,169.220478,170.618591,54288300.0,0.0,0,0,0,0
2024-03-26,,,,,169.769734,171.187808,169.350304,169.480133,57388400.0,0.0,0,0,0,0
2024-03-27,,,,,170.179175,173.364857,169.879579,173.075241,60273300.0,0.0,0,0,0,0


In [10]:
for tick, frame in ticker_frames.items():
    frame['3avg Open'] = frame['Open'].rolling(window = 3).mean()
    frame['7avg Open'] = frame['Open'].rolling(window= 7).mean()
ticker_frames['AAPL']

Unnamed: 0_level_0,finvader_neg,finvader_neu,finvader_pos,finvader_tot,Open,High,Low,Close,Volume,Stock Splits,pos_art_count,neg_art_count,neu_art_count,total_articles,3avg Open,7avg Open
Market Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-03-01,,,,,41.887973,42.097075,41.553888,42.053814,103544800.0,0.0,0,0,0,0,,
2019-03-04,,,,,42.226871,42.721989,41.813471,42.265327,109744800.0,0.0,0,0,0,0,,
2019-03-05,,,,,42.286956,42.301376,41.950465,42.188412,78949600.0,0.0,0,0,0,0,42.133933,
2019-03-06,,,,,41.981718,42.178806,41.806265,41.945667,83241600.0,0.0,0,0,0,0,42.165181,
2019-03-07,,,,,41.789428,41.926429,41.344785,41.460152,99185600.0,0.0,0,0,0,0,42.019367,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-22,,,,,171.527346,172.815607,169.829652,172.046646,71106600.0,0.0,0,0,0,0,174.606511,173.838503
2024-03-25,,,,,170.338972,171.707111,169.220478,170.618591,54288300.0,0.0,0,0,0,0,172.892170,173.504671
2024-03-26,,,,,169.769734,171.187808,169.350304,169.480133,57388400.0,0.0,0,0,0,0,170.545351,173.337754
2024-03-27,,,,,170.179175,173.364857,169.879579,173.075241,60273300.0,0.0,0,0,0,0,170.095960,172.601608


In [11]:
for tick, frame in ticker_frames.items():
    frame['indicator'] = -frame['Open'] + frame.shift(-1)['Open']
    frame['indicator'] = frame['indicator'].apply(lambda x: 1 if x >= 0 else 0)
    # ticker_frames[tick] = frame[frame['finvader_tot'].notna()]
    c0 = frame.index.to_series().between(left = '2019-03-15', right = '2024-03-18', inclusive = 'both')
    ticker_frames[tick] = frame[c0]
    ticker_frames[tick] = ticker_frames[tick].fillna(0)
ticker_frames['JNJ']

Unnamed: 0_level_0,finvader_neg,finvader_neu,finvader_pos,finvader_tot,Open,High,Low,Close,Volume,Stock Splits,pos_art_count,neg_art_count,neu_art_count,total_articles,3avg Open,7avg Open,indicator
Market Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-03-15,0.000000,0.000000,0.000,0.000000,119.451371,119.676420,118.438633,119.105141,11922500.0,0.0,0,0,0,0,120.008236,119.860672,0
2019-03-18,0.000000,0.000000,0.000,0.000000,119.148382,119.260913,118.343390,118.732903,7369600.0,0.0,0,0,0,0,119.448470,119.751853,0
2019-03-19,0.000000,0.000000,0.000,0.000000,119.113766,120.161135,118.992584,119.832207,5882500.0,0.0,0,0,0,0,119.237840,119.692493,1
2019-03-20,0.000000,0.000000,0.000,0.000000,119.737051,119.892851,118.282853,118.836830,5475700.0,0.0,0,0,0,0,119.333066,119.719701,0
2019-03-21,0.000000,0.000000,0.000,0.000000,118.585810,120.126559,118.542528,119.537964,5765100.0,0.0,0,0,0,0,119.145542,119.515674,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-12,0.031333,0.840667,0.128,0.401333,160.284357,161.802087,159.837968,161.435059,8018700.0,0.0,3,0,0,3,158.697182,158.906920,1
2024-03-13,0.000000,1.000000,0.000,0.000000,161.226738,161.375530,158.528548,159.808212,6260000.0,0.0,0,0,1,1,159.907402,159.078390,0
2024-03-14,0.000000,1.000000,0.000,0.000000,159.649488,160.244668,157.417530,157.933365,6988300.0,0.0,0,0,1,1,160.386861,159.123738,0
2024-03-15,0.000000,0.000000,0.000,0.000000,156.792573,157.913517,155.840279,156.911606,13165500.0,0.0,0,0,0,0,159.222933,158.925339,1


## Logistic Regression

Time to model.

Will test this model on the simulation at the end

In [12]:
lr = LogisticRegression(penalty = 'l1', solver = 'liblinear')
dummy = DummyClassifier(strategy= 'most_frequent')
parameters = {'C' : [.001, .01, .1, 1, 10, 100]}
clf = GridSearchCV(lr, parameters)
 

In [13]:
best_para = {}
lr_scores = {}
feature_ranks = {}
dummy_scores = {}
for tick, frame in ticker_frames.items():
    test, train = de.train_test_split(frame)
    X_train  =train.drop(columns = 'indicator')
    y_train =  train['indicator']
    X_test = test.drop(columns = 'indicator')
    y_test = test['indicator'] 
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    model = clf.fit(X_train_scaled, y_train)
    rfe = RFE(estimator=lr, n_features_to_select=5).fit(X_train_scaled, y_train)
    dumb = dummy.fit(X_train_scaled, y_train)
    feature_ranks[tick] = [frame.columns[i] for i in rfe.get_support(1)]
    best_para[tick] = list(model.best_params_.values())
    predict_true = model.predict(X_test_scaled)
    predict_dummy = dummy.predict(X_test_scaled)
    lr_scores[tick] = (accuracy_score(y_test, predict_true), precision_score(y_test, predict_true), 
                       recall_score(y_test, predict_true), f1_score(y_test, predict_true))
    dummy_scores[tick] = (accuracy_score(y_test, predict_dummy), precision_score(y_test, predict_dummy),
                           recall_score(y_test, predict_dummy), f1_score(y_test, predict_dummy))




  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
for tick in lr_scores:
    a,b,c,d = lr_scores[tick]
    w, x, y, z = dummy_scores[tick]
    print(tick, a-w, b-x, c-y, d-z)
print('ticker,      accuracy,       precision,      recall,     f1')

AAPL 0.19959879638916755 0.1589186823927874 -0.08487084870848705 0.09051831858849402
ABBV 0.25977933801404207 0.2434754922662724 -0.11904761904761907 0.12590993484299307
AMZN 0.3029087261785356 0.33683641157016697 -0.21832358674463936 0.13557045173100735
BAC 0.24874623871614843 0.242781360534987 -0.19498069498069504 0.09927020871101189
GOOGL 0.2798395185556669 0.3067535941156804 -0.20952380952380956 0.12145746387618284
JNJ 0.34804413239719156 0.8092691622103387 0.8714011516314779 0.8391866913123844
JPM 0.250752256770311 0.2331259264519222 -0.17704280155642027 0.10371517122810847
LLY 0.2898696088264794 0.25218026962242657 -0.11591355599214148 0.14296348122825653
MA 0.25275827482447333 0.23673719302934093 -0.14232209737827717 0.1151940059242963
MRK 0.27783350050150446 0.3189589515434269 -0.22537878787878785 0.1174419737055673
MSFT 0.16148445336008022 0.26644330898454527 -0.425891181988743 -0.027879567127397364
NVDA 0.1313941825476429 0.21018798020856821 -0.4171322160148976 -0.04463299589

In [15]:
#Note that the features are just top 5 most important, order is not ranking among the top 5
best_para, feature_ranks

({'AAPL': [100],
  'ABBV': [10],
  'AMZN': [10],
  'BAC': [100],
  'GOOGL': [10],
  'JNJ': [10],
  'JPM': [100],
  'LLY': [100],
  'MA': [10],
  'MRK': [10],
  'MSFT': [100],
  'NVDA': [10],
  'UNH': [10],
  'V': [100],
  'WFC': [100]},
 {'AAPL': ['Open', 'Close', 'total_articles', '3avg Open', '7avg Open'],
  'ABBV': ['finvader_neu', 'Open', 'Close', 'pos_art_count', '7avg Open'],
  'AMZN': ['Open',
   'neu_art_count',
   'total_articles',
   '3avg Open',
   '7avg Open'],
  'BAC': ['finvader_pos', 'Open', 'Close', 'neu_art_count', '3avg Open'],
  'GOOGL': ['finvader_pos', 'Open', 'Close', 'neu_art_count', '3avg Open'],
  'JNJ': ['finvader_neg', 'finvader_neu', 'Open', 'Close', '7avg Open'],
  'JPM': ['finvader_tot', 'Close', 'neg_art_count', '3avg Open', '7avg Open'],
  'LLY': ['neg_art_count',
   'neu_art_count',
   'total_articles',
   '3avg Open',
   '7avg Open'],
  'MA': ['finvader_tot',
   'Close',
   'pos_art_count',
   'neg_art_count',
   '7avg Open'],
  'MRK': ['finvader_neu',

In [46]:
log_best_param = pd.DataFrame(best_para, index = ['C'])
log_features_top5= pd.DataFrame(feature_ranks)
log_test_scores = pd.DataFrame(lr_scores, index = ['accuracy', 'precision', 'recall', 'f1'])
dummy_test_scores = pd.DataFrame(dummy_scores, index = ['accuracy', 'precision', 'recall', 'f1'])
log_test_scores


Unnamed: 0,AAPL,ABBV,AMZN,BAC,GOOGL,JNJ,JPM,LLY,MA,MRK,MSFT,NVDA,UNH,V,WFC
accuracy,0.74323,0.807422,0.817452,0.768305,0.806419,0.825476,0.766299,0.800401,0.788365,0.807422,0.696088,0.67001,0.785356,0.802407,0.748245
precision,0.70255,0.791118,0.85138,0.76234,0.833333,0.809269,0.748673,0.762712,0.772344,0.848548,0.801047,0.748804,0.730015,0.854077,0.724252
recall,0.915129,0.880952,0.781676,0.805019,0.790476,0.871401,0.822957,0.884086,0.857678,0.774621,0.574109,0.582868,0.932563,0.755218,0.836852
f1,0.794872,0.833622,0.815041,0.783099,0.811339,0.839187,0.784059,0.818926,0.812777,0.809901,0.668852,0.655497,0.818951,0.801611,0.776492


In [47]:
log_best_param.to_csv('../results/log_results/log_best_param.csv')
log_features_top5.to_csv('../results/log_results/log_features_top5.csv')
log_test_scores.to_csv('../results/log_results/log_test_scores.csv')
dummy_test_scores.to_csv('../results/log_results/dummy_test_scores.csv')

## Running Simulation

In [16]:
cv_trades = [{},{},{},{}]
cv_opens = [{},{},{},{}]
dumb_trades = [{},{},{},{}]

for tick, frame in ticker_frames.items():
    train, test = de.train_test_split(frame)
    #CrossValue
    i=0
    for train_idx, test_idx in de.get_cv_splits(train):
        cv_opens[i][tick] = train.loc[test_idx, "Open"].to_numpy()
        df_tt = train.loc[train_idx]
        df_ho = train.loc[test_idx]
        #df_tt is new train test, df_ho is test set in my cv split. now train model on df_tt
        X_train, y_train = df_tt.drop(columns = 'indicator'), df_tt['indicator']
        X_test, y_test = df_ho.drop(columns = 'indicator'), df_ho['indicator']
        scaler = MinMaxScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        model = clf.fit(X_train_scaled, y_train)
        dumb = dummy.fit(X_train_scaled, y_train)
        predict = model.predict(X_test_scaled)
        predict[predict == 0] = -1
        pred_dumb = dummy.predict(X_test_scaled)
        pred_dumb[pred_dumb ==0] == -1
        cv_trades[i][tick] = predict
        dumb_trades[i][tick] = pred_dumb
        i+=1



In [17]:
def get_performance(trade_dict, test_dict):
    n = len(trade_dict["AAPL"])
    x_t = [1] * n
    for i in range(1,n):
        x_t[i] = x_t[i-1] / 2
        for tick in trade_dict:
            x_t[i] += (x_t[i-1] / 30) * (1 + trade_dict[tick][i-1] * (test_dict[tick][i] - test_dict[tick][i-1]) / test_dict[tick][i-1])
    return x_t[-1]

In [34]:
lr_simulation_scores = {}
dumb_simulation_scores = {}
for i in range(4):
    lr_simulation_scores[i] = get_performance(cv_trades[i], cv_opens[i])
    dumb_simulation_scores[i]= get_performance(dumb_trades[i], cv_opens[i])
    print('logistic regression:', lr_simulation_scores[i])
    print('dummy:', dumb_simulation_scores[i])

logistic regression: 1.53355102876984
dummy: 0.9842163124698313
logistic regression: 1.4487215318971245
dummy: 0.9815142832991793
logistic regression: 1.4725586745653647
dummy: 1.0298111886139245
logistic regression: 1.4124186244646315
dummy: 0.990866929656873


In [48]:
log_simulation_scores = pd.DataFrame(lr_simulation_scores, index=['simulation_scores'])
dummy_simulation_scores = pd.DataFrame(dumb_simulation_scores, index=['simulation scores'])
log_simulation_scores

Unnamed: 0,0,1,2,3
simulation_scores,1.533551,1.448722,1.472559,1.412419


In [49]:
log_simulation_scores.to_csv('../results/log_results/log_simulation_scores.csv')
dummy_simulation_scores.to_csv('../results/log_results/dummy_simulation_scores.csv')

In [19]:
dumb_trades

[{'AAPL': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
  'ABBV': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
  'AMZN': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
  'BAC': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
  'GOOGL': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 