# Exploration of SMA data

Import libraries and preliminary set-up

In [1]:
import pandas as pd
import quandl
import os
import matplotlib.pyplot as plt
quandl.ApiConfig.api_key = "REMOVED"

Read in all the dataframes.

In [2]:
dfsSMA = {}
# relevant_csvs = ['SMA_FBUP.csv',
#  'SMA_TWTT.csv',
#  'SMA_YTCD.csv',
#  'SMA_FBP.csv',
#  'SMA_TWTD.csv',
#  'SMA_INSP.csv',
#  'SMA_INSD.csv',
#  'SMA_FBD.csv',
#  'SMA_YTVD.csv']

relevant_csvs = ['SMA_FBUP.csv']

for e in relevant_csvs:
    dfsSMA[e[:-4]] = pd.read_csv(os.path.join('datasets', 'SMA', e))

KeyboardInterrupt: 

The date format is YYYY-MM-DD

Only use companies for which we have stock market data

In [2]:
correct_tickers = pd.read_csv('datasets/correct_tickers.csv')

In [3]:
correct_tickers_set = set(list(correct_tickers.ticker))

In [9]:
len(correct_tickers_set)

422

Create a new dictionary of dataframes that includes only the relevant companies

In [10]:
dfsSMA_cc = {}

for indicator in dfsSMA.keys():
    df_temp = dfsSMA[indicator]
    df_temp = df_temp[df_temp['brand_ticker'].isin(correct_tickers_set)]
    dfsSMA_cc[indicator] = df_temp

In [11]:
dfsSMA_cc

{'SMA_FBUP':          brand_ticker           page_id                            post_id  \
 0                 MCD      100023357865     100023357865_10155483809542866   
 1                 MCD      100023357865     100023357865_10155483146272866   
 2                 MCD      100023357865     100023357865_10155483089412866   
 3                 MCD      100023357865     100023357865_10155483075287866   
 4                 MCD      100023357865     100023357865_10155482779322866   
 5                 MCD      100023357865     100023357865_10155482646617866   
 6                 MCD      100023357865     100023357865_10155481806422866   
 7                 MCD      100023357865     100023357865_10155479831442866   
 8                 MCD      100023357865     100023357865_10155479563672866   
 9                 MCD      100023357865     100023357865_10155476869487866   
 10                MCD      100023357865     100023357865_10155476842672866   
 11                MCD      100023357865

Verified manually that this actually works - it filtered out about 30% data

See how many different values there are in different selected attributes.

In [14]:
len(dfsSMA_cc['SMA_FBP']['sector'].value_counts())

KeyError: 'SMA_FBP'

In [15]:
len(dfsSMA_cc['SMA_FBP']['geography'].value_counts())

KeyError: 'SMA_FBP'

In [None]:
len(dfsSMA_cc['SMA_FBP']['type'].value_counts())

In [None]:
len(dfsSMA_cc['SMA_FBUP']['type'].value_counts())

In [None]:
len(dfsSMA_cc['SMA_INSD']['sector'].value_counts())

The decision based on this is to drop items like sector or geography, but keep e.g. the types

Select which indicators are relevant (this is before looking at p-values):

In [4]:
metrics = {}
metrics['SMA_FBUP']=['brand_ticker', 'type', 'likes', 'comments', 'shares', 'sentiment',
       'response_time', 'date']

metrics['SMA_TWTT']=['brand_ticker', 'retweet_count', 'replies_count',
       'favorite_count', 'engagement_score', 'date']

metrics['SMA_YTCD']=['brand_ticker', 'date', 'subscribers_count', 'page_views_count', 'uploads_count']

metrics['SMA_FBP']=['brand_ticker', 'type',
       'total-reactions', 'reactions-like', 'reactions_love',
       'reactions-haha', 'reactions-wow', 'reactions-sad', 'reactions-angry',
       'comments', 'shares', 'reach', 'impressions', 'engagement_score',
       'sentiment', 'date']

metrics['SMA_TWTD']=['brand_ticker',
       'date', 'followers_count', 'followees_count', 'tweets_count',
       'retweets_count', 'replies_count', 'favorites_count',
       'total_tweets_count', 'brand_proactive_count', 'brand_replies_count',
       'brand_retweets_count', 'average_reply_time', 'engagement_score']

metrics['SMA_INSP']=['brand_ticker', 'type', 'likes_count', 'comments_count',
       'engagement_score', 'date']

metrics['SMA_INSD']=['brand_ticker', 
       'date', 'followers_count', 'followees_count', 'posts_count',
       'likes_count', 'comments_count', 'total_posts_count',
       'engagement_score']

metrics['SMA_FBD']=['brand_ticker', 'date', 'fans',
       'new_fans', 'fan_post_count', 'admin_post_count', 'admin_post_likes',
       'admin_post_comments', 'admin_post_shares', 'admin_post_reach',
       'admin_post_impressions', 'engagement_score', 'people_talking_about',
       'promoted-admin_post_count', 'promoted-admin_post_likes',
       'promoted-admin_post_comments', 'promoted-admin_post_shares',
       'promoted-admin_post_reach', 'promoted-admin_post_impressions',
       'promoted-engagement_score', 'organic-admin_post_count',
       'organic-admin_post_likes', 'organic-admin_post_comments',
       'organic-admin_post_shares', 'organic-admin_post_reach',
       'organic-admin_post_impressions', 'organic-engagement_score']

metrics['SMA_YTVD']=['brand_ticker', 'date', 'yt_duration', 'views_count', 'likes_count',
       'dislikes_count', 'comments_count']

In [5]:
categ_metrics = {}
categ_metrics['SMA_FBUP']=['SMA_FBUP_type', 'SMA_FBUP_sentiment']

categ_metrics['SMA_TWTT']=[]

categ_metrics['SMA_YTCD']=[]

categ_metrics['SMA_FBP']=['SMA_FBP_type', 'SMA_FBP_sentiment']

categ_metrics['SMA_TWTD']=[]

categ_metrics['SMA_INSP']=['SMA_INSP_type']

categ_metrics['SMA_INSD']=[]

categ_metrics['SMA_FBD']=[]

categ_metrics['SMA_YTVD']=[]

In [13]:
dfsSMA_cc_ra = {} # relevant attributes only

for indicator in dfsSMA_cc.keys():
    dfsSMA_cc_ra[indicator] = dfsSMA_cc[indicator][metrics[indicator]]
    # rename the indicators to include also the general name - this is needed for one big frame
    metrics_dct = {}
    for metric in metrics[indicator]:
        if metric != 'brand_ticker' and metric != 'date':
            new_metric_name = indicator + '_' + metric
            metrics_dct[metric] = new_metric_name
    dfsSMA_cc_ra[indicator] = dfsSMA_cc_ra[indicator].rename(columns=metrics_dct)

Now we need to merge all the nine dataframes together.

Have a look at some information about the different dataframes - e.g. how many companies do we have there?

In [14]:
for indicator in dfsSMA_cc_ra.keys():
    print(indicator)
    print(dfsSMA_cc_ra[indicator].shape[0])
#     print(dfsSMA_cc_ra[indicator])
    print(len(set((dfsSMA_cc_ra[indicator]['brand_ticker']))))

SMA_FBUP
5024200
114


Merge this with stock prices

In [15]:
# stock_files = os.listdir('price_data')

In [16]:
# sf1 = pd.read_csv('price_data/' + stock_files[0])
# stock_files_df = pd.read_csv('price_data/' + stock_files[0])
# stock_files_df = stock_files_df.rename(columns={'Date': 'date', 'Adj_Close': 'adj_close'})[['date', 'adj_close']]
# name = stock_files[0].split('.csv')[0]
# stock_files_df['brand_ticker'] = name

In [17]:
# for st_df_name in stock_files[1:]:
#     st_df_tmp = pd.read_csv('price_data/' + st_df_name)
#     st_df_tmp = st_df_tmp.rename(columns={'Date': 'date', 'Adj_Close': 'adj_close'})[['date', 'adj_close']]
#     name = st_df_name.split('.csv')[0]
#     st_df_tmp['brand_ticker'] = name
#     stock_files_df = pd.concat([stock_files_df, st_df_tmp])

KeyboardInterrupt: 

In [None]:
# stock_files_df.to_csv('stock_prices_all.csv')

In [6]:
stock_files_df = pd.read_csv('stock_prices_all.csv')

Try to merge the two dataframes:

In [38]:
stock_files_df = stock_files_df.reset_index(drop=True)

In [39]:
stock_files_df = stock_files_df.drop('Unnamed: 0', axis=1)

In [40]:
stock_files_df

Unnamed: 0,date,adj_close,brand_ticker
0,2016-11-01,23.00,AA
1,2016-11-02,22.91,AA
2,2016-11-03,24.15,AA
3,2016-11-04,25.20,AA
4,2016-11-07,25.08,AA
5,2016-11-08,26.40,AA
6,2016-11-09,28.72,AA
7,2016-11-10,29.10,AA
8,2016-11-11,29.30,AA
9,2016-11-14,29.62,AA


In [23]:
dfsSMA_cc_ra['SMA_FBUP'] = dfsSMA_cc_ra['SMA_FBUP'].reset_index(drop=True)
# dfsSMA_cc_ra['SMA_FBUP'] = dfsSMA_cc_ra['SMA_FBUP'].drop('Unnamed: 0', axis=1)

In [24]:
dfsSMA_cc_ra['SMA_FBUP']

Unnamed: 0,brand_ticker,SMA_FBUP_type,SMA_FBUP_likes,SMA_FBUP_comments,SMA_FBUP_shares,SMA_FBUP_sentiment,SMA_FBUP_response_time,date
0,MCD,PHOTO,7,2,0,NEUTRAL,0,2017-07-18
1,MCD,PHOTO,6,11,0,NEUTRAL,159487,2017-07-18
2,MCD,PHOTO,6,7,0,NEUTRAL,0,2017-07-18
3,MCD,PHOTO,29,12,0,POSITIVE,161380,2017-07-18
4,MCD,STATUS,1,4,1,NEUTRAL,0,2017-07-18
5,MCD,PHOTO,56,7,0,NEUTRAL,170853,2017-07-18
6,MCD,PHOTO,8,10,0,NEUTRAL,14056,2017-07-17
7,MCD,STATUS,4,1,0,NEUTRAL,61974,2017-07-17
8,MCD,PHOTO,24,7,0,NEUTRAL,69455,2017-07-17
9,MCD,PHOTO,5,1,0,NEUTRAL,42547,2017-07-16


In [25]:
sma_fbup_price = pd.merge(dfsSMA_cc_ra['SMA_FBUP'], stock_files_df,on=['brand_ticker', 'date'], how='inner')

In [30]:
sma_fbup_price.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3715464 entries, 0 to 3715463
Data columns (total 9 columns):
brand_ticker              object
SMA_FBUP_type             object
SMA_FBUP_likes            int64
SMA_FBUP_comments         int64
SMA_FBUP_shares           int64
SMA_FBUP_sentiment        object
SMA_FBUP_response_time    int64
date                      object
adj_close                 float64
dtypes: float64(1), int64(4), object(4)
memory usage: 283.5+ MB


In [31]:
sma_fbup_categ_vars = ['SMA_FBUP_type', 'SMA_FBUP_sentiment']
for categ_var in sma_fbup_categ_vars:
    sma_fbup_price[categ_var] = sma_fbup_price[categ_var].astype('category')

In [43]:
sma_fbup_non_categ_vars = list(set(sma_fbup_price.columns)-set(sma_fbup_categ_vars))

In [45]:
sma_fbup_price_with_categs = pd.get_dummies(sma_fbup_price[sma_fbup_categ_vars], drop_first=True)
# drop first does it correctly even for multiple categorical variables
sma_fbup_price_with_categs[sma_fbup_non_categ_vars] = sma_fbup_price[sma_fbup_non_categ_vars]
sma_fbup_price_with_categs

Unnamed: 0,SMA_FBUP_type_PHOTO,SMA_FBUP_type_STATUS,SMA_FBUP_type_UNKNOWN,SMA_FBUP_type_VIDEO,SMA_FBUP_sentiment_NEUTRAL,SMA_FBUP_sentiment_POSITIVE,SMA_FBUP_sentiment_UNDEFINED,SMA_FBUP_likes,SMA_FBUP_comments,adj_close,SMA_FBUP_response_time,brand_ticker,SMA_FBUP_shares,date
0,1,0,0,0,1,0,0,7,2,145.835780,0,MCD,0,2017-07-18
1,1,0,0,0,1,0,0,6,11,145.835780,159487,MCD,0,2017-07-18
2,1,0,0,0,1,0,0,6,7,145.835780,0,MCD,0,2017-07-18
3,1,0,0,0,0,1,0,29,12,145.835780,161380,MCD,0,2017-07-18
4,0,1,0,0,1,0,0,1,4,145.835780,0,MCD,1,2017-07-18
5,1,0,0,0,1,0,0,56,7,145.835780,170853,MCD,0,2017-07-18
6,0,1,0,0,1,0,0,0,1,145.835780,0,MCD,0,2017-07-18
7,0,1,0,0,0,1,0,2,1,145.835780,0,MCD,0,2017-07-18
8,0,1,0,0,1,0,0,0,0,145.835780,0,MCD,0,2017-07-18
9,0,1,0,0,1,0,0,0,1,145.835780,18739,MCD,0,2017-07-18


In [2]:
# sma_fbup_price_with_categs.to_csv('sma_fbup_2.csv')
sma_fbup_price_with_categs = pd.read_csv('sma_fbup_2.csv')

In [3]:
import numpy as np

In [4]:
from sklearn.linear_model import LinearRegression

In [57]:
# X = sma_fbup_price_with_categs.drop(['adj_close', 'date', 'brand_ticker'], axis=1)
# y = sma_fbup_price_with_categs['adj_close']

In [58]:
# reg = LinearRegression().fit(X, y)
# reg.score(X, y)

0.024593599071176975

In [16]:
sma_fbup_price_with_categs_2 = sma_fbup_price_with_categs.rename(columns={'adj_close': 'Adj_Close', 'date': 'datepll'})

In [17]:
sma_fbup_price_with_categs_2

Unnamed: 0.1,Unnamed: 0,SMA_FBUP_type_PHOTO,SMA_FBUP_type_STATUS,SMA_FBUP_type_UNKNOWN,SMA_FBUP_type_VIDEO,SMA_FBUP_sentiment_NEUTRAL,SMA_FBUP_sentiment_POSITIVE,SMA_FBUP_sentiment_UNDEFINED,SMA_FBUP_likes,SMA_FBUP_comments,Adj_Close,SMA_FBUP_response_time,brand_ticker,SMA_FBUP_shares,datepll
0,0,1,0,0,0,1,0,0,7,2,145.835780,0,MCD,0,2017-07-18
1,1,1,0,0,0,1,0,0,6,11,145.835780,159487,MCD,0,2017-07-18
2,2,1,0,0,0,1,0,0,6,7,145.835780,0,MCD,0,2017-07-18
3,3,1,0,0,0,0,1,0,29,12,145.835780,161380,MCD,0,2017-07-18
4,4,0,1,0,0,1,0,0,1,4,145.835780,0,MCD,1,2017-07-18
5,5,1,0,0,0,1,0,0,56,7,145.835780,170853,MCD,0,2017-07-18
6,6,0,1,0,0,1,0,0,0,1,145.835780,0,MCD,0,2017-07-18
7,7,0,1,0,0,0,1,0,2,1,145.835780,0,MCD,0,2017-07-18
8,8,0,1,0,0,1,0,0,0,0,145.835780,0,MCD,0,2017-07-18
9,9,0,1,0,0,1,0,0,0,1,145.835780,18739,MCD,0,2017-07-18


In [21]:
available_tickers = []
# sma_fbup_price_with_categs_2 = sma_fbup_price_with_categs_2.reset_index()
# sma_fbup_price_with_categs_2 = sma_fbup_price_with_categs_2.drop('Unnamed: 0', axis=1)

for ticker, sub_df in sma_fbup_price_with_categs_2.groupby('brand_ticker'):
    available_tickers.append(ticker)
    sub_df = sub_df.drop('brand_ticker', axis=1)
#     sub_df = sub_df.select_dtypes(exclude=['object'])
    sub_df.to_csv('./selected_sma_fbup_features/%s.csv' % ticker)

In [22]:
print(4)

4


In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn import linear_model
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.metrics import mean_squared_error
from math import sqrt
from itertools import combinations

selected_features = []
    
def select_kpca_components(ticker):

    interpolated_data = pd.read_csv('./selected_sma_fbup_features/%s.csv' % ticker)
    adj_close = interpolated_data['Adj_Close']
    adj_close_diff = adj_close.diff(1)
    interpolated_data['Adj_Close_Lag'] = adj_close_diff.shift(1)

    def get_y_and_date(df_param):
        df_param = df_param.drop([0], axis=0)
        df_param = df_param.dropna()
        adj_close_diff = df_param['Adj_Close'].diff(1)
        adj_close_diff = adj_close_diff.dropna()
        return df_param['datepll'], adj_close_diff

    def evaluate_model_performance(true_consumption, predictions):
        from sklearn.metrics import mean_squared_error
        mean_squared_error = mean_squared_error(true_consumption, predictions)
        root_mean_squared_error = sqrt(mean_squared_error) 
        return root_mean_squared_error

    def forecaster(X, y, date):
        tscv = TimeSeriesSplit(n_splits=int(y.shape[0]*0.85))
        predictions = []
        true_consumption = []
        dates = []

        for train_index, test_index in tscv.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)

            X_train, X_test = X[:len(train_index) - 1], X[len(train_index): len(train_index) + 1]
            y_train, y_test = y[:len(train_index) - 1], y[len(train_index): len(train_index) + 1]
            date_train, date_test = date[:len(train_index)], date[len(train_index): len(train_index) +1]

            clf = linear_model.LinearRegression()
            clf.fit(X_train, y_train) # training is conducted on the sample before t
            # print(clf.coef_)
    #         print("reached")
    #         print(X_test.head(1))
    #         print(y_test.head(1))
            prediction = clf.predict(X_test) # nowcast and forecast are distinguished by having different X sets, the caster is the same!!!
            # print(prediction.item(0))
    #         print(y_test.values[0])
            predictions.append(prediction.item(0))
            true_consumption.append(y_test.values[0])
            # print("THE TRUE CONS")
    #         print(y_test.values[0])
    #         print(true_consumption)
            dates.append(date_test.values[0])
            # print(y_test.values[0])
    #         print(prediction.item(0))
            return true_consumption, predictions, dates

    def recursive_kpca_iterator(X_used, X_unused, y, date, df_param, root_mean_sq_error_param):
    #     print("Starting the recursive iterative procedure")
        root_mean_sq_error = root_mean_sq_error_param # as a reference point
        useful_x = ''
        global selected_features 
        
        if len(X_unused) == 0:
    #         print("X_unused is 0")
    #         print(X_used)
            return X_used

        for x in X_unused:
            combo = X_used
            combo.append(x)
            # print(combo)
            X = df_param[combo]
            X = X.dropna()

            true_consumption, predictions, dates = forecaster(X, y, date)                

            if root_mean_sq_error > evaluate_model_performance(true_consumption, predictions):
                root_mean_sq_error = evaluate_model_performance(true_consumption, predictions)
                useful_x = x
            combo.remove(x)

        if root_mean_sq_error < root_mean_sq_error_param:
    #         print(useful_x)
            X_used_new = X_used.copy()
            X_used_new.append(useful_x)
            X_unused_new = X_unused.copy()
            X_unused_new.remove(useful_x)
    #         print(X_used_new) 
    #         print(X_unused_new)
    #         print(root_mean_sq_error)
            recursive_kpca_iterator(X_used_new, X_unused_new, y, date, df_param, root_mean_sq_error)
        else:
    #         print("The error term did not improve!")
            selected_features = list(set(X_used))
            print(selected_features)
            print(root_mean_sq_error)
        return X_used

    def generate_lag_consumption_and_composite_X_baseline(df_param):
    #     print(df_param.head())

        return df_param['Adj_Close_Lag']

    def perform_recursive_kpca_iterator(df_param):

        date, y = get_y_and_date(df_param)
        X_baseline = generate_lag_consumption_and_composite_X_baseline(df_param)
        X_baseline = X_baseline.dropna()
        del df_param['Adj_Close']
        del df_param['datepll']

        X_used = ['Adj_Close_Lag']
        X_unused = df_param.columns

    #     print(X_used)
    #     print(X_unused)
        if int(y.shape[0]*0.85) < 2:
            return
        recursive_kpca_iterator(list(X_used), list(X_unused), y, date, df_param, 10000.0) # instead of saving, just print the result

        # At the end of the procedure the best performing KPCA components will be printed with the RMSFE of the best GT-based model
    
    perform_recursive_kpca_iterator(interpolated_data)
#     print(interpolated_data.columns.tolist()[0],type(interpolated_data.columns.tolist()[0]))
    print(selected_features)
#     print(selected_features[0], type(selected_features[0]))
#     print(set(interpolated_data.columns.tolist()).intersection(set(selected_features)))
    selected_data = interpolated_data[selected_features]
    selected_data.to_csv('./selected_kpca_components_sma_fbup/%s.csv' % ticker)

for ticker in available_tickers:
    print(ticker)
    select_kpca_components(ticker)

AAPL
['SMA_FBUP_shares', 'SMA_FBUP_sentiment_NEUTRAL', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
3.1554596936905766e-06
['SMA_FBUP_shares', 'SMA_FBUP_sentiment_NEUTRAL', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
ABBV
['Adj_Close_Lag', 'SMA_FBUP_response_time', 'SMA_FBUP_type_PHOTO']
0.3324312915218691
['Adj_Close_Lag', 'SMA_FBUP_response_time', 'SMA_FBUP_type_PHOTO']
ABT
['Adj_Close_Lag', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_sentiment_UNDEFINED']
0.5564016774912909
['Adj_Close_Lag', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_sentiment_UNDEFINED']
ADBE
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_response_time', 'SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
0.00017233426322781542
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_response_time', 'SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
ADI
['Adj_Close_Lag', 'SMA_FBUP_type_PHOTO', 'Unnamed: 0', 'SMA_FBUP_comments']
0.261025539812465
['Adj_Close_Lag', 'SMA_FBUP_type_PHOTO', 'Unnamed: 0', 'SMA_FBUP_comments']
ADM
['Adj_Clo

['SMA_FBUP_type_UNKNOWN', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_likes', 'Adj_Close_Lag']
9.061463514894313e-06
['SMA_FBUP_type_UNKNOWN', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_likes', 'Adj_Close_Lag']
ES
['SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_comments', 'SMA_FBUP_likes', 'Unnamed: 0', 'Adj_Close_Lag']
1.5799985757249075
['SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_comments', 'SMA_FBUP_likes', 'Unnamed: 0', 'Adj_Close_Lag']
ESRX
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_response_time', 'SMA_FBUP_type_UNKNOWN', 'Unnamed: 0', 'Adj_Close_Lag', 'SMA_FBUP_type_STATUS', 'SMA_FBUP_type_VIDEO']
0.00010313148732166155
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_response_time', 'SMA_FBUP_type_UNKNOWN', 'Unnamed: 0', 'Adj_Close_Lag', 'SMA_FBUP_type_STATUS', 'SMA_FBUP_type_VIDEO']
F
['SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
3.58176273609508e-07
['SMA_FBUP_

MSI
['SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_comments', 'Unnamed: 0', 'SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
0.0027770398141222508
['SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_comments', 'Unnamed: 0', 'SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
MU
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_response_time', 'SMA_FBUP_type_UNKNOWN', 'SMA_FBUP_likes', 'Adj_Close_Lag']
0.8889545590850243
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_response_time', 'SMA_FBUP_type_UNKNOWN', 'SMA_FBUP_likes', 'Adj_Close_Lag']
NOC
['SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_response_time', 'SMA_FBUP_comments', 'SMA_FBUP_likes', 'Adj_Close_Lag']
1.9084003441927493
['SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_response_time', 'SMA_FBUP_comments', 'SMA_FBUP_likes', 'Adj_Close_Lag']
NVDA
['Adj_Close_Lag', 'SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_type_PHOTO', 'SMA_FBUP_type_VIDEO']
0.4193178781215293
['Adj_Close_Lag', 'SMA_FBUP_sen

Process now the remaining datasets:

In [7]:
relevant_csvs = ['SMA_TWTT.csv',
 'SMA_YTCD.csv',
 'SMA_FBP.csv',
 'SMA_TWTD.csv',
 'SMA_INSP.csv',
 'SMA_INSD.csv',
 'SMA_FBD.csv',
 'SMA_YTVD.csv',
 'SMA_FBUP.csv']

# relevant_csvs = [
#  'SMA_FBP.csv',
#  'SMA_TWTD.csv',
#  'SMA_INSP.csv',
#  'SMA_INSD.csv',
#  'SMA_FBD.csv',
#  'SMA_YTVD.csv',
#  'SMA_FBUP.csv']

# relevant_csvs = [
#  'SMA_FBP.csv',
#  'SMA_INSP.csv',
#  'SMA_FBD.csv',
#  'SMA_YTVD.csv',
#  'SMA_FBUP.csv']

#  'SMA_FBP.csv',

for e in relevant_csvs:
    print(e)
    indicator = e[:-4]
    dfsSMA = pd.read_csv(os.path.join('datasets', 'SMA', e))
    df_temp = dfsSMA
    df_temp = df_temp[df_temp['brand_ticker'].isin(correct_tickers_set)]
    dfsSMA_cc = df_temp
        
    dfsSMA_cc_ra = dfsSMA_cc[metrics[indicator]]
    metrics_dct = {}
    for metric in metrics[indicator]:
        if metric != 'brand_ticker' and metric != 'date':
            new_metric_name = indicator + '_' + metric
            metrics_dct[metric] = new_metric_name
    dfsSMA_cc_ra = dfsSMA_cc_ra.rename(columns=metrics_dct)
    dfsSMA_cc_ra = dfsSMA_cc_ra.reset_index(drop=True)
    dfsSMA_cc_ra_price = pd.merge(dfsSMA_cc_ra, stock_files_df,on=['brand_ticker', 'date'], how='inner')
    
    if categ_metrics[indicator]:
        for categ_var in categ_metrics[indicator]:
#             import pdb; pdb.set_trace()
            dfsSMA_cc_ra_price[categ_var] = dfsSMA_cc_ra_price[categ_var].astype('category')
        non_categ_vars = list(set(dfsSMA_cc_ra_price.columns)-set(categ_metrics[indicator]))

        sma_temp_price_with_categs = pd.get_dummies(dfsSMA_cc_ra_price[categ_metrics[indicator]], drop_first=True)
        # drop first does it correctly even for multiple categorical variables
        sma_temp_price_with_categs[non_categ_vars] = dfsSMA_cc_ra_price[non_categ_vars]
        sma_temp_price_with_categs.to_csv(indicator.lower() + '_2.csv')
    else:
        dfsSMA_cc_ra_price.to_csv(indicator.lower() + '_2.csv')


SMA_FBP.csv
SMA_INSP.csv
SMA_FBD.csv
SMA_YTVD.csv
SMA_FBUP.csv


In [18]:
available_tickers = {}
# sma_fbup_price_with_categs_2 = sma_fbup_price_with_categs_2.reset_index()
# sma_fbup_price_with_categs_2 = sma_fbup_price_with_categs_2.drop('Unnamed: 0', axis=1)

relevant_csvs = [
 'sma_fbd_2.csv',
 'sma_fbp_2.csv',
 'sma_fbup_2.csv',
 'sma_insd_2.csv',
 'sma_insp_2.csv',
 'sma_twtd_2.csv',
 'sma_twtt_2.csv',
 'sma_ytcd_2.csv',
 'sma_ytvd_2.csv']

# 'SMA_TWTT.csv',
#  'SMA_YTCD.csv',
#  'SMA_FBP.csv',
#  'SMA_TWTD.csv',
#  'SMA_INSP.csv',
#  'SMA_INSD.csv',

for csv in relevant_csvs:
    temp_df = pd.read_csv(csv)
    os.mkdir('./selected_' + csv.split('_2.')[0] + '_features')
    available_tickers[csv.split('_2.')[0]] = []
    
    for ticker, sub_df in temp_df.groupby('brand_ticker'):
        available_tickers[csv.split('_2.')[0]].append(ticker)
        sub_df = sub_df.drop('brand_ticker', axis=1)
    #     sub_df = sub_df.select_dtypes(exclude=['object'])
        sub_df.to_csv('./selected_' + csv.split('_2.')[0] + '_features' + '/%s.csv' % ticker)

In [14]:
import json

In [19]:
with open('available_tickers.json', 'w') as f:
    json.dump(available_tickers, f)

In [25]:
available_tickers

{'sma_fbd': ['AAPL',
  'ABBV',
  'ABC',
  'ABT',
  'ADBE',
  'ADI',
  'ADM',
  'ADP',
  'AES',
  'AET',
  'AMAT',
  'AMD',
  'AMGN',
  'ANTM',
  'APA',
  'APC',
  'BA',
  'BAX',
  'BBY',
  'BCR',
  'BDX',
  'BIIB',
  'BMY',
  'C',
  'CA',
  'CAG',
  'CAH',
  'CAT',
  'CCL',
  'CELG',
  'CHK',
  'CI',
  'CLX',
  'CMI',
  'CNC',
  'CNP',
  'COL',
  'COP',
  'COST',
  'COTY',
  'CPB',
  'CSCO',
  'CSRA',
  'CVS',
  'CVX',
  'CXO',
  'DAL',
  'DD',
  'DE',
  'DGX',
  'DISH',
  'DPS',
  'DVA',
  'DXC',
  'EFX',
  'EIX',
  'ES',
  'ESRX',
  'F',
  'FCX',
  'FDX',
  'FFIV',
  'FIS',
  'GD',
  'GE',
  'GILD',
  'GIS',
  'GLW',
  'GM',
  'GOOGL',
  'HAL',
  'HBI',
  'HCA',
  'HES',
  'HOLX',
  'HON',
  'HPE',
  'HPQ',
  'HRS',
  'HSIC',
  'HUM',
  'IBM',
  'IFF',
  'INTC',
  'IP',
  'JNJ',
  'JNPR',
  'K',
  'KHC',
  'KLAC',
  'KMB',
  'KMI',
  'KO',
  'LLL',
  'LLY',
  'LMT',
  'LRCX',
  'LVLT',
  'MAS',
  'MCD',
  'MCK',
  'MNST',
  'MRK',
  'MS',
  'MSFT',
  'MSI',
  'MU',
  'NEE',
  'NOC',


In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn import linear_model
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.metrics import mean_squared_error
from math import sqrt
from itertools import combinations

selected_features = []
    
def select_kpca_components(ticker, csv_part):

    interpolated_data = pd.read_csv('./selected_' + csv_part + '_features/%s.csv' % ticker)
    interpolated_data = interpolated_data.rename(columns={'adj_close': 'Adj_Close', 'date': 'datepll'})
    dates = interpolated_data['datepll']
    adj_close = interpolated_data['Adj_Close']
    adj_close_diff = adj_close.diff(1)
    interpolated_data['Adj_Close_Lag'] = adj_close_diff.shift(1)

    def get_y_and_date(df_param):
        df_param = df_param.drop([0], axis=0)
        df_param = df_param.dropna()
        adj_close_diff = df_param['Adj_Close'].diff(1)
        adj_close_diff = adj_close_diff.dropna()
        return df_param['datepll'], adj_close_diff

    def evaluate_model_performance(true_consumption, predictions):
        from sklearn.metrics import mean_squared_error
        mean_squared_error = mean_squared_error(true_consumption, predictions)
        root_mean_squared_error = sqrt(mean_squared_error) 
        return root_mean_squared_error

    def forecaster(X, y, date):
        tscv = TimeSeriesSplit(n_splits=int(y.shape[0]*0.85))
        predictions = []
        true_consumption = []
        dates = []

        for train_index, test_index in tscv.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)

            X_train, X_test = X[:len(train_index) - 1], X[len(train_index): len(train_index) + 1]
            y_train, y_test = y[:len(train_index) - 1], y[len(train_index): len(train_index) + 1]
            date_train, date_test = date[:len(train_index)], date[len(train_index): len(train_index) +1]

            clf = linear_model.LinearRegression()
            clf.fit(X_train, y_train) # training is conducted on the sample before t
            # print(clf.coef_)
    #         print("reached")
    #         print(X_test.head(1))
    #         print(y_test.head(1))
            prediction = clf.predict(X_test) # nowcast and forecast are distinguished by having different X sets, the caster is the same!!!
            # print(prediction.item(0))
    #         print(y_test.values[0])
            predictions.append(prediction.item(0))
            true_consumption.append(y_test.values[0])
            # print("THE TRUE CONS")
    #         print(y_test.values[0])
    #         print(true_consumption)
            dates.append(date_test.values[0])
            # print(y_test.values[0])
    #         print(prediction.item(0))
            return true_consumption, predictions, dates

    def recursive_kpca_iterator(X_used, X_unused, y, date, df_param, root_mean_sq_error_param):
    #     print("Starting the recursive iterative procedure")
        root_mean_sq_error = root_mean_sq_error_param # as a reference point
        useful_x = ''
        global selected_features 
        
        if len(X_unused) == 0:
    #         print("X_unused is 0")
    #         print(X_used)
            return X_used

        for x in X_unused:
            combo = X_used
            combo.append(x)
            # print(combo)
            X = df_param[combo]
            X = X.dropna()

            true_consumption, predictions, dates = forecaster(X, y, date)                

            if root_mean_sq_error > evaluate_model_performance(true_consumption, predictions):
                root_mean_sq_error = evaluate_model_performance(true_consumption, predictions)
                useful_x = x
            combo.remove(x)

        if root_mean_sq_error < root_mean_sq_error_param:
    #         print(useful_x)
            X_used_new = X_used.copy()
            X_used_new.append(useful_x)
            X_unused_new = X_unused.copy()
            X_unused_new.remove(useful_x)
    #         print(X_used_new) 
    #         print(X_unused_new)
    #         print(root_mean_sq_error)
            recursive_kpca_iterator(X_used_new, X_unused_new, y, date, df_param, root_mean_sq_error)
        else:
    #         print("The error term did not improve!")
            selected_features = list(set(X_used))
            print(selected_features)
            print(root_mean_sq_error)
        return X_used

    def generate_lag_consumption_and_composite_X_baseline(df_param):
    #     print(df_param.head())

        return df_param['Adj_Close_Lag']

    def perform_recursive_kpca_iterator(df_param):

        date, y = get_y_and_date(df_param)
        X_baseline = generate_lag_consumption_and_composite_X_baseline(df_param)
        X_baseline = X_baseline.dropna()
        del df_param['Adj_Close']
        del df_param['datepll']

        X_used = ['Adj_Close_Lag']
        X_unused = df_param.columns

    #     print(X_used)
    #     print(X_unused)
        if int(y.shape[0]*0.85) < 2:
            return
        recursive_kpca_iterator(list(X_used), list(X_unused), y, date, df_param, 10000.0) # instead of saving, just print the result

        # At the end of the procedure the best performing KPCA components will be printed with the RMSFE of the best GT-based model
    
    perform_recursive_kpca_iterator(interpolated_data)
#     print(interpolated_data.columns.tolist()[0],type(interpolated_data.columns.tolist()[0]))
    print(selected_features)
#     print(selected_features[0], type(selected_features[0]))
#     print(set(interpolated_data.columns.tolist()).intersection(set(selected_features)))
    selected_data = interpolated_data[selected_features]
    selected_data['date'] = dates
    selected_data.to_csv('./selected_kpca_components_' + csv_part + '/%s.csv' % ticker)



In [26]:
relevant_csvs = [
 'sma_fbd_2.csv',
 'sma_fbp_2.csv',
 'sma_fbup_2.csv',
 'sma_insd_2.csv',
 'sma_insp_2.csv',
 'sma_twtd_2.csv',
 'sma_twtt_2.csv',
 'sma_ytcd_2.csv',
 'sma_ytvd_2.csv']

for csv in relevant_csvs:
    csv_str = csv.split('_2.')[0]
    os.mkdir('./selected_kpca_components_' + csv_str)
    print(csv_str)
    for ticker in available_tickers[csv_str]:
        print(ticker)
        select_kpca_components(ticker, csv_str.lower())

sma_fbd
AAPL
['SMA_FBD_organic-admin_post_shares', 'SMA_FBD_fans', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_likes', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
4.6570748401641234e-05
['SMA_FBD_organic-admin_post_shares', 'SMA_FBD_fans', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_likes', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
ABBV


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['SMA_FBD_admin_post_count', 'SMA_FBD_fan_post_count', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
0.14045888829984743
['SMA_FBD_admin_post_count', 'SMA_FBD_fan_post_count', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
ABC
['SMA_FBD_new_fans', 'Adj_Close_Lag', 'SMA_FBD_admin_post_likes', 'SMA_FBD_people_talking_about']
0.00010942747591763391
['SMA_FBD_new_fans', 'Adj_Close_Lag', 'SMA_FBD_admin_post_likes', 'SMA_FBD_people_talking_about']
ABT
['Unnamed: 0.1.1', 'SMA_FBD_new_fans', 'Unnamed: 0', 'SMA_FBD_engagement_score', 'Adj_Close_Lag']
0.00029165803488950814
['Unnamed: 0.1.1', 'SMA_FBD_new_fans', 'Unnamed: 0', 'SMA_FBD_engagement_score', 'Adj_Close_Lag']
ADBE
['Unnamed: 0', 'Unnamed: 0.1.1', 'Adj_Close_Lag', 'SMA_FBD_admin_post_count']
5.948255497045807e-06
['Unnamed: 0', 'Unnamed: 0.1.1', 'Adj_Close_Lag', 'SMA_FBD_admin_post_count']
ADI
['SMA_FBD_admin_post_count', 'SMA_FBD_fan_post_count', 'Unnamed: 0.1', 'SMA_F

['SMA_FBD_admin_post_count', 'Adj_Close_Lag', 'SMA_FBD_organic-admin_post_likes', 'SMA_FBD_admin_post_likes']
0.4909388880367139
['SMA_FBD_admin_post_count', 'Adj_Close_Lag', 'SMA_FBD_organic-admin_post_likes', 'SMA_FBD_admin_post_likes']
CI
['SMA_FBD_organic-admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_admin_post_likes']
0.0003048798832972037
['SMA_FBD_organic-admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_admin_post_likes']
CLX
['SMA_FBD_admin_post_count', 'Unnamed: 0.1', 'SMA_FBD_new_fans', 'Unnamed: 0', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag']
0.00015228002933165286
['SMA_FBD_admin_post_count', 'Unnamed: 0.1', 'SMA_FBD_new_fans', 'Unnamed: 0', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag']
CMI
['SMA_FBD_fan_post_count', 'SMA_FBD_admin_post_reach', 'SMA_FBD_new_fans', 'SMA_FBD_fans', 'SMA_FBD_admin_post_likes', 'Adj_Close_Lag']
6.298982670705118e-05
['SMA_FBD_fan_post_count', 'SMA_FBD_admin_post_reach', 'SMA_FBD_new_fans', 'SMA_FBD_fans', 'SMA_FBD_admin_post_likes', 'Adj_Clo

['Unnamed: 0.1.1', 'Adj_Close_Lag', 'SMA_FBD_admin_post_comments']
0.0028992403650072163
['Unnamed: 0.1.1', 'Adj_Close_Lag', 'SMA_FBD_admin_post_comments']
ES
['SMA_FBD_admin_post_count', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
0.0008896339903408067
['SMA_FBD_admin_post_count', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
ESRX
['Adj_Close_Lag', 'SMA_FBD_fan_post_count']
0.7190200145362777
['Adj_Close_Lag', 'SMA_FBD_fan_post_count']
F
['SMA_FBD_fan_post_count', 'SMA_FBD_organic-admin_post_likes', 'SMA_FBD_new_fans', 'SMA_FBD_fans', 'SMA_FBD_admin_post_likes', 'Adj_Close_Lag']
1.0539633715392515e-06
['SMA_FBD_fan_post_count', 'SMA_FBD_organic-admin_post_likes', 'SMA_FBD_new_fans', 'SMA_FBD_fans', 'SMA_FBD_admin_post_likes', 'Adj_Close_Lag']
FCX
['Unnamed: 0', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag', 'SMA_FBD_fans']
0.43280290781683206
['Unnamed: 0', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag', 'SMA_FBD_fans']
FD

['SMA_FBD_admin_post_reach', 'Adj_Close_Lag']
0.0011316232843347773
['SMA_FBD_admin_post_reach', 'Adj_Close_Lag']
IP
['SMA_FBD_admin_post_count', 'SMA_FBD_new_fans', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_comments', 'SMA_FBD_admin_post_likes', 'SMA_FBD_organic-admin_post_count', 'Adj_Close_Lag']
0.26616563523445225
['SMA_FBD_admin_post_count', 'SMA_FBD_new_fans', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_comments', 'SMA_FBD_admin_post_likes', 'SMA_FBD_organic-admin_post_count', 'Adj_Close_Lag']
JNJ
['SMA_FBD_organic-admin_post_reach', 'SMA_FBD_new_fans', 'SMA_FBD_admin_post_shares', 'SMA_FBD_organic-admin_post_count', 'Adj_Close_Lag']
1.4649448447635188e-05
['SMA_FBD_organic-admin_post_reach', 'SMA_FBD_new_fans', 'SMA_FBD_admin_post_shares', 'SMA_FBD_organic-admin_post_count', 'Adj_Close_Lag']
JNPR
['SMA_FBD_organic-admin_post_shares', 'SMA_FBD_fan_post_count', 'SMA_FBD_new_fans', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_likes', 'Adj_Close_Lag']
0.502285096175339

['Unnamed: 0.1.1', 'SMA_FBD_new_fans', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag']
0.813520157754958
['Unnamed: 0.1.1', 'SMA_FBD_new_fans', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag']
OXY
['Adj_Close_Lag', 'SMA_FBD_people_talking_about']
0.05774447739697824
['Adj_Close_Lag', 'SMA_FBD_people_talking_about']
PEP
['SMA_FBD_admin_post_count', 'SMA_FBD_organic-admin_post_shares', 'SMA_FBD_new_fans', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
0.00027349139675599416
['SMA_FBD_admin_post_count', 'SMA_FBD_organic-admin_post_shares', 'SMA_FBD_new_fans', 'SMA_FBD_admin_post_shares', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
PFE
['SMA_FBD_admin_post_count', 'Unnamed: 0', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
0.03127932123722843
['SMA_FBD_admin_post_count', 'Unnamed: 0', 'SMA_FBD_admin_post_comme

['Adj_Close_Lag', 'SMA_FBD_new_fans', 'SMA_FBD_people_talking_about']
0.0001496969766224995
['Adj_Close_Lag', 'SMA_FBD_new_fans', 'SMA_FBD_people_talking_about']
VZ
['SMA_FBD_new_fans', 'SMA_FBD_fans', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
4.8985132996674525e-05
['SMA_FBD_new_fans', 'SMA_FBD_fans', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_comments', 'Adj_Close_Lag', 'SMA_FBD_people_talking_about']
WBA
['SMA_FBD_admin_post_count', 'Unnamed: 0', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_comments', 'SMA_FBD_organic-admin_post_comments', 'Adj_Close_Lag']
0.012852484463223846
['SMA_FBD_admin_post_count', 'Unnamed: 0', 'SMA_FBD_engagement_score', 'SMA_FBD_admin_post_shares', 'SMA_FBD_admin_post_comments', 'SMA_FBD_organic-admin_post_comments', 'Adj_Close_Lag']
WDC
['Adj_Close_Lag', 'SMA_FBD_new_fans', 'SMA_FBD_fans']
0.00100

['SMA_FBP_type_PHOTO', 'Adj_Close_Lag']
0.00035619277673179517
['SMA_FBP_type_PHOTO', 'Adj_Close_Lag']
BBY
['SMA_FBP_sentiment_UNDEFINED', 'SMA_FBP_reactions-angry', 'Adj_Close_Lag', 'SMA_FBP_type_VIDEO']
6.260406510158467e-05
['SMA_FBP_sentiment_UNDEFINED', 'SMA_FBP_reactions-angry', 'Adj_Close_Lag', 'SMA_FBP_type_VIDEO']
BCR
['SMA_FBP_type_VIDEO', 'SMA_FBP_reach', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_reactions_love', 'SMA_FBP_shares', 'SMA_FBP_impressions', 'SMA_FBP_type_MUSIC', 'Adj_Close_Lag']
14.491764083211427
['SMA_FBP_type_VIDEO', 'SMA_FBP_reach', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_reactions_love', 'SMA_FBP_shares', 'SMA_FBP_impressions', 'SMA_FBP_type_MUSIC', 'Adj_Close_Lag']
BDX
['SMA_FBP_type_VIDEO', 'SMA_FBP_shares', 'SMA_FBP_type_STATUS', 'SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag']
0.03146076770415451
['SMA_FBP_type_VIDEO', 'SMA_FBP_shares', 'SMA_FBP_type_STATUS', 'SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag']
BIIB
['SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag', 'SMA

CSRA
['SMA_FBP_type_PHOTO', 'SMA_FBP_engagement_score', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_comments', 'SMA_FBP_total-reactions', 'SMA_FBP_shares', 'SMA_FBP_reactions-like', 'SMA_FBP_type_UNKNOWN', 'Adj_Close_Lag']
0.540158259588821
['SMA_FBP_type_PHOTO', 'SMA_FBP_engagement_score', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_comments', 'SMA_FBP_total-reactions', 'SMA_FBP_shares', 'SMA_FBP_reactions-like', 'SMA_FBP_type_UNKNOWN', 'Adj_Close_Lag']
CVS
['SMA_FBP_type_VIDEO', 'SMA_FBP_sentiment_UNDEFINED', 'Adj_Close_Lag', 'SMA_FBP_engagement_score']
4.976656811329222e-05
['SMA_FBP_type_VIDEO', 'SMA_FBP_sentiment_UNDEFINED', 'Adj_Close_Lag', 'SMA_FBP_engagement_score']
CVX
['SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_comments', 'SMA_FBP_reactions-like', 'SMA_FBP_type_STATUS', 'SMA_FBP_type_UNKNOWN', 'Adj_Close_Lag']
0.0005452742726243032
['SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_comments', 'SMA_FBP_reactions-like', 'SMA_FBP_type_STATUS', 'SMA_FBP_type_UNKNOWN', 'Adj_Close_Lag']
CXO
['SMA_FBP_comme

GOOGL
['SMA_FBP_reactions-wow', 'SMA_FBP_type_PHOTO', 'Adj_Close_Lag', 'SMA_FBP_engagement_score']
2.5724688764092174e-05
['SMA_FBP_reactions-wow', 'SMA_FBP_type_PHOTO', 'Adj_Close_Lag', 'SMA_FBP_engagement_score']
HAL
['SMA_FBP_reactions-sad', 'SMA_FBP_reactions-angry', 'SMA_FBP_type_VIDEO', 'SMA_FBP_engagement_score', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_reactions-like', 'SMA_FBP_impressions', 'SMA_FBP_type_MUSIC', 'Adj_Close_Lag']
0.580389177910423
['SMA_FBP_reactions-sad', 'SMA_FBP_reactions-angry', 'SMA_FBP_type_VIDEO', 'SMA_FBP_engagement_score', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_reactions-like', 'SMA_FBP_impressions', 'SMA_FBP_type_MUSIC', 'Adj_Close_Lag']
HBI
['SMA_FBP_type_NOTE', 'SMA_FBP_type_MUSIC', 'Adj_Close_Lag']
4.15911333201772e-05
['SMA_FBP_type_NOTE', 'SMA_FBP_type_MUSIC', 'Adj_Close_Lag']
HCA
['SMA_FBP_type_VIDEO', 'SMA_FBP_reactions-sad', 'SMA_FBP_type_PHOTO', 'SMA_FBP_engagement_score', 'SMA_FBP_reactions-wow', 'SMA_FBP_comments', 'SMA_FBP_reactions_love', '

['SMA_FBP_reactions-wow', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_total-reactions', 'SMA_FBP_type_NOTE', 'SMA_FBP_type_STATUS', 'SMA_FBP_type_MUSIC', 'SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag']
0.0009310143685257524
['SMA_FBP_reactions-wow', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_total-reactions', 'SMA_FBP_type_NOTE', 'SMA_FBP_type_STATUS', 'SMA_FBP_type_MUSIC', 'SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag']
LRCX
['SMA_FBP_sentiment_NEUTRAL', 'SMA_FBP_reactions-angry', 'SMA_FBP_type_PHOTO', 'Adj_Close_Lag']
1.2056096784722903
['SMA_FBP_sentiment_NEUTRAL', 'SMA_FBP_reactions-angry', 'SMA_FBP_type_PHOTO', 'Adj_Close_Lag']
LVLT
['SMA_FBP_type_VIDEO', 'SMA_FBP_type_PHOTO', 'SMA_FBP_reactions-wow', 'SMA_FBP_reactions-like', 'SMA_FBP_impressions', 'SMA_FBP_type_MUSIC', 'SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag']
0.26612507686142206
['SMA_FBP_type_VIDEO', 'SMA_FBP_type_PHOTO', 'SMA_FBP_reactions-wow', 'SMA_FBP_reactions-like', 'SMA_FBP_impressions', 'SMA_FBP_type_MUSIC', 'SMA_FBP_sentiment_

['SMA_FBP_type_VIDEO', 'SMA_FBP_reactions-angry', 'SMA_FBP_type_PHOTO', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_comments', 'SMA_FBP_total-reactions', 'Adj_Close_Lag']
3.531283029238941
['SMA_FBP_type_VIDEO', 'SMA_FBP_reactions-angry', 'SMA_FBP_type_PHOTO', 'SMA_FBP_sentiment_POSITIVE', 'SMA_FBP_comments', 'SMA_FBP_total-reactions', 'Adj_Close_Lag']
QCOM
['SMA_FBP_reactions-sad', 'SMA_FBP_engagement_score', 'SMA_FBP_comments', 'Unnamed: 0.1.1', 'Adj_Close_Lag']
1.1380008274636886
['SMA_FBP_reactions-sad', 'SMA_FBP_engagement_score', 'SMA_FBP_comments', 'Unnamed: 0.1.1', 'Adj_Close_Lag']
R
['SMA_FBP_reactions_love', 'Adj_Close_Lag', 'SMA_FBP_reactions-wow']
0.0002475356683095953
['SMA_FBP_reactions_love', 'Adj_Close_Lag', 'SMA_FBP_reactions-wow']
REGN
['SMA_FBP_engagement_score', 'SMA_FBP_comments', 'SMA_FBP_type_STATUS', 'SMA_FBP_sentiment_NEUTRAL', 'Adj_Close_Lag']
0.15530216762604088
['SMA_FBP_engagement_score', 'SMA_FBP_comments', 'SMA_FBP_type_STATUS', 'SMA_FBP_sentiment_NEUTRAL', 'A

['SMA_FBUP_response_time', 'SMA_FBUP_type_PHOTO', 'Adj_Close_Lag']
0.3324312915218691
['SMA_FBUP_response_time', 'SMA_FBUP_type_PHOTO', 'Adj_Close_Lag']
ABT
['SMA_FBUP_sentiment_UNDEFINED', 'Adj_Close_Lag', 'SMA_FBUP_sentiment_POSITIVE']
0.5564016774912909
['SMA_FBUP_sentiment_UNDEFINED', 'Adj_Close_Lag', 'SMA_FBUP_sentiment_POSITIVE']
ADBE
['SMA_FBUP_type_VIDEO', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_shares', 'SMA_FBUP_response_time', 'Adj_Close_Lag']
0.00017233426322781542
['SMA_FBUP_type_VIDEO', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_shares', 'SMA_FBUP_response_time', 'Adj_Close_Lag']
ADI
['Unnamed: 0.1.1', 'SMA_FBUP_sentiment_POSITIVE', 'Adj_Close_Lag', 'SMA_FBUP_sentiment_NEUTRAL']
0.16818179232967623
['Unnamed: 0.1.1', 'SMA_FBUP_sentiment_POSITIVE', 'Adj_Close_Lag', 'SMA_FBUP_sentiment_NEUTRAL']
ADM
['Unnamed: 0', 'SMA_FBUP_comments', 'Adj_Close_Lag', 'Unnamed: 0.1']
0.4419235791199583
['Unnamed: 0', 'SMA_FBUP_comments', 'Adj_Close_Lag', 'Unnamed: 0.1']
ADP
['Unnamed: 0', 'S

['SMA_FBUP_likes', 'SMA_FBUP_type_UNKNOWN', 'Adj_Close_Lag', 'SMA_FBUP_sentiment_POSITIVE']
9.061463514894313e-06
['SMA_FBUP_likes', 'SMA_FBUP_type_UNKNOWN', 'Adj_Close_Lag', 'SMA_FBUP_sentiment_POSITIVE']
ES
['SMA_FBUP_sentiment_UNDEFINED', 'Unnamed: 0.1.1', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_comments', 'Adj_Close_Lag', 'SMA_FBUP_likes']
1.5720688988778853
['SMA_FBUP_sentiment_UNDEFINED', 'Unnamed: 0.1.1', 'SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_comments', 'Adj_Close_Lag', 'SMA_FBUP_likes']
ESRX
['Unnamed: 0.1.1', 'SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
0.0006169143352243722
['Unnamed: 0.1.1', 'SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
F
['SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
3.58176273609508e-07
['SMA_FBUP_shares', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
FCX
['SMA_FBUP_comments', 'Adj_Close_Lag']
0.003684955894823193
['SMA_FBUP_comments', 'Adj_Close_Lag']
FDX
['S

MSI
['SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_type_VIDEO', 'SMA_FBUP_sentiment_POSITIVE', 'Unnamed: 0', 'SMA_FBUP_shares', 'SMA_FBUP_comments', 'Adj_Close_Lag']
0.0027770398141222508
['SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_type_VIDEO', 'SMA_FBUP_sentiment_POSITIVE', 'Unnamed: 0', 'SMA_FBUP_shares', 'SMA_FBUP_comments', 'Adj_Close_Lag']
MU
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_type_UNKNOWN', 'SMA_FBUP_response_time', 'Adj_Close_Lag', 'SMA_FBUP_likes']
0.8889545590850243
['SMA_FBUP_sentiment_POSITIVE', 'SMA_FBUP_type_UNKNOWN', 'SMA_FBUP_response_time', 'Adj_Close_Lag', 'SMA_FBUP_likes']
NOC
['SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_response_time', 'SMA_FBUP_comments', 'Adj_Close_Lag', 'SMA_FBUP_likes']
1.9084003441927493
['SMA_FBUP_sentiment_NEUTRAL', 'SMA_FBUP_response_time', 'SMA_FBUP_comments', 'Adj_Close_Lag', 'SMA_FBUP_likes']
NVDA
['SMA_FBUP_sentiment_UNDEFINED', 'SMA_FBUP_type_PHOTO', 'Adj_Close_Lag', 'SMA_FBUP_type_VIDEO']
0.4193178781215293
['SMA_FBUP_sentiment_UNDEFINED'

['Unnamed: 0', 'Adj_Close_Lag', 'SMA_INSD_followers_count', 'SMA_INSD_followees_count']
0.0009095498713698191
['Unnamed: 0', 'Adj_Close_Lag', 'SMA_INSD_followers_count', 'SMA_INSD_followees_count']
C
['Adj_Close_Lag', 'SMA_INSD_engagement_score']
0.00557464312270129
['Adj_Close_Lag', 'SMA_INSD_engagement_score']
CAG
['SMA_INSD_total_posts_count', 'Adj_Close_Lag', 'SMA_INSD_followees_count']
4.410140710879899e-05
['SMA_INSD_total_posts_count', 'Adj_Close_Lag', 'SMA_INSD_followees_count']
CAT
['SMA_INSD_posts_count', 'Adj_Close_Lag']
2.4683106474576646
['SMA_INSD_posts_count', 'Adj_Close_Lag']
CCL
['SMA_INSD_total_posts_count', 'Adj_Close_Lag']
9.827723992230763e-05
['SMA_INSD_total_posts_count', 'Adj_Close_Lag']
CHK
['Adj_Close_Lag', 'SMA_INSD_followers_count']
0.006695902805715536
['Adj_Close_Lag', 'SMA_INSD_followers_count']
CI
['SMA_INSD_likes_count', 'SMA_INSD_posts_count', 'Adj_Close_Lag', 'SMA_INSD_comments_count']
1.3070873078134624
['SMA_INSD_likes_count', 'SMA_INSD_posts_count'

0.1661955745546706
['Unnamed: 0', 'SMA_INSD_posts_count', 'Adj_Close_Lag', 'SMA_INSD_comments_count']
MCD
['SMA_INSD_posts_count', 'Adj_Close_Lag']
7.598725202046528e-05
['SMA_INSD_posts_count', 'Adj_Close_Lag']
MCK
['SMA_INSD_posts_count', 'Adj_Close_Lag']
2.416032854735323
['SMA_INSD_posts_count', 'Adj_Close_Lag']
MNST
['SMA_INSD_likes_count', 'SMA_INSD_comments_count', 'SMA_INSD_followees_count', 'Adj_Close_Lag', 'SMA_INSD_engagement_score']
0.0017411496987836048
['SMA_INSD_likes_count', 'SMA_INSD_comments_count', 'SMA_INSD_followees_count', 'Adj_Close_Lag', 'SMA_INSD_engagement_score']
MRK
['Adj_Close_Lag', 'SMA_INSD_engagement_score', 'SMA_INSD_comments_count', 'SMA_INSD_followees_count']
0.000393355229941994
['Adj_Close_Lag', 'SMA_INSD_engagement_score', 'SMA_INSD_comments_count', 'SMA_INSD_followees_count']
MRO
['SMA_INSD_likes_count', 'SMA_INSD_posts_count', 'Adj_Close_Lag', 'SMA_INSD_followees_count']
0.20113275420634558
['SMA_INSD_likes_count', 'SMA_INSD_posts_count', 'Adj_Cl

['Unnamed: 0', 'SMA_INSP_engagement_score', 'SMA_INSP_comments_count', 'Adj_Close_Lag']
0.005494735395053851
['Unnamed: 0', 'SMA_INSP_engagement_score', 'SMA_INSP_comments_count', 'Adj_Close_Lag']
BBY
['Unnamed: 0.1', 'Unnamed: 0.1.1', 'SMA_INSP_likes_count', 'Unnamed: 0', 'SMA_INSP_type_video', 'Adj_Close_Lag']
1.4484932290030983
['Unnamed: 0.1', 'Unnamed: 0.1.1', 'SMA_INSP_likes_count', 'Unnamed: 0', 'SMA_INSP_type_video', 'Adj_Close_Lag']
C
['Unnamed: 0', 'Adj_Close_Lag']
0.018077381634776124
['Unnamed: 0', 'Adj_Close_Lag']
CAG
['SMA_INSP_type_video', 'SMA_INSP_likes_count', 'Adj_Close_Lag']
0.010529713269552017
['SMA_INSP_type_video', 'SMA_INSP_likes_count', 'Adj_Close_Lag']
CAT
['Unnamed: 0', 'Unnamed: 0.1.1', 'SMA_INSP_comments_count', 'Adj_Close_Lag']
0.4996292812487013
['Unnamed: 0', 'Unnamed: 0.1.1', 'SMA_INSP_comments_count', 'Adj_Close_Lag']
CCL
['SMA_INSP_engagement_score', 'SMA_INSP_type_video', 'SMA_INSP_comments_count', 'Adj_Close_Lag']
3.518377206518559e-05
['SMA_INSP_e

['SMA_INSP_engagement_score', 'Unnamed: 0.1.1', 'Adj_Close_Lag']
0.5511163412423823
['SMA_INSP_engagement_score', 'Unnamed: 0.1.1', 'Adj_Close_Lag']
MS
['SMA_INSP_type_video', 'Adj_Close_Lag']
0.7786864508132191
['SMA_INSP_type_video', 'Adj_Close_Lag']
MSFT
['SMA_INSP_engagement_score', 'Unnamed: 0.1.1', 'Adj_Close_Lag']
0.0009631011110738941
['SMA_INSP_engagement_score', 'Unnamed: 0.1.1', 'Adj_Close_Lag']
MSI
['SMA_INSP_engagement_score', 'Adj_Close_Lag']
1.774671633668782
['SMA_INSP_engagement_score', 'Adj_Close_Lag']
MU
['SMA_INSP_type_video', 'Adj_Close_Lag']
0.28709069378601976
['SMA_INSP_type_video', 'Adj_Close_Lag']
NOC
['SMA_INSP_likes_count', 'Adj_Close_Lag']
0.7764353709317728
['SMA_INSP_likes_count', 'Adj_Close_Lag']
NOV
['SMA_INSP_engagement_score', 'Adj_Close_Lag']
0.490551679565634
['SMA_INSP_engagement_score', 'Adj_Close_Lag']
NVDA
['Adj_Close_Lag']
0.3937665803179107
['Adj_Close_Lag']
PEP
['Adj_Close_Lag']
0.0002559755304509068
['Adj_Close_Lag']
PFE
['Unnamed: 0.1', 'Un

['Adj_Close_Lag']
7.357730609148293e-05
['Adj_Close_Lag']
BAX
['Adj_Close_Lag', 'SMA_TWTD_tweets_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_engagement_score']
0.05289270668521071
['Adj_Close_Lag', 'SMA_TWTD_tweets_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_engagement_score']
BBY
['SMA_TWTD_replies_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_followers_count', 'Unnamed: 0.1', 'Unnamed: 0', 'SMA_TWTD_followees_count', 'SMA_TWTD_favorites_count', 'SMA_TWTD_brand_replies_count', 'Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
0.13510263546884005
['SMA_TWTD_replies_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_followers_count', 'Unnamed: 0.1', 'Unnamed: 0', 'SMA_TWTD_followees_count', 'SMA_TWTD_favorites_count', 'SMA_TWTD_brand_replies_count', 'Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
BDX
['SMA_TWTD_total_tweets_count', 'SMA_TWTD_tweets_count', 'SMA_TWTD_replies_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_followers_count', 'SMA_TWTD_retweets_coun

['SMA_TWTD_tweets_count', 'Unnamed: 0.1', 'Unnamed: 0', 'SMA_TWTD_brand_replies_count', 'Adj_Close_Lag']
1.3779474349590697
['SMA_TWTD_tweets_count', 'Unnamed: 0.1', 'Unnamed: 0', 'SMA_TWTD_brand_replies_count', 'Adj_Close_Lag']
EFX
['SMA_TWTD_replies_count', 'SMA_TWTD_retweets_count', 'SMA_TWTD_brand_replies_count', 'Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
0.0002884574960541758
['SMA_TWTD_replies_count', 'SMA_TWTD_retweets_count', 'SMA_TWTD_brand_replies_count', 'Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
EIX
['Adj_Close_Lag', 'SMA_TWTD_brand_proactive_count']
0.0006693375469783783
['Adj_Close_Lag', 'SMA_TWTD_brand_proactive_count']
ESRX
['SMA_TWTD_tweets_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_followers_count', 'Unnamed: 0', 'SMA_TWTD_followees_count', 'Adj_Close_Lag']
1.0134394539914808
['SMA_TWTD_tweets_count', 'SMA_TWTD_brand_proactive_count', 'SMA_TWTD_followers_count', 'Unnamed: 0', 'SMA_TWTD_followees_count', 'Adj_Close_Lag']
F
['SMA_TWTD_total_tweets_cou

['Unnamed: 0', 'SMA_TWTD_followees_count', 'Adj_Close_Lag']
4.492075275808194e-05
['Unnamed: 0', 'SMA_TWTD_followees_count', 'Adj_Close_Lag']
KHC
['Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
0.00021986236613159946
['Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
KMB
['SMA_TWTD_brand_proactive_count', 'SMA_TWTD_retweets_count', 'SMA_TWTD_engagement_score', 'SMA_TWTD_favorites_count', 'SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag']
0.003479463638749114
['SMA_TWTD_brand_proactive_count', 'SMA_TWTD_retweets_count', 'SMA_TWTD_engagement_score', 'SMA_TWTD_favorites_count', 'SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag']
KMI
['SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag']
0.0008017802078661429
['SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag']
KO
['SMA_TWTD_tweets_count', 'SMA_TWTD_engagement_score', 'SMA_TWTD_followees_count', 'Adj_Close_Lag', 'SMA_TWTD_average_reply_time']
2.5854072266539517e-05
['SMA_TWTD_tweets_count', 'SMA_TWTD_engagement_score', 'SMA_TWTD_followees_count', 'A

['Adj_Close_Lag']
0.001217228582690697
['Adj_Close_Lag']
PPL
['SMA_TWTD_total_tweets_count', 'SMA_TWTD_retweets_count', 'SMA_TWTD_tweets_count', 'Adj_Close_Lag']
2.8976541706360326e-07
['SMA_TWTD_total_tweets_count', 'SMA_TWTD_retweets_count', 'SMA_TWTD_tweets_count', 'Adj_Close_Lag']
PSX
['SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag', 'SMA_TWTD_engagement_score']
0.080188661712362
['SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag', 'SMA_TWTD_engagement_score']
PVH
['SMA_TWTD_followees_count', 'SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag', 'SMA_TWTD_brand_proactive_count']
7.928296738111788e-05
['SMA_TWTD_followees_count', 'SMA_TWTD_brand_retweets_count', 'Adj_Close_Lag', 'SMA_TWTD_brand_proactive_count']
QCOM
['SMA_TWTD_followees_count', 'Adj_Close_Lag', 'SMA_TWTD_replies_count', 'SMA_TWTD_favorites_count']
0.00036273342862989174
['SMA_TWTD_followees_count', 'Adj_Close_Lag', 'SMA_TWTD_replies_count', 'SMA_TWTD_favorites_count']
R
['SMA_TWTD_brand_proactive_count', 'SMA_TWTD_follo

AES
['SMA_TWTT_retweet_count', 'Adj_Close_Lag']
0.0012662013361259689
['SMA_TWTT_retweet_count', 'Adj_Close_Lag']
AET
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.05881445114278365
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
AMAT
['SMA_TWTT_retweet_count', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.09068597407927959
['SMA_TWTT_retweet_count', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
AMD
['SMA_TWTT_engagement_score', 'Adj_Close_Lag']
0.002830476836376077
['SMA_TWTT_engagement_score', 'Adj_Close_Lag']
AMGN
['SMA_TWTT_engagement_score', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag']
1.773653155238454e-05
['SMA_TWTT_engagement_score', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag']
ANTM
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.06853117343783498
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
APA
['Unnamed: 0', 'SMA_TWTT_replies_count', 'Adj_Close_Lag', 'SMA_TWTT_engagement_score']
0.3837971821627164
['Unnamed: 0', 'SMA_TWTT_replies_count', 'Adj_Close_Lag', 'SMA_TWTT_engagement_score']
BA
['Un

HPE
['SMA_TWTT_favorite_count', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag']
1.4162609398583768e-05
['SMA_TWTT_favorite_count', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag']
HPQ
['Unnamed: 0', 'Adj_Close_Lag']
0.0006157522091037176
['Unnamed: 0', 'Adj_Close_Lag']
HRS
['SMA_TWTT_retweet_count', 'Adj_Close_Lag']
8.164137734745919e-05
['SMA_TWTT_retweet_count', 'Adj_Close_Lag']
HSIC
['SMA_TWTT_engagement_score', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.00025643412015879073
['SMA_TWTT_engagement_score', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
HUM
['SMA_TWTT_engagement_score', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.024985038415629393
['SMA_TWTT_engagement_score', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
IBM
['SMA_TWTT_favorite_count', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
2.4495764504481424e-05
['SMA_TWTT_favorite_count', 'Adj_Close_Lag', 'SMA_TWTT_replies_count']
IFF
['Unnamed: 0', 'Adj_Close_Lag']
1.572018900329832
['Unnamed: 0', 'Adj_Close_Lag']
INCY
['SMA_TWTT_engagement_

['Unnamed: 0', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag', 'SMA_TWTT_favorite_count']
0.00226834054441305
['Unnamed: 0', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag', 'SMA_TWTT_favorite_count']
TXT
['Adj_Close_Lag']
0.26285690815602825
['Adj_Close_Lag']
UHS
['Unnamed: 0', 'Adj_Close_Lag']
0.19511424719085824
['Unnamed: 0', 'Adj_Close_Lag']
UNH
['SMA_TWTT_engagement_score', 'SMA_TWTT_favorite_count', 'Unnamed: 0', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag']
0.011128960289056522
['SMA_TWTT_engagement_score', 'SMA_TWTT_favorite_count', 'Unnamed: 0', 'SMA_TWTT_retweet_count', 'Adj_Close_Lag']
UPS
['Adj_Close_Lag']
0.0009395794757375278
['Adj_Close_Lag']
UTX
['Unnamed: 0', 'Unnamed: 0.1', 'Adj_Close_Lag']
0.007906825795366501
['Unnamed: 0', 'Unnamed: 0.1', 'Adj_Close_Lag']
VFC
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.00013278373186521338
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
VZ
['Adj_Close_Lag', 'SMA_TWTT_replies_count']
0.0010281708931851998
['Adj_Close_Lag', 'SMA_TWTT_replies_count'

['Unnamed: 0', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
0.10286370926472443
['Unnamed: 0', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
DD
['Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
0.050513875425943426
['Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
DE
['Unnamed: 0', 'SMA_YTCD_uploads_count', 'Adj_Close_Lag']
0.5046367826714737
['Unnamed: 0', 'SMA_YTCD_uploads_count', 'Adj_Close_Lag']
DGX
['Unnamed: 0', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
1.0340520368051358
['Unnamed: 0', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
DISH
['SMA_YTCD_uploads_count', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag']
0.3572813548113123
['SMA_YTCD_uploads_count', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag']
DPS
['Unnamed: 0', 'Adj_Close_Lag']
0.9971874663620293
['Unnamed: 0', 'Adj_Close_Lag']
DVA
['SMA_YTCD_uploads_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']


['SMA_YTCD_uploads_count', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag']
0.0033017531050413885
['SMA_YTCD_uploads_count', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag']
MRO
['SMA_YTCD_page_views_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
0.16998531082679708
['SMA_YTCD_page_views_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
MS
['Unnamed: 0.1', 'Unnamed: 0', 'SMA_YTCD_uploads_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
0.712549975636783
['Unnamed: 0.1', 'Unnamed: 0', 'SMA_YTCD_uploads_count', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
MSFT
['Adj_Close_Lag']
0.0018365283886300696
['Adj_Close_Lag']
MSI
['SMA_YTCD_uploads_count', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag']
0.03058149798539362
['SMA_YTCD_uploads_count', 'SMA_YTCD_page_views_count', 'Adj_Close_Lag']
MU
['Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
0.546189283510691
['Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
NBL
['Unnamed: 0', 'Adj_Close_Lag', 'SMA_YTCD_subscribers_count']
0.3976250

['SMA_YTVD_comments_count', 'Unnamed: 0.1.1', 'SMA_YTVD_views_count', 'SMA_YTVD_yt_duration', 'Unnamed: 0', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
0.2678667719562937
['SMA_YTVD_comments_count', 'Unnamed: 0.1.1', 'SMA_YTVD_views_count', 'SMA_YTVD_yt_duration', 'Unnamed: 0', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
AMGN
['SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_yt_duration', 'SMA_YTVD_comments_count']
0.3789811696310872
['SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_yt_duration', 'SMA_YTVD_comments_count']
AMT
['SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_yt_duration', 'SMA_YTVD_comments_count']
ANTM
['SMA_YTVD_dislikes_count', 'SMA_YTVD_comments_count', 'SMA_YTVD_yt_duration', 'SMA_YTVD_views_count', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
0.27281548574342607
['SMA_YTVD_dislikes_count', 'SMA_YTVD_comments_count', 'SMA_YTVD_yt_duration', 'SMA_YTVD_views_count', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
APA
['SMA_YTVD_dislikes_count', 'SMA_YTVD_comments_cou

['SMA_YTVD_dislikes_count', 'Unnamed: 0.1.1', 'SMA_YTVD_yt_duration', 'SMA_YTVD_views_count', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
0.020971611844834648
['SMA_YTVD_dislikes_count', 'Unnamed: 0.1.1', 'SMA_YTVD_yt_duration', 'SMA_YTVD_views_count', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
GM
['Adj_Close_Lag', 'SMA_YTVD_comments_count']
0.0020606163964525456
['Adj_Close_Lag', 'SMA_YTVD_comments_count']
GOOGL
['SMA_YTVD_comments_count', 'Unnamed: 0.1.1', 'SMA_YTVD_yt_duration', 'SMA_YTVD_views_count', 'Adj_Close_Lag']
0.35780848499933327
['SMA_YTVD_comments_count', 'Unnamed: 0.1.1', 'SMA_YTVD_yt_duration', 'SMA_YTVD_views_count', 'Adj_Close_Lag']
HAL
['Unnamed: 0.1.1', 'SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_comments_count']
3.7022282949666234
['Unnamed: 0.1.1', 'SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_comments_count']
HBI
['Unnamed: 0', 'Adj_Close_Lag']
0.0705818556328568
['Unnamed: 0', 'Adj_Close_Lag']
HCA
['Adj_Close_Lag', 'SMA_YTVD_views_count']
0.01917984

['SMA_YTVD_comments_count', 'Unnamed: 0.1', 'SMA_YTVD_yt_duration', 'Unnamed: 0', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
0.16893229095301554
['SMA_YTVD_comments_count', 'Unnamed: 0.1', 'SMA_YTVD_yt_duration', 'Unnamed: 0', 'SMA_YTVD_likes_count', 'Adj_Close_Lag']
SWKS
['SMA_YTVD_dislikes_count', 'Adj_Close_Lag']
2.8292002788831363
['SMA_YTVD_dislikes_count', 'Adj_Close_Lag']
SYMC
['Unnamed: 0.1.1', 'SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_comments_count']
0.00022635229259182488
['Unnamed: 0.1.1', 'SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_comments_count']
SYY
['Unnamed: 0.1.1', 'Adj_Close_Lag']
0.9289082398375967
['Unnamed: 0.1.1', 'Adj_Close_Lag']
TAP
['SMA_YTVD_likes_count', 'SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_comments_count']
0.49969370110571526
['SMA_YTVD_likes_count', 'SMA_YTVD_dislikes_count', 'Adj_Close_Lag', 'SMA_YTVD_comments_count']
TGT
['Unnamed: 0', 'Unnamed: 0.1.1', 'Adj_Close_Lag', 'SMA_YTVD_yt_duration']
0.026288552901063245
['

Other old stuff

In [None]:
dfsSMA = {}

relevant_csvs = ['SMA_TWTT.csv',
 'SMA_YTCD.csv',
 'SMA_FBP.csv',
 'SMA_TWTD.csv',
 'SMA_INSP.csv',
 'SMA_INSD.csv',
 'SMA_FBD.csv',
 'SMA_YTVD.csv']

for e in relevant_csvs:
    dfsSMA = {}
    dfsSMA[e[:-4]] = pd.read_csv(os.path.join('datasets', 'SMA', e))

In [None]:
dfsSMA_cc = {}

for indicator in dfsSMA.keys():
    df_temp = dfsSMA[indicator]
    df_temp = df_temp[df_temp['brand_ticker'].isin(correct_tickers_set)]
    dfsSMA_cc[indicator] = df_temp

In [None]:
dfsSMA_cc_ra = {} # relevant attributes only
dfsSMA_cc_ra_price = {}

for indicator in dfsSMA_cc.keys():
    dfsSMA_cc_ra[indicator] = dfsSMA_cc[indicator][metrics[indicator]]
    # rename the indicators to include also the general name - this is needed for one big frame
    metrics_dct = {}
    for metric in metrics[indicator]:
        if metric != 'brand_ticker' and metric != 'date':
            new_metric_name = indicator + '_' + metric
            metrics_dct[metric] = new_metric_name
    dfsSMA_cc_ra[indicator] = dfsSMA_cc_ra[indicator].rename(columns=metrics_dct)
    dfsSMA_cc_ra[indicator] = dfsSMA_cc_ra[indicator].reset_index(drop=True)
    dfsSMA_cc_ra_price[indicator] = pd.merge(dfsSMA_cc_ra[indicator], stock_files_df,on=['brand_ticker', 'date'], how='inner')

In [None]:
for indicator in categ_metrics.keys():
    if not categ_metrics[indicator]:
        for categ_var in categ_metrics[indicator]:
            dfsSMA_cc_ra_price[indicator][categ_var] = dfsSMA_cc_ra_price[indicator][categ_var].astype('category')
        non_categ_vars = list(set(dfsSMA_cc_ra_price[indicator].columns)-set(categ_metrics[indicator]))
    
    sma_temp_price_with_categs = pd.get_dummies(dfsSMA_cc_ra_price[categ_var], drop_first=True)
    # drop first does it correctly even for multiple categorical variables
    sma_temp_price_with_categs[non_categ_vars] = dfsSMA_cc_ra_price[indicator][non_categ_vars]
    sma_temp_price_with_categs.to_csv(indicator.lower() + '_2.csv')