 ## C add attributes 2 to trade log
 The design matrix has one record per row.

In [1]:
# imports

import pandas as pd
import numpy as np # for np.nan
import os # for path

import yfinance as yf

import json

from pandas_datareader.quandl import QuandlReader #data side

In [2]:
# Load the formatted trade log; later cells work on a copy of this frame.
### INPUT ###

# CSV path of the formatted trade log
trades_filename = 'output/b_completewattr.csv'
df_raw = pd.read_csv(filepath_or_buffer=trades_filename)

In [3]:
# Pull market data for the tickers below from Yahoo Finance, or reuse the
# copy cached on disk by an earlier run.

# Set to True to re-download and overwrite the cached CSV; the Quandl cell
# further down reads this same flag.
reload_data = False

tickers = ['^VIX' , '^GSPC']
if reload_data:
    df_data = yf.download(
        ' '.join(tickers),
        start="2010-01-01", end="2020-12-01",
        # 'ticker' is the documented value; puts the ticker on the outer
        # column level so stack(level=0) below moves it into the rows
        group_by='ticker',
    )
    # turn into tabular form: one row per (Date, Ticker)
    df_data_formatted = df_data.stack(level=0).reset_index().rename(columns={'level_1':'Ticker'})
    # index=False: do not persist the RangeIndex, otherwise the cached branch
    # comes back with an extra 'Unnamed: 0' column the fresh branch lacks
    df_data_formatted.to_csv('output/c_mktdata.csv', index=False)
else:
    # parse_dates keeps Date a datetime column instead of a string, so both
    # branches hand the same dtypes to the cells that follow
    df_data_formatted = pd.read_csv('output/c_mktdata.csv', parse_dates=['Date'])

df_data_formatted.head()

Unnamed: 0.1,Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume
0,0,2010-01-04,^GSPC,1132.98999,1132.98999,1133.869995,1116.560059,1116.560059,3991400000
1,1,2010-01-04,^VIX,20.040001,20.040001,21.68,20.030001,21.68,0
2,2,2010-01-05,^GSPC,1136.52002,1136.52002,1136.630005,1129.660034,1132.660034,2491020000
3,3,2010-01-05,^VIX,19.35,19.35,20.129999,19.34,20.049999,0
4,4,2010-01-06,^GSPC,1137.140015,1137.140015,1139.189941,1133.949951,1135.709961,4972660000


In [4]:
# Reshape the long market data to one row per date with one Close column
# per ticker.

close_by_ticker = df_data_formatted.pivot(
    index=['Date'],
    columns=['Ticker'],
    values=['Close'],
)
df_data_pivot = close_by_ticker.reset_index()

# Flatten the two-level column labels: ('Close', '^VIX') -> 'Close_^VIX'.
# The date key has an empty second level and therefore becomes 'Date_'.
flat_names = []
for col_levels in df_data_pivot.columns.values:
    flat_names.append('_'.join(col_levels).strip())
df_data_pivot.columns = flat_names

# the date key must be a real datetime for the as-of merge further down
df_data_pivot['Date_'] = pd.to_datetime(df_data_pivot['Date_'], errors='coerce')
df_data_pivot.head()



Unnamed: 0,Date_,Close_^GSPC,Close_^VIX
0,2010-01-04,1132.98999,20.040001
1,2010-01-05,1136.52002,19.35
2,2010-01-06,1137.140015,19.16
3,2010-01-07,1141.689941,19.059999
4,2010-01-08,1144.97998,18.129999


In [5]:
# Attach the market closes to every trade via an as-of join on the
# trade's open date.

df_source = df_raw.copy(deep=True)
df_source['Open_Date'] = pd.to_datetime(df_source['Open_Date'], errors='coerce')

# merge_asof requires both frames to be ordered by their join key
df_source = df_source.sort_values(by='Open_Date')
df_data_pivot = df_data_pivot.sort_values(by='Date_')

# No direction is given, so pandas uses its default ('backward'): each trade
# receives the last market row dated on or before Open_Date.
df_result = pd.merge_asof(
    left=df_source,
    right=df_data_pivot,
    left_on='Open_Date',
    right_on='Date_',
)

In [6]:
# Pull the AAII/AAII_SENTIMENT dataset from Quandl, or reuse the copy cached
# on disk by an earlier run (controlled by reload_data).

if reload_data:

    # the API key is read from a local JSON file rather than written here
    with open('data/vars.json', 'r') as json_file:
        var_dict = json.load(json_file)

    quandl_key = var_dict['QUANDL_API']

    QR = QuandlReader("AAII/AAII_SENTIMENT",api_key=quandl_key)

    QR_df = QR.read().reset_index()

    # prefix every column (the date included, giving 'AAII_Sent_Date') so the
    # names cannot collide with trade-log columns when merged
    QR_df.columns = ['AAII_Sent_' + str(col)  for col in QR_df.columns]
    # index=False: do not persist the RangeIndex, otherwise the cached branch
    # comes back with an extra 'Unnamed: 0' column the fresh branch lacks
    QR_df.to_csv('output/c_mktdata_aaii.csv', index=False)
else:
    # parse_dates keeps the date key a datetime column instead of a string
    QR_df = pd.read_csv('output/c_mktdata_aaii.csv', parse_dates=['AAII_Sent_Date'])

In [7]:
# Merge the Quandl sentiment data onto the trades via an as-of join.

# Parse the key first and sort afterwards, so the ordering is chronological
# even when the column arrives as strings. Rows whose date cannot be parsed
# are dropped: merge_asof rejects null keys on the right side and such rows
# could never match a trade anyway.
QR_df_sorted = (
    QR_df
    .assign(AAII_Sent_Date=pd.to_datetime(QR_df['AAII_Sent_Date'], errors='coerce'))
    .dropna(subset=['AAII_Sent_Date'])
    .sort_values('AAII_Sent_Date')
)

# NOTE(review): direction='forward' attaches the first sentiment row dated on
# or after Open_Date, whereas the market-data merge uses the default backward
# direction - confirm the forward look is intended and is not look-ahead.
df_result = pd.merge_asof(
    df_result, QR_df_sorted,left_on=['Open_Date'],right_on=['AAII_Sent_Date'],
    direction='forward',
)

In [8]:
# Persist the enriched trade log (the frame's index is written as the
# first CSV column).

df_result.to_csv(path_or_buf='output/c_resulttradewattr.csv')