In [434]:
# Import libraries and dependencies
import os
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import datetime 

%matplotlib inline

In [435]:
# Load .env enviroment variables
from dotenv import load_dotenv
load_dotenv()

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create an object for aplpaca api 
api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version = "v2"
)


In [436]:
sectors_csv = Path("Resources/spy_etf_sectors.csv")
sectors_df = pd.read_csv(sectors_csv)

sectors_df = sectors_df.drop(columns=["Zacks\nG Sector", "\nCompany", "% of\nPortfolio"])

sectors_df = sectors_df.rename(columns={
    "\nTicker": "tickers",
    "Russell\nSector": "sector"
})
sectors_df

Unnamed: 0,tickers,sector
0,ATVI,TECHNOLOGY
1,GOOGL,TECHNOLOGY
2,GOOG,TECHNOLOGY
3,T,UTILITIES
4,CTL,UTILITIES
...,...,...
499,PEG,UTILITIES
500,SRE,UTILITIES
501,SO,UTILITIES
502,WEC,UTILITIES


In [437]:
dxy_csv = Path("Resources/$dxy_historical_data_01-01_to_09-15-2020.csv")
dxy_df = pd.read_csv(dxy_csv)
dxy_df = dxy_df.rename(columns={"Last":"DXY", "Time":"date"})
dxy_df = dxy_df[["date", "DXY"]].dropna()

In [438]:
btc_csv = Path("Resources/btcusd_historical_data_01-01_to_09-15-2020.csv")
btc_df = pd.read_csv(btc_csv)
btc_df = btc_df.rename(columns={"Last":"BTC", "Time":"date"}).dropna()
btc_df = btc_df[["date", "BTC"]]

In [439]:
xau_csv = Path("Resources/btcusd_historical_data_01-01_to_09-15-2020.csv")
xau_df = pd.read_csv(xau_csv)
xau_df = xau_df.rename(columns={"Last":"XAU", "Time":"date"}).dropna()
xau_df = xau_df[["date", "XAU"]]

In [440]:
alternative_assets = xau_df.merge(btc_df, on='date')
alternative_assets = alternative_assets.merge(dxy_df, on='date')
alternative_assets['date'] = pd.to_datetime(alternative_assets['date'], utc=True).dt.date
alternative_assets

Unnamed: 0,date,XAU,BTC,DXY
0,2020-09-15,10866.25,10866.25,93.07
1,2020-09-14,10692.27,10692.27,93.05
2,2020-09-11,10330.04,10330.04,93.27
3,2020-09-10,10304.28,10304.28,93.34
4,2020-09-09,10260.84,10260.84,93.26
...,...,...,...,...
179,2020-01-08,8003.06,8003.06,97.31
180,2020-01-07,8018.82,8018.82,96.96
181,2020-01-06,7580.60,7580.60,96.62
182,2020-01-03,7278.13,7278.13,96.89


In [441]:
# Set the ticker to stripes of 100
ticker_list = sectors_df["tickers"].to_list()
ticker_stripe_1 = ticker_list[0:99]
ticker_stripe_2 = ticker_list[99:198]
ticker_stripe_3 = ticker_list[198:297]
ticker_stripe_4 = ticker_list[297:396]
ticker_stripe_5 = ticker_list[396:495]
ticker_stripe_6 = ticker_list[495:]

# Set timeframe to '1D'
timeframe = "1D"

# Set start and end datetimes of from Jan 1 2020 to Sep 15 2020
start_date = start = pd.Timestamp("2020-01-01", tz="America/Chicago").isoformat()
end_date = pd.Timestamp("2020-09-15", tz="America/Chicago").isoformat()

# Chunk the data by stripes to make it compaitible with alpaca api ingestion 

spy_stripe_1 = api.get_barset(
    ticker_stripe_1,
    timeframe,
    start=start_date,
    end=end_date
).df

spy_stripe_2 = api.get_barset(
    ticker_stripe_2,
    timeframe,
    start=start_date,
    end=end_date
).df

spy_stripe_3 = api.get_barset(
    ticker_stripe_3,
    timeframe,
    start=start_date,
    end=end_date
).df

spy_stripe_4 = api.get_barset(
    ticker_stripe_4,
    timeframe,
    start=start_date,
    end=end_date
).df

spy_stripe_5 = api.get_barset(
    ticker_stripe_5,
    timeframe,
    start=start_date,
    end=end_date
).df

spy_stripe_6 = api.get_barset(
    ticker_stripe_6,
    timeframe,
    start=start_date,
    end=end_date
).df

# Combine all of the stripes together 
spy_df = pd.concat([spy_stripe_1, spy_stripe_2, spy_stripe_3, spy_stripe_4, spy_stripe_5, spy_stripe_6], axis=1)
spy_df

Unnamed: 0_level_0,AAP,AAP,AAP,AAP,AAP,ADM,ADM,ADM,ADM,ADM,...,WEC,WEC,WEC,WEC,WEC,XEL,XEL,XEL,XEL,XEL
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
2020-01-02 00:00:00-05:00,160.47,160.950,157.5300,159.40,726650,46.57,46.640,45.8800,46.11,1899517,...,92.400,92.5200,90.455,90.68,1267002.0,63.5500,63.5800,62.2000,62.380,2340833
2020-01-03 00:00:00-05:00,158.03,159.630,157.2500,159.48,482371,45.89,46.190,45.7200,46.02,1512616,...,90.650,91.7100,90.520,91.22,1405515.0,62.2800,62.9150,62.2800,62.680,1908930
2020-01-06 00:00:00-05:00,157.74,158.890,156.4325,156.82,571689,45.79,45.845,45.3200,45.67,2045946,...,91.340,91.8000,90.800,91.34,907752.0,62.7600,62.9400,62.3300,62.590,1392103
2020-01-07 00:00:00-05:00,156.90,157.400,152.4090,154.95,1021104,45.57,45.690,45.1000,45.11,1567481,...,90.890,91.0700,90.340,90.96,930917.0,62.1500,62.5168,61.9700,62.460,1620406
2020-01-08 00:00:00-05:00,154.85,156.050,153.1400,153.15,706120,45.11,45.190,44.3800,44.61,2640257,...,91.210,91.3850,90.510,91.11,806742.0,62.5900,62.6400,62.1200,62.420,1630637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-09-09 00:00:00-04:00,152.62,155.680,152.6200,154.30,1040694,45.51,46.340,45.4700,46.06,5600146,...,96.300,99.0400,95.970,97.76,1872756.0,69.1400,71.3500,69.1400,70.590,1662544
2020-09-10 00:00:00-04:00,155.53,155.550,153.1050,153.57,650622,46.47,46.590,45.9386,46.02,4053865,...,97.275,97.6700,95.860,95.96,1707418.0,70.2400,70.6750,69.0400,69.140,1650643
2020-09-11 00:00:00-04:00,154.47,155.320,153.0800,154.56,879257,46.13,46.970,46.0100,46.89,5603942,...,96.110,96.3000,94.930,95.92,1619722.0,69.2500,69.3500,68.1600,68.849,1865467
2020-09-14 00:00:00-04:00,156.90,158.915,156.1300,157.35,1322800,47.00,47.760,46.9600,47.50,5657532,...,96.550,97.7717,95.830,97.14,1169398.0,68.4268,69.9000,68.3495,69.540,1408925


In [442]:
spy_df.to_csv('..Resources\spy_df_csv.csv', index = True)
spy_df_csv = Path('..Resources\spy_df_csv.csv')
spy_csv_df = pd.read_csv(spy_df_csv, header=None )
spy_csv_df[0] = pd.to_datetime(spy_csv_df[0], utc=True).dt.date
spy_csv_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2511,2512,2513,2514,2515,2516,2517,2518,2519,2520
0,NaT,AAP,AAP,AAP,AAP,AAP,ADM,ADM,ADM,ADM,...,WEC,WEC,WEC,WEC,WEC,XEL,XEL,XEL,XEL,XEL
1,NaT,open,high,low,close,volume,open,high,low,close,...,open,high,low,close,volume,open,high,low,close,volume
2,2020-01-02,160.47,160.95,157.53,159.4,726650,46.57,46.64,45.88,46.11,...,92.4,92.52,90.455,90.68,1267002.0,63.55,63.58,62.2,62.38,2340833
3,2020-01-03,158.03,159.63,157.25,159.48,482371,45.89,46.19,45.72,46.02,...,90.65,91.71,90.52,91.22,1405515.0,62.28,62.915,62.28,62.68,1908930
4,2020-01-06,157.74,158.89,156.4325,156.82,571689,45.79,45.845,45.32,45.67,...,91.34,91.8,90.8,91.34,907752.0,62.76,62.94,62.33,62.59,1392103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,2020-09-09,152.62,155.68,152.62,154.3,1040694,45.51,46.34,45.47,46.06,...,96.3,99.04,95.97,97.76,1872756.0,69.14,71.35,69.14,70.59,1662544
176,2020-09-10,155.53,155.55,153.105,153.57,650622,46.47,46.59,45.9386,46.02,...,97.275,97.67,95.86,95.96,1707418.0,70.24,70.675,69.04,69.14,1650643
177,2020-09-11,154.47,155.32,153.08,154.56,879257,46.13,46.97,46.01,46.89,...,96.11,96.3,94.93,95.92,1619722.0,69.25,69.35,68.16,68.849,1865467
178,2020-09-14,156.9,158.915,156.13,157.35,1322800,47.0,47.76,46.96,47.5,...,96.55,97.7717,95.83,97.14,1169398.0,68.4268,69.9,68.3495,69.54,1408925


In [443]:
spy_csv_df.columns = spy_csv_df.iloc[1]
spy_csv_df.drop(['open', 'high', 'low', 'volume'], axis=1, inplace=True)
spy_csv_df.columns = spy_csv_df.iloc[0]

In [444]:
spy_csv_df= spy_csv_df[2:]
spy_csv_df.columns.values[0]='date'

spy_csv_df = spy_csv_df.set_index('date')

spy_csv_df = spy_csv_df.apply(pd.to_numeric)

spy_csv_df = spy_csv_df.pct_change()

spy_csv_df = spy_csv_df.dropna().reset_index()

spy_csv_melt = spy_csv_df.melt(id_vars='date', var_name='tickers', value_name='close')

final_df = spy_csv_melt.merge(sectors_df, on='tickers')


In [450]:
final_df = final_df.set_index('date')
final_df = final_df.groupby(['sector','date'])['close'].mean()



AttributeError: 'Series' object has no attribute 'set_index'

In [449]:
final_df.reset_index()

Unnamed: 0,sector,close
0,CONSUMER DISCRETION,0.00496
1,CONSUMER STAPLES,0.001489
2,ENERGY,0.002384
3,FINANCIAL SERVICES,0.002878
4,HEALTH CARE,0.002593
5,MATERIALS,0.004306
6,PRODUCER DURABLES,0.003498
7,TECHNOLOGY,0.003175
8,UTILITIES,0.001417
