In [23]:
import os
import math
import numpy as np
import pandas as pd
import requests
import pickle
from datetime import datetime as dt
from datetime import timedelta
from pytz import timezone
from pprint import pprint

import cufflinks as cf
import plotly.graph_objects as go


In [7]:
# Constituent metadata for the index (symbols and GICS classification columns).
SPY_info_df = pd.read_csv('data/SPY-Info.csv')

# Price history of the index itself, indexed by the parsed 'Date' column.
SPY_df = pd.read_csv(
    'data/SPY.csv',
    parse_dates=True,
    index_col='Date',
)

In [8]:
# Folder holding one market-data CSV per ticker. The trailing slash is kept so
# that plain string concatenation with a file name yields a valid path.
f = "data/market_data/"

In [9]:
def get_first_dates(data_dir=None):
    '''Get the earliest date for which market data is available for each company.

    Every ``*.csv`` file in ``data_dir`` is read with its ``'Unnamed: 0'``
    column as a date-parsed index, and the index label of its first row is
    taken as that ticker's first date (the files are assumed to be stored in
    chronological order -- TODO confirm).

    Parameters
    ----------
    data_dir : str, optional
        Directory containing one CSV per ticker. Defaults to the
        notebook-level market-data folder ``f``.

    Returns
    -------
    list of tuple
        ``(ticker, first_date)`` pairs sorted by ascending ``first_date``.
        ``ticker`` is the file name without its ``.csv`` extension, so dotted
        symbols such as ``BRK.B`` are preserved. Files with no rows are
        skipped because they have no first date.
    '''
    if data_dir is None:
        data_dir = f

    first_dates = []

    # os.listdir order is arbitrary; sorting the names first makes the relative
    # order of tickers that share a first date reproducible.
    for file in sorted(os.listdir(data_dir)):
        # Ignore stray entries (checkpoint folders, OS metadata files, ...).
        if not file.endswith('.csv'):
            continue

        df = pd.read_csv(
            os.path.join(data_dir, file), index_col='Unnamed: 0', parse_dates=True
        )
        if df.empty:
            continue

        # splitext strips only the final extension, unlike split('.')[0],
        # which would turn 'BRK.B.csv' into 'BRK'.
        ticker = os.path.splitext(file)[0]
        first_dates.append((ticker, df.index[0]))

    # Sort once after collecting everything instead of once per file.
    first_dates.sort(key=lambda pair: pair[1])

    return first_dates

In [10]:
def make_combined_returns_df(spy_df=None, data_dir=None):
    '''Make dataframe of returns for all SPY stocks, as well as SPY index.

    Returns are simple period-over-period percentage changes
    (``pct_change() * 100``): the index return is computed from the
    ``'Close'`` column of ``spy_df`` and each stock's return from the
    ``'adjclose'`` column of its CSV.

    Parameters
    ----------
    spy_df : pandas.DataFrame, optional
        Index price history with a ``'Close'`` column. Defaults to the
        notebook-level ``SPY_df``. The frame is not modified.
    data_dir : str, optional
        Directory containing one ``<ticker>.csv`` per stock, each with an
        ``'Unnamed: 0'`` index column and an ``'adjclose'`` column. Defaults
        to the notebook-level market-data folder ``f``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``spy_df`` (left join), with a ``'SPY'`` column followed
        by one column per ticker, in alphabetical file-name order. Dates on
        which a stock has no data are NaN.
    '''
    if spy_df is None:
        spy_df = SPY_df
    if data_dir is None:
        data_dir = f

    # Only 'Close' is needed for the index return; selecting it directly
    # avoids depending on exactly which other columns the price file carries.
    combined_returns = (spy_df['Close'].pct_change() * 100).to_frame('SPY')

    ticker_returns = []
    for file in sorted(os.listdir(data_dir)):
        # Ignore stray entries (checkpoint folders, OS metadata files, ...).
        if not file.endswith('.csv'):
            continue

        ticker = file[:-len('.csv')]
        df = pd.read_csv(
            os.path.join(data_dir, file), index_col='Unnamed: 0', parse_dates=True
        )
        ticker_returns.append((df['adjclose'].pct_change() * 100).to_frame(ticker))

    if not ticker_returns:
        return combined_returns

    # A single multi-frame join replaces one join + rename per file, which
    # re-copied the growing frame on every iteration.
    return combined_returns.join(ticker_returns, how='left')

In [11]:
# Universe of symbols and the distinct GICS sectors, taken from the info table.
ticker_list = SPY_info_df['Symbol'].tolist()
sector_list = SPY_info_df['GICS Sector'].unique()

# Most recent date in the index price history, and the date 365 days earlier.
last_date = SPY_df.index[-1].date()
yr_ago = last_date - timedelta(days=365)

# Derived tables built from the per-ticker market-data files.
first_dates = get_first_dates()
combined_returns_df = make_combined_returns_df()

In [20]:
def get_weights(info_df=None, sectors=None, weights_path='data/SPY Weights.csv'):
    '''Aggregate index weights by sector, sub-industry and ticker.

    Each ticker's weight is looked up in the ``'Weight'`` column of the CSV at
    ``weights_path`` (indexed by ``'Symbol'``) and summed up through the
    classification in ``info_df``.

    Parameters
    ----------
    info_df : pandas.DataFrame, optional
        Table with ``'Symbol'``, ``'GICS Sector'`` and ``'GICS Sub-Industry'``
        columns. Defaults to the notebook-level ``SPY_info_df``.
    sectors : iterable, optional
        Sectors to include, in output order. Defaults to the notebook-level
        ``sector_list`` when ``info_df`` is not given, otherwise to the
        distinct sectors of ``info_df`` in order of first appearance.
    weights_path : str, optional
        Path of the weights CSV.

    Returns
    -------
    tuple of dict
        ``(sector_weights, subIndustry_weights, ticker_weights)``, each
        mapping a label to its summed weight.

    Raises
    ------
    KeyError
        If a ticker in ``info_df`` has no row in the weights file.
    '''
    if info_df is None:
        info_df = SPY_info_df
        if sectors is None:
            sectors = sector_list
    elif sectors is None:
        sectors = info_df['GICS Sector'].unique()

    weights_df = pd.read_csv(weights_path, index_col='Symbol')
    sector_weights = {}
    subIndustry_weights = {}
    ticker_weights = {}

    for sector in sectors:
        sector_rows = info_df[info_df['GICS Sector'] == sector]
        sector_weight = 0

        for subIndustry in sector_rows['GICS Sub-Industry'].unique():
            # Restrict to rows of the current sector: matching on the
            # sub-industry label alone would count a ticker under every sector
            # that happens to share that label.
            in_subIndustry = sector_rows['GICS Sub-Industry'] == subIndustry
            tickers = sector_rows.loc[in_subIndustry, 'Symbol'].unique()

            subIndustry_weight = 0
            for ticker in tickers:
                weight = weights_df.loc[ticker, 'Weight']
                ticker_weights[ticker] = weight
                subIndustry_weight += weight

            # Accumulate rather than overwrite so a label shared across
            # sectors still ends up with the total of all its tickers.
            subIndustry_weights[subIndustry] = (
                subIndustry_weights.get(subIndustry, 0) + subIndustry_weight
            )
            sector_weight += subIndustry_weight

        sector_weights[sector] = sector_weight

    return sector_weights, subIndustry_weights, ticker_weights

# Build the three weight maps once and keep them at notebook scope.
(
    sector_weights,
    subIndustry_weights,
    ticker_weights,
) = get_weights()

In [29]:
# Sanity check: the weights should add up to the same total at every level.
for _label, _weight_map in (
    ('Sector Weights:', sector_weights),
    ('Sub-Industry Weights:', subIndustry_weights),
    ('Ticker Weights:', ticker_weights),
):
    print(_label, sum(_weight_map.values()))

Sector Weights: 99.99112299999997
Sub-Industry Weights: 99.99112299999999
Ticker Weights: 99.99112300000004
