In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Load the three input tables from CSV files in the working directory:
#   sp_data           - indexed below by a 'Date' column plus one column per ticker
#   sp_sector_mapping - accessed below through its 'Ticker' and 'Sector' columns
#   sp_sectors_data   - an 'Effective date ' column plus one column per sector index
sp_data, sp_sector_mapping, sp_sectors_data = (
    pd.read_csv('sp500df.csv'),
    pd.read_csv('SP500_Sectors.csv'),
    pd.read_csv('SP500_Sector_Data.csv'),
)

In [3]:
# Inspect the sector table's header. Two details matter for the code that
# consumes it: the date column is literally 'Effective date ' (with a trailing
# space), and every sector column is named 'S&P 500 <sector> (Sector)'.
sp_sectors_data.columns

Index(['Effective date ', 'S&P 500 Communication Services (Sector)',
       'S&P 500 Consumer Discretionary (Sector)',
       'S&P 500 Consumer Staples (Sector)', 'S&P 500 Energy (Sector)',
       'S&P 500 Financials (Sector)', 'S&P 500 Health Care (Sector)',
       'S&P 500 Industrials (Sector)',
       'S&P 500 Information Technology (Sector)', 'S&P 500 Materials (Sector)',
       'S&P 500 Real Estate (Sector)', 'S&P 500 Utilities (Sector)'],
      dtype='object')

In [4]:
# For every sector, pick the `num_stocks` tickers whose series in `sp_data`
# have the highest correlation (DataFrame.corr default, i.e. Pearson) with
# that sector's index series. The result is stored in `best_stocks` as
# {sector name: [tickers, highest correlation first]}.
all_sp = sp_data.columns.values
num_stocks = 5
best_stocks = {}

for sector in sp_sector_mapping.Sector.unique():
    # Column holding this sector's index in sp_sectors_data.
    sector_col = 'S&P 500 ' + sector + ' (Sector)'

    # Keep only the sector's tickers that are actually present in sp_data.
    # Walking sp_data's own columns (instead of intersecting two sets) keeps
    # the column order identical on every run, so ties in nlargest() are
    # always broken the same way and the output is reproducible.
    wanted_tickers = set(sp_sector_mapping.Ticker.loc[sp_sector_mapping.Sector == sector])
    sector_tickers = [ticker for ticker in all_sp if ticker in wanted_tickers]

    # Attach the sector index series to the individual stock series by date.
    # The right-hand key really is 'Effective date ' (trailing space included);
    # it is dropped again after the join because 'Date' already carries it.
    sector_data = pd.merge(
        sp_data[['Date'] + sector_tickers],
        sp_sectors_data[['Effective date ', sector_col]],
        left_on=['Date'],
        right_on=['Effective date '],
        how='left',
    ).drop(['Effective date '], axis=1)

    # Correlate every column except 'Date' and read off the sector column.
    # The index's correlation with itself is removed by label, so the result
    # does not depend on where the merged column ends up.
    corrs = sector_data[sector_data.columns[1:]].corr()[sector_col].drop(sector_col)

    best_stocks[sector] = list(corrs.nlargest(num_stocks).index.values)

In [5]:
# Show the result: sector name -> list of the selected tickers, in the order
# returned by nlargest() (highest correlation with the sector index first).
best_stocks

{'Industrials': ['AOS', 'AME', 'GD', 'EFX', 'DOV'],
 'Health Care': ['AMGN', 'COO', 'BDX', 'CI', 'CNC'],
 'Information Technology': ['ADBE', 'ADSK', 'ANSS', 'ACN', 'CDNS'],
 'Communication Services': ['CMCSA', 'CHTR', 'GOOGL', 'GOOG', 'ATVI'],
 'Consumer Discretionary': ['DHI', 'AMZN', 'EBAY', 'KMX', 'DG'],
 'Utilities': ['AEP', 'LNT', 'CMS', 'DTE', 'AEE'],
 'Financials': ['BAC', 'ALL', 'CBOE', 'SCHW', 'RE'],
 'Materials': ['AVY', 'ECL', 'APD', 'FMC', 'BLL'],
 'Real Estate': ['EXR', 'ESS', 'DRE', 'AIV', 'CCI'],
 'Consumer Staples': ['KMB', 'STZ', 'CL', 'CHD', 'KO'],
 'Energy': ['XOM', 'DVN', 'COP', 'APA', 'XEC']}