## Import the Required Dependencies

In [1]:
#Import dependencies
import pandas as pd
from pathlib import Path
import numpy as np

## Read in the CSV Data File

In [2]:
#Load the constituents CSV from the local Dataset folder into a DataFrame
constituents_path = Path("./Dataset/constituents.csv")
raw_df = pd.read_csv(constituents_path)

## Clean Data for Analysis

In [3]:
#Show the raw combined column "Symbol,Name,Sector" before any cleaning
print(raw_df["Symbol,Name,Sector"])

#Split each row's combined string on commas, giving a Series whose entries are
#lists of fields. The .str accessor already operates on the whole column at
#once, so no explicit loop over the DataFrame is needed.
string_list = raw_df["Symbol,Name,Sector"].str.split(",")

0                                  MMM,3M,Industrials
1                         AOS,A. O. Smith,Industrials
2                 ABT,Abbott Laboratories,Health Care
3                             ABBV,AbbVie,Health Care
4                            ABMD,Abiomed,Health Care
                            ...                      
500            YUM,Yum! Brands,Consumer Discretionary
501    ZBRA,Zebra Technologies,Information Technology
502                     ZBH,Zimmer Biomet,Health Care
503                     ZION,Zions Bancorp,Financials
504                            ZTS,Zoetis,Health Care
Name: Symbol,Name,Sector, Length: 505, dtype: object


In [4]:
#Inspect the split result: each entry of string_list is the list of
#comma-separated fields taken from one row of the raw column
print(string_list)

0                                 [MMM, 3M, Industrials]
1                        [AOS, A. O. Smith, Industrials]
2                [ABT, Abbott Laboratories, Health Care]
3                            [ABBV, AbbVie, Health Care]
4                           [ABMD, Abiomed, Health Care]
                             ...                        
500           [YUM, Yum! Brands, Consumer Discretionary]
501    [ZBRA, Zebra Technologies, Information Technol...
502                    [ZBH, Zimmer Biomet, Health Care]
503                    [ZION, Zions Bancorp, Financials]
504                           [ZTS, Zoetis, Health Care]
Name: Symbol,Name,Sector, Length: 505, dtype: object


In [5]:
#Flatten the per-row field lists into one flat list of fields.
#Every element of string_list is already a list of individual fields (it was
#produced by .str.split(",")), so the fields are collected as-is; splitting
#them again would have no effect.
new_list = [word for string in string_list for word in string]

In [6]:
#Regroup the flat field list into consecutive slices of three fields each
#(a final slice may be shorter if the length is not a multiple of three)
chunks = []
for start in range(0, len(new_list), 3):
    chunks.append(new_list[start:start + 3])

In [7]:
#Build the cleaned DataFrame: one row per chunk, one column per field
snp500_df = pd.DataFrame(data=chunks)
snp500_df

Unnamed: 0,0,1,2
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care
...,...,...,...
500,YUM,Yum! Brands,Consumer Discretionary
501,ZBRA,Zebra Technologies,Information Technology
502,ZBH,Zimmer Biomet,Health Care
503,ZION,Zions Bancorp,Financials


In [8]:
#Descriptive labels for the three positional columns (0, 1, 2)
columns = ["Company Ticker", "Company Name", "Company Industry"]

#Relabel the column axis with the new titles and show the result
snp500_df = snp500_df.set_axis(columns, axis="columns")
snp500_df

Unnamed: 0,Company Ticker,Company Name,Company Industry
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care
...,...,...,...
500,YUM,Yum! Brands,Consumer Discretionary
501,ZBRA,Zebra Technologies,Information Technology
502,ZBH,Zimmer Biomet,Health Care
503,ZION,Zions Bancorp,Financials


In [9]:
#Preview the frame indexed by ticker. The result is only displayed, not
#assigned, so snp500_df itself is left unchanged for the cells that follow.
snp500_df.set_index(keys="Company Ticker")

Unnamed: 0_level_0,Company Name,Company Industry
Company Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,3M,Industrials
AOS,A. O. Smith,Industrials
ABT,Abbott Laboratories,Health Care
ABBV,AbbVie,Health Care
ABMD,Abiomed,Health Care
...,...,...
YUM,Yum! Brands,Consumer Discretionary
ZBRA,Zebra Technologies,Information Technology
ZBH,Zimmer Biomet,Health Care
ZION,Zions Bancorp,Financials


## Create Filters

In [26]:
#Give every sector filter its own, independent empty list of tickers
(
    industrials_list,
    health_care_list,
    consumer_discretionary_list,
    information_technology_list,
    communication_services_list,
    utilities_list,
    financials_list,
    materials_list,
    real_estate_list,
    consumer_staples_list,
    energy_list,
) = ([] for _ in range(11))

In [54]:
#Build one list of company tickers per sector for filtering
sector_list = ["Industrials", 
               "Health Care", 
               "Consumer Discretionary", 
               "Information Technology", 
               "Communication Services", 
               "Utilities", 
               "Financials",
               "Materials",
               "Real Estate",
               "Consumer Staples",
               "Energy"
              ]

#Group the tickers by sector in a single pass over the frame. Rows keep their
#original order inside each group, so every list is ordered as in snp500_df.
grouped_tickers = (
    snp500_df.groupby("Company Industry")["Company Ticker"].agg(list).to_dict()
)

#A sector with no matching rows gets an empty list instead of a missing key
tickers_by_sector = {
    sector: grouped_tickers.get(sector, []) for sector in sector_list
}

#Assign (rather than append to) the per-sector lists so that re-running this
#cell always yields the same contents instead of duplicating every ticker
industrials_list = tickers_by_sector["Industrials"]
health_care_list = tickers_by_sector["Health Care"]
consumer_discretionary_list = tickers_by_sector["Consumer Discretionary"]
information_technology_list = tickers_by_sector["Information Technology"]
communication_services_list = tickers_by_sector["Communication Services"]
utilities_list = tickers_by_sector["Utilities"]
financials_list = tickers_by_sector["Financials"]
materials_list = tickers_by_sector["Materials"]
real_estate_list = tickers_by_sector["Real Estate"]
consumer_staples_list = tickers_by_sector["Consumer Staples"]
energy_list = tickers_by_sector["Energy"]

In [55]:
#Spot-check the Industrials filter: show the first five tickers in industrials_list
print(industrials_list[:5])

['MMM', 'AOS', 'ALK', 'ALLE', 'AAL']


In [56]:
#Spot-check the Health Care filter: show the first five tickers in health_care_list
print(health_care_list[:5])

['ABT', 'ABBV', 'ABMD', 'A', 'ALGN']


In [57]:
#Spot-check the Consumer Discretionary filter: show the first five tickers in consumer_discretionary_list
print(consumer_discretionary_list[:5])

['AAP', 'AMZN', 'APTV', 'AZO', 'BBWI']


In [58]:
#Spot-check the Information Technology filter: show the first five tickers in information_technology_list
print(information_technology_list[:5])

['ACN', 'ADBE', 'AMD', 'AKAM', 'APH']


In [59]:
#Spot-check the Communication Services filter: show the first five tickers in communication_services_list
print(communication_services_list[:5])

['ATVI', 'GOOGL', 'GOOG', 'T', 'CHTR']


In [60]:
#Spot-check the Utilities filter: show the first five tickers in utilities_list
print(utilities_list[:5])

['AES', 'LNT', 'AEE', 'AEP', 'AWK']


In [61]:
#Spot-check the Financials filter: show the first five tickers in financials_list
print(financials_list[:5])

['AFL', 'ALL', 'AXP', 'AIG', 'AMP']


In [62]:
#Spot-check the Materials filter: show the first five tickers in materials_list
print(materials_list[:5])

['APD', 'ALB', 'AMCR', 'AVY', 'BLL']


In [63]:
#Spot-check the Real Estate filter: show the first five tickers in real_estate_list
print(real_estate_list[:5])

['ARE', 'AMT', 'AVB', 'BXP', 'CBRE']


In [64]:
#Spot-check the Consumer Staples filter: show the first five tickers in consumer_staples_list
print(consumer_staples_list[:5])

['ADM', 'MO', 'BF.B', 'CPB', 'CHD']


In [65]:
#Spot-check the Energy filter: show the first five tickers in energy_list
print(energy_list[:5])

['APA', 'BKR', 'CVX', 'COP', 'CTRA']
