## Import the Required Dependencies

In [180]:
#Import dependencies
import pandas as pd
from pathlib import Path
import numpy as np

## Read in the CSV Data File

In [181]:
#Load the constituents CSV (path relative to the notebook) into a raw dataframe.
#Later cells read a single column named "Symbol,Name,Sector" from this frame
raw_df = pd.read_csv(Path("./Dataset/constituents.csv"))

## Clean Data for Analysis

In [182]:
#Show the raw data: every row is one comma-joined "Symbol,Name,Sector" string
print(raw_df["Symbol,Name,Sector"])

#Split each string on commas, giving one list of tokens per row.
#.str.split is vectorized over the whole Series, so no surrounding loop is needed
string_list = raw_df["Symbol,Name,Sector"].str.split(",")

0                                  MMM,3M,Industrials
1                         AOS,A. O. Smith,Industrials
2                 ABT,Abbott Laboratories,Health Care
3                             ABBV,AbbVie,Health Care
4                            ABMD,Abiomed,Health Care
                            ...                      
500            YUM,Yum! Brands,Consumer Discretionary
501    ZBRA,Zebra Technologies,Information Technology
502                     ZBH,Zimmer Biomet,Health Care
503                     ZION,Zions Bancorp,Financials
504                            ZTS,Zoetis,Health Care
Name: Symbol,Name,Sector, Length: 505, dtype: object


In [183]:
#Preview the split result: each element of string_list is the list of tokens from one row
print(string_list)

0                                 [MMM, 3M, Industrials]
1                        [AOS, A. O. Smith, Industrials]
2                [ABT, Abbott Laboratories, Health Care]
3                            [ABBV, AbbVie, Health Care]
4                           [ABMD, Abiomed, Health Care]
                             ...                        
500           [YUM, Yum! Brands, Consumer Discretionary]
501    [ZBRA, Zebra Technologies, Information Technol...
502                    [ZBH, Zimmer Biomet, Health Care]
503                    [ZION, Zions Bancorp, Financials]
504                           [ZTS, Zoetis, Health Care]
Name: Symbol,Name,Sector, Length: 505, dtype: object


In [184]:
#Flatten the per-row token lists into one flat list of strings.
#The tokens were already separated on commas when string_list was built,
#so each one is kept as-is and no further splitting is needed
new_list = [word for tokens in string_list for word in tokens]

In [185]:
#Slice the flat list into consecutive groups of 3 values;
#a final group is shorter if the list length is not a multiple of 3
chunks = [
    new_list[start:start + 3]
    for start in range(0, len(new_list), 3)
]

In [186]:
#Build the dataframe from the grouped chunks (one row per chunk), then display it
snp500_df = pd.DataFrame(data=chunks)
snp500_df

Unnamed: 0,0,1,2
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care
...,...,...,...
500,YUM,Yum! Brands,Consumer Discretionary
501,ZBRA,Zebra Technologies,Information Technology
502,ZBH,Zimmer Biomet,Health Care
503,ZION,Zions Bancorp,Financials


In [202]:
#Descriptive titles for the three positional columns (0, 1, 2)
columns = [
    "Company Ticker",
    "Company Name",
    "Company Industry",
]

#Replace the dataframe's column labels in place, then display the result
snp500_df.columns = columns
snp500_df

Unnamed: 0,Company Ticker,Company Name,Company Industry
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care
...,...,...,...
500,YUM,Yum! Brands,Consumer Discretionary
501,ZBRA,Zebra Technologies,Information Technology
502,ZBH,Zimmer Biomet,Health Care
503,ZION,Zions Bancorp,Financials


In [275]:
#Preview the table indexed by company name. The result is only displayed, not assigned,
#so snp500_df itself keeps its integer index and all three columns
snp500_df.set_index(keys="Company Name")

Unnamed: 0_level_0,Company Ticker,Company Industry
Company Name,Unnamed: 1_level_1,Unnamed: 2_level_1
3M,MMM,Industrials
A. O. Smith,AOS,Industrials
Abbott Laboratories,ABT,Health Care
AbbVie,ABBV,Health Care
Abiomed,ABMD,Health Care
...,...,...
Yum! Brands,YUM,Consumer Discretionary
Zebra Technologies,ZBRA,Information Technology
Zimmer Biomet,ZBH,Health Care
Zions Bancorp,ZION,Financials


## Create Filters

In [276]:
#Start each sector filter as its own empty list (three distinct list objects)
industrials_list, health_care_list, consumer_discretionary_list = [], [], []

In [266]:
#Collect the tickers of all Industrials companies for filtering.
#A boolean mask with label-based column selection replaces the iterrows() loop:
#integer keys such as row[2] on a label-indexed row are deprecated in pandas,
#and rebuilding the list (instead of appending) keeps this cell safe to re-run
industrials_list = snp500_df.loc[
    snp500_df["Company Industry"] == "Industrials", "Company Ticker"
].tolist()

In [267]:
#Collect the tickers of all Health Care companies for filtering.
#A boolean mask with label-based column selection replaces the iterrows() loop:
#integer keys such as row[2] on a label-indexed row are deprecated in pandas,
#and rebuilding the list (instead of appending) keeps this cell safe to re-run
health_care_list = snp500_df.loc[
    snp500_df["Company Industry"] == "Health Care", "Company Ticker"
].tolist()

In [269]:
#Collect the tickers of all Consumer Discretionary companies for filtering.
#A boolean mask with label-based column selection replaces the iterrows() loop:
#integer keys such as row[2] on a label-indexed row are deprecated in pandas,
#and rebuilding the list (instead of appending) keeps this cell safe to re-run
consumer_discretionary_list = snp500_df.loc[
    snp500_df["Company Industry"] == "Consumer Discretionary", "Company Ticker"
].tolist()

In [274]:
#Display the first 5 tickers collected in the industrials list
print(industrials_list[:5])

['MMM', 'AOS', 'ALK', 'ALLE', 'AAL']


In [272]:
#Display the first 5 tickers collected in the health care list
print(health_care_list[:5])

['ABT', 'ABBV', 'ABMD', 'A', 'ALGN']


In [273]:
#Display the first 5 tickers collected in the consumer discretionary list
print(consumer_discretionary_list[:5])

['AAP', 'AMZN', 'APTV', 'AZO', 'BBWI']
