# Stock Watchers

Goal: Combine Senate and House Stockwatcher data with stock descriptions from the YFinance Module.

Senate Stock Watcher API: https://senatestockwatcher.com/api
House Stock Watcher API: https://housestockwatcher.com/api

#### Imports

In [403]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [404]:
import re, lxml.html, lxml.etree
from collections import OrderedDict
import utils

In [405]:
import requests

In [406]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('max_seq_item', None)

In [407]:
import yfinance as yf

-----

### Reading JSON for Senate Stock Watcher

In [408]:
url = f"https://senate-stock-watcher-data.s3-us-west-2.amazonaws.com/aggregate/all_transactions.json"
response = requests.get(url)
data_senate_stock_watcher = response.json()

In [409]:
data_senate_stock_watcher[0:3]

[{'transaction_date': '02/24/2022',
  'owner': 'Spouse',
  'ticker': 'NEE',
  'asset_description': 'NextEra Energy, Inc. Common Stock',
  'asset_type': 'Stock',
  'type': 'Sale (Partial)',
  'amount': '$1,001 - $15,000',
  'comment': '--',
  'senator': 'Shelley M Capito',
  'ptr_link': 'https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/',
  'disclosure_date': '03/11/2022'},
 {'transaction_date': '02/24/2022',
  'owner': 'Spouse',
  'ticker': 'MSFT',
  'asset_description': 'Microsoft Corporation - Common Stock',
  'asset_type': 'Stock',
  'type': 'Sale (Partial)',
  'amount': '$1,001 - $15,000',
  'comment': '--',
  'senator': 'Shelley M Capito',
  'ptr_link': 'https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/',
  'disclosure_date': '03/11/2022'},
 {'transaction_date': '02/24/2022',
  'owner': 'Spouse',
  'ticker': 'LHX',
  'asset_description': 'L3Harris Technologies, Inc. Common Stock',
  'asset_type': 'Stock',
  'type': 

In [410]:
# print(data_senate_stock_watcher[0:10])

In [411]:
transaction_date = []
owner = []
ticker = []
asset_description = []
asset_type = []
transaction_type = []
amount = []
comment = []
politician = []
ptr_link = []
disclosure_date = []
location = []
cap_gains = []

In [412]:
for stock in data_senate_stock_watcher:
    transaction_date.append(stock['transaction_date'])
    owner.append(stock['owner'])
    ticker.append(stock['ticker'])
    asset_description.append(stock['asset_description'])
    asset_type.append(stock['asset_type'])
    transaction_type.append(stock['type'])
    amount.append(stock['amount'])
    comment.append(stock['comment'])
    politician.append(stock['senator'])
    location.append(np.nan)
    ptr_link.append(stock['ptr_link'])
    disclosure_date.append(stock['disclosure_date'])
    cap_gains.append(np.nan)
    

In [413]:
len(transaction_date)

9230

In [414]:
disclosure_date[0:10]

['03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022',
 '03/11/2022']

----

### Reading JSON for House Stock Watcher

In [415]:
url = f"https://house-stock-watcher-data.s3-us-west-2.amazonaws.com/data/all_transactions.json"
response = requests.get(url)
data_house_stock_watcher = response.json()

In [416]:
# data_house_stock_watcher

In [417]:
for stock in data_house_stock_watcher:
    disclosure_date.append(stock['disclosure_date'])
    transaction_date.append(stock['transaction_date'])
    owner.append(stock['owner'])
    ticker.append(stock['ticker'])
    asset_description.append(stock['asset_description'])
    asset_type.append(np.nan)
    transaction_type.append(stock['type'])
    amount.append(stock['amount'])
    comment.append(np.nan)
    politician.append(stock['representative'])
    location.append(stock['district'])
    ptr_link.append(stock['ptr_link'])
    cap_gains.append(stock['cap_gains_over_200_usd'])


In [418]:
len(transaction_date)

22972

### Creating a Dataframe of Current Stock Watcher Data

In [419]:
ls = [transaction_date] + [disclosure_date] + [politician] + [owner] + [ticker] + [amount] + [asset_description] + [asset_type] + [transaction_type] + [comment] + [ptr_link] + [location] + [cap_gains]

In [420]:
df_init = pd.DataFrame(ls)

In [421]:
# df_init.head(5)

In [422]:
df = df_init.T

In [423]:
df.columns = ['transaction_date'] + ['disclosure_date'] + ['politician'] + ['owner'] + ['ticker'] + ['amount'] + ['asset_description'] + ['asset_type'] + ['transaction_type'] + ['comment'] + ['ptr_link'] + ['location'] + ['cap_gains']

In [424]:
df.head(5)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,"$1,001 - $15,000","NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,
1,02/24/2022,03/11/2022,Shelley M Capito,Spouse,MSFT,"$1,001 - $15,000",Microsoft Corporation - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,
2,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LHX,"$1,001 - $15,000","L3Harris Technologies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,
3,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LOW,"$1,001 - $15,000","Lowe's Companies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,
4,02/24/2022,03/11/2022,Shelley M Capito,Spouse,AAPL,"$1,001 - $15,000",Apple Inc. - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,


In [425]:
df.dtypes

transaction_date     object
disclosure_date      object
politician           object
owner                object
ticker               object
amount               object
asset_description    object
asset_type           object
transaction_type     object
comment              object
ptr_link             object
location             object
cap_gains            object
dtype: object

##### Fixing the Amount Column

In [426]:
df.amount = df.amount.str.replace(',', "")

In [427]:
df.amount = df.amount.str.replace('$', "")
# df.amount

  df.amount = df.amount.str.replace('$', "")


In [428]:
split_amounts = df.amount.str.split(' - ', expand=True)
split_amounts.head(3)

Unnamed: 0,0,1
0,1001,15000
1,1001,15000
2,1001,15000


In [429]:
len(split_amounts)

22972

In [445]:
df.head(5)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
1,02/24/2022,03/11/2022,Shelley M Capito,Spouse,MSFT,1001 - 15000,Microsoft Corporation - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
2,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LHX,1001 - 15000,"L3Harris Technologies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
3,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LOW,1001 - 15000,"Lowe's Companies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
4,02/24/2022,03/11/2022,Shelley M Capito,Spouse,AAPL,1001 - 15000,Apple Inc. - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000


In [431]:
df2 = pd.concat([df, split_amounts], axis=1)

In [432]:
df = df2

In [446]:
df.head(5)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
1,02/24/2022,03/11/2022,Shelley M Capito,Spouse,MSFT,1001 - 15000,Microsoft Corporation - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
2,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LHX,1001 - 15000,"L3Harris Technologies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
3,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LOW,1001 - 15000,"Lowe's Companies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
4,02/24/2022,03/11/2022,Shelley M Capito,Spouse,AAPL,1001 - 15000,Apple Inc. - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000


In [434]:
df = df.rename(columns={0: "amount_low", 1: "amount_high"})

In [435]:
df.head(3)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
1,02/24/2022,03/11/2022,Shelley M Capito,Spouse,MSFT,1001 - 15000,Microsoft Corporation - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
2,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LHX,1001 - 15000,"L3Harris Technologies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000


##### Cleaning Up the Ticker Column

In [436]:
df['ticker'] = df['ticker'].str.replace('APPl','APPL')

In [437]:
df['ticker'] = df['ticker'].str.replace('ALb','ALB')

In [438]:
df['ticker'] = df['ticker'].str.replace('BLDr','BLDR')

In [439]:
df['ticker'] = df['ticker'].str.replace('HOn','HON')

In [440]:
df['ticker'] = df['ticker'].str.replace('AAl','AAL')

In [441]:
df['ticker'] = df['ticker'].str.replace('BP PLC','BP')

In [442]:
df.head(3)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
1,02/24/2022,03/11/2022,Shelley M Capito,Spouse,MSFT,1001 - 15000,Microsoft Corporation - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000
2,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LHX,1001 - 15000,"L3Harris Technologies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000


In [443]:
# df.to_csv('..//data//processed//stock_watchers_03_03_2022.csv', index = False)

In [444]:
# df.to_csv('..//data//processed//stock_watchers_03_12_2022.csv', index = False)

----

## Identifying Stock Information with Yahoo Finance Data

Using the YFinance Ticker Module from Yahoo Finance, we are able to assign stock information to the stock tickers identified in the stock watcher data. This stock info includes the full name, sector, industry, state, and business summary.

In [369]:
ticker[0:10]

['NEE', 'MSFT', 'LHX', 'LOW', 'AAPL', 'OXY', 'ARKK', 'ARKK', 'X', 'X']

In [36]:
ticker_edit = ' '.join(ticker)

In [368]:
# ticker_edit

In [38]:
tickers = yf.Tickers(ticker_edit)

In [370]:
# tickers.tickers['ROKU'].info

In [40]:
print(tickers.tickers['SBUX'].info['sector'])
print(tickers.tickers['MSFT'].info['sector'])
print(tickers.tickers['AVY'].info['sector'])
print(tickers.tickers['AWI'].info['sector'])
print(tickers.tickers['HON'].info['sector'])
print(tickers.tickers['MSFT'].info['sector'])

Consumer Cyclical
Technology
Industrials
Industrials
Industrials
Technology


In [41]:
test = 'SBUX'

In [74]:
tickers.tickers['NWL'].info['sector']

'Consumer Defensive'

In [42]:
print(tickers.tickers[test].info['sector'])

Consumer Cyclical


##### Looping through each Stock Ticker

In [43]:
ticker2 = []
name = []
sector = []
industry = []
longbusinesssummary = []
website = []
state = []

In [81]:
# ticker

In [44]:
for a in ticker:
    if a == '--':
        print('--')
        ticker2.append(np.nan)
        name.append(np.nan)
        sector.append(np.nan)
        industry.append(np.nan)
        longbusinesssummary.append(np.nan)
        website.append(np.nan)
        state.append(np.nan)
    else:
        stock = tickers.tickers[a]
        
#         stock = yf.Ticker(a)
        print(a)
        try:
            ticker2.append(a)  
        except: 
            ticker2.append(np.nan)
        try:
            name.append(stock.info['longName'])
        except:
            name.append(np.nan)
        try: 
            sector.append(stock.info['sector'])
        except:
            sector.append(np.nan)
        try:
            industry.append(stock.info['industry'])
        except:
            industry.append(np.nan)
        try:
            longbusinesssummary.append(stock.info['longBusinessSummary'])
        except:
            longbusinesssummary.append(np.nan)
        try:
            website.append(stock.info['website'])
        except:
            website.append(np.nan)
        try:
            state.append(stock.info['state'])
        except:
            state.append(np.nan)

--
--
--
ROAD
UL
WEX
VFC
OTIS
HSIC
CARR
MMM
HSY
NVDA
LIN
AMZN
INTC
GOLD
BABA
QCOM
BABA
BABA
QCOM
CLF
CLF
X
INTC
WBA
VZ
MMM
PFE
IBM
INTC
KO
CSCO
CVX
DOW
AMGN
INTC
CLF
SSYS
SSYS
SSYS
SSYS
--
DUK
--
--
--
--
MDT
ABBV
ABC
CTAS
LMT
PEP
PM
PONCX
PG
TSN
XEL
ABQCX
FUN
CRWD
FGSCX
GILD
CLOU
NWL
PEGA
VTRS
ACN
ADBE
AWK
AAPL
CTAS
LLY
MA
MSFT
NEE
RTX
WMT
LUBFX
GEMIX
CEF
XAR
PTMC
FIMKX
JHMM
WIW
NVDA
GNR
ITA
MOO
SI
FIMKX
JHMM
WIW
KRE
SRVR
ITA
MOO
GEMIX
XAR
GLD
IVOO
CIBR
KBA
FDN
GBIL
C
IWN
TTE
QCOM
ARCC
LMT
FXI
CVS
NVDA
MET
AXS
GLAD
HASI
ETN
XAR
ABBV
RTX
GBIL
IRM
APO
LHX
FIMKX
AAPL
CSCO
GLW
INTC
V
VFH
WIW
VZ
DLR
RIO
KBE
AMT
SRVR
CEF
MOS
GLD
LDP
ITA
AQN
IGHG
PTMC
PTNQ
AY
JCI
GMRE
FCPIX
BMY
DIS
JNJ
NVS
VZ
USB
MSFT
GILD
XOM
CVS
BRK.B
BAC
AAPL
PII
PFE
KMB
CMCSA
CSCO
CI
BK
USB
MSFT
KR
GILD
FB
XOM
DVA
CVS
BRK.B
BAC
AAPL
GOOG
VZ
PII
PFE
KMB
CMCSA
CSCO
CI
BK
IRT
AAPL
PYPL
QQQ
ECOM
CLF
ECOM
ECOM
BABA
BABA
BABA
BABA
SSYS
--
DDOG
RHP
RNR
RHP
RNR
PNFP
ENVA
CCEP
BKEP
NS
CLDT
--
--
--
--
--
--
--
--
--
SBIO
--
--
CE

MPC
WFC
CSCO
ABBV
JPM
DD
K
BABA
AIZ
ABBV
DOCU
PRU
CVX
DD
AIZ
EMR
CR
HIG
AMGN
AIG
PANW
CVX
BP
DD
DD
DD
BKNG
BKNG
HELE
BKNG
TJX
ROST
LULU
FB
ROST
CME
NVDA
AZO
ADP
ARNA
AMZN
CME
AMZN
NVDA
IPHI
FB
ARNA
ICE
ICE
ICE
ICE
ICE
WM
DLR
MSFT
MSFT
DIS
XOM
CVX
BA
WMT
AAPL
--
V
WMT
NFLX
NVDA
NFLX
AAPL
NFLX
NVDA
AAPL
AAPL
NFLX
WMT
TROW
PG
MSFT
MDT
EMR
BLK
AAPL
AMGN
T
VFC
PSX
MMM
JNJ
DOW
N/A
N/A
--
N/A
VWO
HAL
XOM
BRK-B
BAM
IR
N/A
ORCL
XOM
CTXS
FNKO
EEFT
ORLY
FNKO
FNKO
AZO
ARNA
TCEHY
ROKU
FNKO
ARNA
TCEHY
KL
CMCSA
AZO
HON
MRK
DFS
DAL
EMR
CAT
AFL
--
WFC
--
--
WMT
NVDA
MSFT
HQL
HQL
BSTZ
BSTZ
--
--
--
--
GM
PFE
DD
DD
DVN
PFE
PFE
CZR
CZR
DD
PFE
DD
DAL
PFE
DD
CZR
CZR
CZR
PFE
PFE
CZR
GPK
CZR
DD
PFE
DD
T
VIAC
T
--
DIS
CZR
FEYE
GPK
DAL
T
VIAC
DVN
T
T
CZR
T
VIAC
T
CZR
VIAC
TRP
T
CZR
DAL
T
TRP
HBI
HBI
WMB
TRGP
TRGP
HBI
ET
ET
AAPL
ET
ET
WMB
WMB
TRGP
MPLX
ENB
DCP
DCP
LNG
ALB
WES
WES
SHLX
OKE
T
PSXP
PSXP
PSXP
VIAC
T
--
--
--
--
N/A
N/A
N/A
BMEZ
PYPL
INTU
DHR
BAM
AAPL
--
--
--
N/A
--
N/A
CMCSA
CSCO
GS
CVX
UL
PRU
--
-

DHR
REVB
REVB
BA
WMT
PYPL
LRCX
GS
DAL
C
BA
BABA
KHC
BTO
--
T
ON
ST
LEA
APTV
NSRGY
UNH
SLB
PFE
PEP
NSRGY
CMCSA
UNH
TMO
MRK
STZ
--
T
N/A
DISCA
GM
GM
MYL
SU
TWX
FOXA
MYL
FOXA
FDC
CRMT
BOIVF
CZR
BOIVF
CZR
SPY
BOIVF
FEYE
RF
FEYE
FEYE
RF
FOX
FOXA
KMI
OKE
BRK-B
UHAL
LNG
LNG
RF
RF
CZR
FOXA
T
FOXA
CRMT
FOXA
--
SBUX
OKE
FOXA
CRMT
FDC
USG
USG
CRMT
DISCA
DISCA
--
--
SHW
--
--
--
AMZN
NFLX
CVX
NFLX
AMZN
N/A
N/A
N/A
CRSP
BMY
CLR
MET
CVS
AMZN
WFC
JPM
USB
HOLX
N/A
--
TCMD
TCMD
PODD
RCL
BNPQY
--
DWDP
--
BOX
BOX
C
AMZN
CB
HD
MCD
F
ETN
BUD
ADI
IBM
TSLA
MGA
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
T
GIS
DISCA
BAC
UHAL
CRMT
DIS
CRMT
WPX
CRMT
CRMT
WPX
PEP
CRMT
WPX
DISCA
PEP
HBI
WPX
GLW
FOXA
HBI
DISCA
WPX
FOXA
TWX
DISCA
HBI
FEYE
FEYE
FEYE
HBI
BAC
--
N/A
N/A
N/A
N/A
N/A
GILD
N/A
N/A
FE
VFC
UPS
SBUX
PG
MSFT
MDT
LOW
IBM
GWW
XOM
EMR
CMCSA
CSCO
CVS
BLK
AFL
WFC
WFC
WFC
DWDP
DWDP
--
--
--
--
--
--
--
--
--
REVB
BOX
ENB
PTEN
WEC
WMT
VFC
UTX
USB
MMM
TGT
SYY
QCOM
PG
PX
PII
PSX
PEP
PAYX
NVS
NSC
NKE
N

ESRX
VZ
SRCL
GOOG
COP
--
TSCO
TMK
MTD
HOLX
AWK
--
GILD
--
N/A
SPG
NKE
CSX
F
ZBH
ZBH
--
--
--
--
--
--
VSM
ENB
DIS
FEYE
WMB
OKS
EPD
EPD
BPL
ETP
WMB
BAC
BAC
BAC
DIS
--
BA
FB
BAC
CVX
NFLX
ZNGA
AAPL
TJX
--
USB
UNP
VSAT
IBM
WFC
--
VLO
CAT
UA
UAA
CTL
BX
DEO
JWN
CVS
WETF
DVMT
AMG
XSD
INTC
DHR
IAI
AMG
XSD
DHR
N/A
GILD
ORCL
DIS
GILD
FB
AMGN
GILD
MCK
GOOGL
ABT
BA
DIS
SBUX
UL
JCI
GILD
ORCL
DEO
MDU
PFE
AMGN
N/A
CERN
CTSH
DIS
BAC
VSM
DIS
ENTG
DIS
ENTG
FOXA
FEYE
WMB
SE
SLB
WMB
SE
KHC
EXR
EXR
DIS
DIS
BAC
BAC
BAC
DIS
DIS
VSM
CAB
GEL
WMB
DIS
WMB
WMB
WMB
DIS
--
DIS
DPM
BAX
WLK
VSM
SPY
SPY
BAX
VIAB
MOS
VSM
--
--
--
--
--
RAI
--
--
SCCO
--
--
--
--
--
--
--
--
--
--
--
--
--
--
ASIX
NOC
COST
HDS
PFS
EEM
TMO
XSD
ISRG
WETF
VWO
FEYE
XSD
HACK
ISRG
WETF
TMO
--
CVX
QCOM
PX
NSC
JNJ
EV
VFC
PPG
LOW
GWW
CVS
AMGN
N/A
N/A
N/A
N/A
TEVA
GE
--
--
--
--
CVS
ASIX
ASIX
ASIX
HSBC
HAS
DIS
BRK-B
JPM
IBM
BRK-B
MCK
MA
CB
DEO
WFC
HAS
IBM
CAB
GLW
GLW
FEYE
FEYE
CAB
FEYE
BAX
CAB
--
--
CAB
SPY
MOS
SPY
--
--
--
N/A
DIS
A
AGN
CSX
VZ
P

NOG
FCX
COH
--
TOLLX
TOLLX
WASCX
MDDVX
MDDVX
DVFAX
DVFAX
N/A
--
--
--
--
XOM
BA
BAC
MIK
TJX
ORCL
T
KORS
PCAR
IBM
CMRE
PCAR
PRU
AMGN
HYH
SSYS
ABBV
KEYS
T
KORS
BEAV
M
--
PFS
T
PFS
ESV
TOLLX
--
RYN
N/A
MDLZ
CEA
KSU
ZNGA
KSU
BIDU
NFLX
--
--
BTF
--
PNQI
--
--
--
MFCDX
FOXA
TWX
TXN
TFX
SRCL
HSIC
ROP
RTN
PCP
MET
INTC
GILD
XOM
DD
COP
CVX
AAPL
APH
AMLP
ALL
T
NGG
OLN
MET
HON
CSCO
PEP
T
--
PRU
M
DHR
F
COF-PP
DHR
BABA
F
N/A
N/A
PFS
N/A
--
--
--
--
--
--
VIXY
QQQ
SBUX
SBUX
COF
BMY
CY
TRIP
UL
KORS
YUM
M
A
UL
N/A
CDK
AJG
CMI
BA
ABBV
SBUX
--
--
--
--
--
--
--
AAPL
AAPL
NFLX
AAPL
NFLX
NFLX
N/A
BX
EL
YUM
PEP
NEE
SLB
NOV
HON
ESS
BMY
CVS
CY
SLB
ETN
HYT
CY
VZ
PANW
EL
ISIS
TU
AIG
CY
SLB
APA
N/A
FEYE
FEYE
NXPI
NXPI
IJH
RSP
IDV
IXP
SDY
AMLP
DWX
ILF
EPP
NKE
IRM
IBM
FB
AMZN
AAPL
DISH
AMT
EBAY
IRM
NFLX
--
--
--
--
--
--
--
KSU
COST
TWTR
BAC
AAPL
CELG
CELG
MCHP
VOD
DE
MDP
QLIK
PANW
LNKD
VOD
AXP
UL
KORS
GPRO
--
RYAM
LGP
LNT
ARE
BUD
AMT
BIIB
BTE
BWA
BMR
DAL
CME
ECL
EMC
EPR
ENLC
GE
GRMN
HRS
HCC
HUB-B
HTGC
JNJ
HUN
MM

KeyError: 'AAPl'

In [447]:
# ticker2
# name
# sector
# industry
# longbusinesssummary
# website
# state

In [49]:
len(ticker)

22929

##### The Result of the Loop(s)

In [359]:
ticker2[0:10]

[nan, nan, nan, 'ROAD', 'UL', 'WEX', 'VFC', 'OTIS', 'HSIC', 'CARR']

In [360]:
name[0:10]

[nan,
 nan,
 nan,
 'Construction Partners, Inc.',
 'Unilever PLC',
 'WEX Inc.',
 'V.F. Corporation',
 'Otis Worldwide Corporation',
 'Henry Schein, Inc.',
 'Carrier Global Corporation']

In [361]:
sector[0:10]

[nan,
 nan,
 nan,
 'Industrials',
 'Consumer Defensive',
 'Technology',
 'Consumer Cyclical',
 'Industrials',
 'Healthcare',
 'Industrials']

In [362]:
industry[0:10]

[nan,
 nan,
 nan,
 'Engineering & Construction',
 'Household & Personal Products',
 'Software—Infrastructure',
 'Apparel Manufacturing',
 'Specialty Industrial Machinery',
 'Medical Distribution',
 'Building Products & Equipment']

In [365]:
longbusinesssummary[0:5]

[nan,
 nan,
 nan,
 'Construction Partners, Inc., a civil infrastructure company, engages in the construction and maintenance of roadways across Alabama, Florida, Georgia, North Carolina, and South Carolina. The company, through its subsidiaries, provides various products and services to public and private infrastructure projects, with a focus on highways, roads, bridges, airports, and commercial and residential developments. It also engages in manufacturing and distributing hot mix asphalt (HMA) for internal use and sales to third parties in connection with construction projects; paving activities, including the construction of roadway base layers and application of asphalt pavement; site development, including the installation of utility and drainage systems; mining aggregates, such as sand and gravel that are used as raw materials in the production of HMA; and distributing liquid asphalt cement for internal use and sales to third parties in connection with HMA production. The company

In [366]:
website[0:10]

[nan,
 nan,
 nan,
 'https://www.constructionpartners.net',
 'https://www.unilever.com',
 'https://www.wexinc.com',
 'https://www.vfc.com',
 'https://www.otis.com',
 'https://www.henryschein.com',
 'https://www.corporate.carrier.com']

In [367]:
state[0:10]

[nan, nan, nan, 'AL', nan, 'ME', 'CO', 'CT', 'NY', 'FL']

### Making a Dataframe of the YFinance Stock Info

In [278]:
ls_yfinance = [ticker2] + [name] + [sector] + [industry] + [longbusinesssummary] + [website] 

In [279]:
df_yfinance_init = pd.DataFrame(ls_yfinance)

In [280]:
# df_yfinance.head(5)

In [281]:
df_yfinance = df_yfinance_init.T

In [282]:
df_yfinance.columns = ['ticker2'] + ['name'] + ['sector'] + ['industry'] + ['longbusinesssummary'] + ['website']

In [448]:
df_yfinance.head(5)

Unnamed: 0,ticker2,name,sector,industry,longbusinesssummary,website
0,,,,,,
1,,,,,,
2,,,,,,
3,ROAD,"Construction Partners, Inc.",Industrials,Engineering & Construction,"Construction Partners, Inc., a civil infrastructure company, engages in the construction and maintenance of roadways across Alabama, Florida, Georgia, North Carolina, and South Carolina. The company, through its subsidiaries, provides various products and services to public and private infrastructure projects, with a focus on highways, roads, bridges, airports, and commercial and residential developments. It also engages in manufacturing and distributing hot mix asphalt (HMA) for internal use and sales to third parties in connection with construction projects; paving activities, including the construction of roadway base layers and application of asphalt pavement; site development, including the installation of utility and drainage systems; mining aggregates, such as sand and gravel that are used as raw materials in the production of HMA; and distributing liquid asphalt cement for internal use and sales to third parties in connection with HMA production. The company was formerly known as SunTx CPI Growth Company, Inc. and changed its name to Construction Partners, Inc. in September 2017. Construction Partners, Inc. was incorporated in 1999 and is headquartered in Dothan, Alabama.",https://www.constructionpartners.net
4,UL,Unilever PLC,Consumer Defensive,Household & Personal Products,"Unilever PLC operates as a fast-moving consumer goods company in Asia, Africa, the Middle East, Turkey, Russia, Ukraine, Belarus, the Americas, and Europe. It operates through Beauty & Personal Care, Foods & Refreshment, and Home Care segments. The Beauty & Personal Care segment provides skin care and hair care products, deodorants, and skin cleansing products under the Axe, Clear, Dove, Lifebuoy, Lux, Pond's, Rexona, Signal, Suave, Sunsilk, TRESemmÃ©, and Vaseline brands. The Foods & Refreshment segment offers ice cream, soups, bouillons, seasonings, mayonnaise, ketchups, and tea categories under the Ben & Jerry's, Breyers, Brooke Bond, Heart (Wall's), Hellmann's, Knorr, Lipton, Magnum, The Vegetarian Butcher, and Unilever Food Solutions brands. The Home Care segment provides fabric solutions, and home care and hygiene products under the Cif, Omo, Persil, Domestos, Seventh Generation, and Sunlight brands. Unilever PLC was incorporated in 1894 and is based in London, the United Kingdom.",https://www.unilever.com


In [449]:
df_yfinance.tail(5)

Unnamed: 0,ticker2,name,sector,industry,longbusinesssummary,website
23982,SWK,"Stanley Black & Decker, Inc.",Industrials,Tools & Accessories,"Stanley Black & Decker, Inc. engages in the tools and storage, industrial, and security businesses worldwide. Its Tools & Storage segment offers power tools and equipment, including professional products, such as professional grade corded and cordless electric power tools and equipment, and pneumatic tools and fasteners; and consumer products comprising corded and cordless electric power tools primarily under the BLACK+DECKER brand, as well as lawn and garden products and related accessories, and home products. This segment sells its products through retailers, distributors, and a direct sales force to professional end users, distributors, retail consumers, and industrial customers in various industries. The company's Industrial segment provides engineered fastening systems and products to customers in the automotive, manufacturing, electronics, construction, aerospace, and other industries; sells and rents custom pipe handling, joint welding, and coating equipment for use in the construction of large and small diameter pipelines, as well as provides pipeline inspection services; and sells hydraulic tools, attachments, and accessories. This segment also serves oil and natural gas pipeline industry and other industrial customers. Its Security segment designs, supplies, and installs commercial electronic security systems and provides electronic security services; offers healthcare solutions, which include asset tracking, infant protection, pediatric protection, patient protection, wander management, fall management, and emergency call products; and sells automatic doors to commercial customers. This segment serves consumers, retailers, educational, financial, and healthcare institutions, as well as commercial, governmental, and industrial customers. The company was formerly known as The Stanley Works and changed its name to Stanley Black & Decker, Inc. in March 2010. Stanley Black & Decker, Inc. was founded in 1843 and is headquartered in New Britain, Connecticut.",https://www.stanleyblackanddecker.com
23983,USB,U.S. Bancorp,Financial Services,Banks—Regional,"U.S. Bancorp, a financial services holding company, provides various financial services in the United States. It operates in Corporate and Commercial Banking, Consumer and Business Banking, Wealth Management and Investment Services, Payment Services, and Treasury and Corporate Support segments. The company offers depository services, including checking accounts, savings accounts, and time certificate contracts; lending services, such as traditional credit products; and credit card services, lease financing and import/export trade, asset-backed lending, agricultural finance, and other products. It also provides ancillary services comprising capital markets, treasury management, and receivable lock-box collection services to corporate customers; and a range of asset management and fiduciary services for individuals, estates, foundations, business corporations, and charitable organizations. In addition, the company offers investment and insurance products to its customers principally within its markets, as well as fund administration services to a range of mutual and other funds. Further, it provides corporate and purchasing card, and corporate trust services; and merchant processing services, as well as cash and investment management, ATM processing, mortgage banking, and brokerage and leasing services. As of December 31, 2020, the company provided its products and services through a network of 2,434 banking offices principally operating in the Midwest and West regions of the United States, as well as through online services and over mobile devices; and operated a network of 4,232 ATMs. The company was founded in 1863 and is headquartered in Minneapolis, Minnesota.",https://www.usbank.com
23984,BMY,Bristol-Myers Squibb Company,Healthcare,Drug Manufacturers—General,"Bristol-Myers Squibb Company discovers, develops, licenses, manufactures, and markets biopharmaceutical products worldwide. It offers products for hematology, oncology, cardiovascular, immunology, fibrotic, neuroscience, and covid-19 diseases. The company's products include Revlimid, an oral immunomodulatory drug for the treatment of multiple myeloma; Eliquis, an oral inhibitor for reduction in risk of stroke/systemic embolism in NVAF, and for the treatment of DVT/PE; Opdivo for anti-cancer indications; Pomalyst/Imnovid indicated for patients with multiple myeloma; and Orencia for adult patients with active RA and psoriatic arthritis. It also provides Sprycel for the treatment of Philadelphia chromosome-positive chronic myeloid leukemia; Yervoy for the treatment of patients with unresectable or metastatic melanoma; Abraxane, a protein-bound chemotherapy product; Reblozyl for the treatment of anemia in adult patients with beta thalassemia; and Empliciti for the treatment of multiple myeloma. In addition, the company offers Zeposia to treat relapsing forms of multiple sclerosis; Breyanzi, a CD19-directed genetically modified autologous T cell immunotherapy for the treatment of adult patients with relapsed or refractory large B-cell lymphoma; Inrebic, an oral kinase inhibitor indicated for the treatment of adult patients with myelofibrosis; and Onureg for the treatment of adult patients with AML. It sells products to wholesalers, distributors, pharmacies, retailers, hospitals, clinics, and government agencies. The company was formerly known as Bristol-Myers Company. The company was founded in 1887 and is headquartered in New York, New York.",https://www.bms.com
23985,LLY,Eli Lilly and Company,Healthcare,Drug Manufacturers—General,"Eli Lilly and Company discovers, develops, and markets human pharmaceuticals worldwide. It offers Baqsimi for severe hypoglycemia; Basaglar, Humalog, Humalog Mix 75/25, Humalog U-100, Humalog U-200, Humalog Mix 50/50, insulin lispro, insulin lispro protamine, insulin lispro mix 75/25, Humulin, Humulin 70/30, Humulin N, Humulin R, Humulin U-500, and Lyumjev for diabetes; and Jardiance, Trajenta, and Trulicity for type 2 diabetes. The company provides Alimta for non-small cell lung cancer (NSCLC) and malignant pleural mesothelioma; Cyramza for metastatic gastric cancer, gastro-esophageal junction adenocarcinoma, metastatic NSCLC, metastatic colorectal cancer, and hepatocellular carcinoma; Erbitux for colorectal cancers, and various head and neck cancers; Retevmo for metastatic NSCLC, medullary thyroid cancer, and thyroid cancer; Tyvyt for relapsed or refractory classic Hodgkin's lymph and non-squamous NSCLC; and Verzenio for HR+ and HER2- metastatic breast cancer. It offers Olumiant for rheumatoid arthritis; and Taltz for plaque psoriasis, psoriatic arthritis, ankylosing spondylitis, and non-radiographic axial spondylarthritis. The company offers Cymbalta for depressive disorder, diabetic peripheral neuropathic pain, generalized anxiety disorder, fibromyalgia, and chronic musculoskeletal pain; Emgality for migraine prevention and episodic cluster headache; Reyvow for migraine; and Zyprexa for schizophrenia, bipolar I disorder, and bipolar maintenance. Its Bamlanivimab and etesevimab for COVID-19; Cialis for erectile dysfunction and benign prostatic hyperplasia; and Forteo for osteoporosis. Eli Lilly and Company has collaborations with Incyte Corporation; Pfizer Inc.; AC Immune SA; Centrexion Therapeutics Corporation; ImmuNext, Inc.; Avidity Biosciences, Inc.; AbCellera Biologics Inc.; Junshi Biosciences; MiNA Therapeutics Limited; Verge Genomics; and The Banner Alzheimer's Institute, as well as Kumquat Biosciences. The company was founded in 1876 and is headquartered in Indianapolis, Indiana.",https://www.lilly.com
23986,DIS,The Walt Disney Company,Communication Services,Entertainment,"The Walt Disney Company, together with its subsidiaries, operates as an entertainment company worldwide. It operates through two segments, Disney Media and Entertainment Distribution; and Disney Parks, Experiences and Products. The company engages in the film and episodic television content production and distribution activities, as well as operates television broadcast networks under the ABC, Disney, ESPN, Freeform, FX, Fox, National Geographic, and Star brands; and studios that produces motion pictures under the Walt Disney Pictures, Twentieth Century Studios, Marvel, Lucasfilm, Pixar, and Searchlight Pictures banners. It also offers direct-to-consumer streaming services through Disney+, Disney+ Hotstar, ESPN+, Hulu, and Star+; sale/licensing of film and television content to third-party television and subscription video-on-demand services; theatrical, home entertainment, and music distribution services; staging and licensing of live entertainment events; and post-production services by Industrial Light & Magic and Skywalker Sound. In addition, the company operates theme parks and resorts, such as Walt Disney World Resort in Florida; Disneyland Resort in California; Disneyland Paris; Hong Kong Disneyland Resort; and Shanghai Disney Resort; Disney Cruise Line, Disney Vacation Club, National Geographic Expeditions, and Adventures by Disney as well as Aulani, a Disney resort and spa in Hawaii; licenses its intellectual property to a third party for the operations of the Tokyo Disney Resort; and provides consumer products, which include licensing of trade names, characters, visual, literary, and other IP for use on merchandise, published materials, and games. Further, it sells branded merchandise through retail, online, and wholesale businesses; and develops and publishes books, comic books, and magazines. The Walt Disney Company was founded in 1923 and is based in Burbank, California.",https://www.thewaltdisneycompany.com


In [286]:
# df_yfinance.to_csv('..//data//processed//yfinance_03_12_2022.csv', index = False)

----

### Merging Dataframes of the Stock Watcher Data and the YFinance Data

In [341]:
df2 = pd.read_csv("..//data//processed//stock_watchers_03_12_2022.csv", encoding="utf-8")

In [342]:
df2.head(10)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0
1,02/24/2022,03/11/2022,Shelley M Capito,Spouse,MSFT,1001 - 15000,Microsoft Corporation - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0
2,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LHX,1001 - 15000,"L3Harris Technologies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0
3,02/24/2022,03/11/2022,Shelley M Capito,Spouse,LOW,1001 - 15000,"Lowe's Companies, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0
4,02/24/2022,03/11/2022,Shelley M Capito,Spouse,AAPL,1001 - 15000,Apple Inc. - Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0
5,02/22/2022,03/11/2022,Thomas H Tuberville,Joint,OXY,50001 - 100000,Occidental Petroleum Corporation Common Stock,Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/95310897-f905-4ac6-a595-7ea2df711b28/,,,50001,100000.0
6,02/17/2022,03/11/2022,Thomas H Tuberville,Joint,ARKK,1001 - 15000,"Ark Innovation ETF <div class=""text-muted"">Option Type: Short Sale <br><em>Strike price:</em> $65.00 <br> <em>Expires:</em> 09/16/2022 </div>",Stock Option,Sale (Full),--,https://efdsearch.senate.gov/search/view/ptr/95310897-f905-4ac6-a595-7ea2df711b28/,,,1001,15000.0
7,02/17/2022,03/11/2022,Thomas H Tuberville,Joint,ARKK,1001 - 15000,"Ark Innovation ETF <div class=""text-muted"">Option Type: Short Sale <br><em>Strike price:</em> $65.00 <br> <em>Expires:</em> 09/16/2022 </div>",Stock Option,Sale (Full),--,https://efdsearch.senate.gov/search/view/ptr/95310897-f905-4ac6-a595-7ea2df711b28/,,,1001,15000.0
8,02/10/2022,03/11/2022,Thomas H Tuberville,Joint,X,1001 - 15000,"United States Steel Corporation Common Stock <div class=""text-muted"">Option Type: Short Sale <br><em>Strike price:</em> $22.00 <br> <em>Expires:</em> 09/16/2022 </div>",Stock Option,Sale (Full),--,https://efdsearch.senate.gov/search/view/ptr/95310897-f905-4ac6-a595-7ea2df711b28/,,,1001,15000.0
9,02/10/2022,03/11/2022,Thomas H Tuberville,Joint,X,1001 - 15000,"United States Steel Corporation Common Stock <div class=""text-muted"">Option Type: Short Sale <br><em>Strike price:</em> $27.00 <br> <em>Expires:</em> 09/16/2022 </div>",Stock Option,Sale (Full),--,https://efdsearch.senate.gov/search/view/ptr/95310897-f905-4ac6-a595-7ea2df711b28/,,,1001,15000.0


In [339]:
df_yfinance2 = pd.read_csv("..//data//processed//yfinance_03_12_2022.csv", encoding="utf-8")

In [462]:
df_yfinance2.head(4)

Unnamed: 0,ticker2,name,sector,industry,longbusinesssummary,website
0,,,,,,
1,,,,,,
2,,,,,,
3,ROAD,"Construction Partners, Inc.",Industrials,Engineering & Construction,"Construction Partners, Inc., a civil infrastructure company, engages in the construction and maintenance of roadways across Alabama, Florida, Georgia, North Carolina, and South Carolina. The company, through its subsidiaries, provides various products and services to public and private infrastructure projects, with a focus on highways, roads, bridges, airports, and commercial and residential developments. It also engages in manufacturing and distributing hot mix asphalt (HMA) for internal use and sales to third parties in connection with construction projects; paving activities, including the construction of roadway base layers and application of asphalt pavement; site development, including the installation of utility and drainage systems; mining aggregates, such as sand and gravel that are used as raw materials in the production of HMA; and distributing liquid asphalt cement for internal use and sales to third parties in connection with HMA production. The company was formerly known as SunTx CPI Growth Company, Inc. and changed its name to Construction Partners, Inc. in September 2017. Construction Partners, Inc. was incorporated in 1999 and is headquartered in Dothan, Alabama.",https://www.constructionpartners.net


In [343]:
len(df_yfinance2)

23987

In [345]:
len(df_yfinance2.drop_duplicates())

2630

In [346]:
df_yfinance3 = df_yfinance2.drop_duplicates()

In [347]:
df_yfinance3.head(10)

Unnamed: 0,ticker2,name,sector,industry,longbusinesssummary,website
0,,,,,,
3,ROAD,"Construction Partners, Inc.",Industrials,Engineering & Construction,"Construction Partners, Inc., a civil infrastructure company, engages in the construction and maintenance of roadways across Alabama, Florida, Georgia, North Carolina, and South Carolina. The company, through its subsidiaries, provides various products and services to public and private infrastructure projects, with a focus on highways, roads, bridges, airports, and commercial and residential developments. It also engages in manufacturing and distributing hot mix asphalt (HMA) for internal use and sales to third parties in connection with construction projects; paving activities, including the construction of roadway base layers and application of asphalt pavement; site development, including the installation of utility and drainage systems; mining aggregates, such as sand and gravel that are used as raw materials in the production of HMA; and distributing liquid asphalt cement for internal use and sales to third parties in connection with HMA production. The company was formerly known as SunTx CPI Growth Company, Inc. and changed its name to Construction Partners, Inc. in September 2017. Construction Partners, Inc. was incorporated in 1999 and is headquartered in Dothan, Alabama.",https://www.constructionpartners.net
4,UL,Unilever PLC,Consumer Defensive,Household & Personal Products,"Unilever PLC operates as a fast-moving consumer goods company in Asia, Africa, the Middle East, Turkey, Russia, Ukraine, Belarus, the Americas, and Europe. It operates through Beauty & Personal Care, Foods & Refreshment, and Home Care segments. The Beauty & Personal Care segment provides skin care and hair care products, deodorants, and skin cleansing products under the Axe, Clear, Dove, Lifebuoy, Lux, Pond's, Rexona, Signal, Suave, Sunsilk, TRESemmÃ©, and Vaseline brands. The Foods & Refreshment segment offers ice cream, soups, bouillons, seasonings, mayonnaise, ketchups, and tea categories under the Ben & Jerry's, Breyers, Brooke Bond, Heart (Wall's), Hellmann's, Knorr, Lipton, Magnum, The Vegetarian Butcher, and Unilever Food Solutions brands. The Home Care segment provides fabric solutions, and home care and hygiene products under the Cif, Omo, Persil, Domestos, Seventh Generation, and Sunlight brands. Unilever PLC was incorporated in 1894 and is based in London, the United Kingdom.",https://www.unilever.com
5,WEX,WEX Inc.,Technology,Software—Infrastructure,"WEX Inc. provides financial technology services in North America, the Asia Pacific, and Europe. It operates through three segments: Fleet Solutions, Travel and Corporate Solutions, and Health and Employee Benefit Solutions. The Fleet Solutions segment offers fleet vehicle payment processing services. Its services include customer, account activation, and account retention services; authorization and billing inquiries, and account maintenance services; premium fleet services; credit and collections services; merchant services; analytics solutions with access to web-based data analytics platform that offers insights to fleet managers; and ancillary services and tools to fleets to manage expenses and capital requirements. This segment markets its products directly and indirectly to commercial and government vehicle fleet customers with small, medium, and large fleets, as well as with over-the-road and long haul fleets; and indirectly through co-branded and private label relationships. The Travel and Corporate Solutions segment provides payment processing solutions for payment and transaction monitoring needs. Its products include virtual cards that are used for transactions where no card is presented and that require pre-authorization; and prepaid and gift card products that enables secure payment and financial management solutions with single card options, access to open or closed loop redemption, load limits, and with various expirations. This segment markets its products directly and indirectly to commercial and government organizations. The Health and Employee Benefit Solutions segment offers healthcare payment products and software-as-a-service consumer directed platforms for healthcare market, as well as payroll related and employee benefit products in Brazil. The company was formerly known as Wright Express Corporation and changed its name to WEX Inc. in October 2012. WEX Inc. was founded in 1983 and is headquartered in Portland, Maine.",https://www.wexinc.com
6,VFC,V.F. Corporation,Consumer Cyclical,Apparel Manufacturing,"V.F. Corporation, together with its subsidiaries, engages in the design, production, procurement, marketing, and distribution of branded lifestyle apparel, footwear, and related products for men, women, and children in the Americas, Europe, and the Asia-Pacific. It operates through three segments: Outdoor, Active, and Work. The company offers outdoor, merino wool and other natural fibers-based, lifestyle, and casual apparel; equipment; accessories; outdoor lifestyle, performance-based, youth culture/action sports-inspired, streetwear, and protective work footwear; handbags, luggage, backpacks, totes, and travel accessories; and work and work-inspired lifestyle apparel and footwear. It provides its products under the North Face, Timberland, Smartwool, Icebreaker, Altra, Vans, Supreme, Kipling, Napapijri, Eastpak, JanSport, Eagle Creek, Dickies, and Timberland PRO brand names. The company sells its products primarily to specialty stores, department stores, national chains, and mass merchants, as well as sells through direct-to-consumer operations, including retail stores, concession retail stores, and e-commerce sites, and other digital platforms. V.F. Corporation was founded in 1899 and is headquartered in Denver, Colorado.",https://www.vfc.com
7,OTIS,Otis Worldwide Corporation,Industrials,Specialty Industrial Machinery,"Otis Worldwide Corporation manufactures, installs, and services elevators and escalators in the United States, China, and internationally. The company operates in two segments, New Equipment and Service. The New Equipment segment designs, manufactures, sells, and installs a range of passenger and freight elevators, as well as escalators and moving walkways for residential and commercial buildings, and infrastructure projects. The Service segment performs maintenance and repair services, as well as modernization services to upgrade elevators and escalators. It had a network of approximately 34,000 service mechanics operating approximately 1,400 branches and offices. The company was founded in 1853 and is headquartered in Farmington, Connecticut.",https://www.otis.com
8,HSIC,"Henry Schein, Inc.",Healthcare,Medical Distribution,"Henry Schein, Inc. provides health care products and services to dental practitioners and laboratories, physician practices, government, institutional health care clinics, and other alternate care clinics worldwide. It operates in two segments, Health Care Distribution, and Technology and Value-Added Services. The Health Care Distribution segment offers dental products, including infection-control products, handpieces, preventatives, impression materials, composites, anesthetics, teeth, dental implants, gypsum, acrylics, articulators, abrasives, dental chairs, delivery units and lights, X-ray supplies and equipment, personal protective equipment, and high-tech and digital restoration equipment, as well as equipment repair services. This segment also provides medical products comprising branded and generic pharmaceuticals, vaccines, surgical products, diagnostic tests, infection-control products, X-ray products, equipment, and vitamins. The Technology and Value-Added Services segment offers software, technology, and other value-added services that include practice management software systems for dental and medical practitioners. This segment also provides value-added practice solutions, which comprise financial services on a non-recourse basis, e-services, practice technology, network, and hardware services, as well as continuing education services for practitioners, and consulting and other services. Henry Schein, Inc. was founded in 1932 and is headquartered in Melville, New York.",https://www.henryschein.com
9,CARR,Carrier Global Corporation,Industrials,Building Products & Equipment,"Carrier Global Corporation provides heating, ventilating, and air conditioning (HVAC), refrigeration, fire, security, and building automation technologies worldwide. It operates through three segments: HVAC, Refrigeration, and Fire & Security. The HVAC segment provides products, controls, services, and solutions to meet the heating, cooling, and ventilation needs of residential and commercial customers. Its products include air conditioners, heating systems, controls, and aftermarket components, as well as aftermarket repair and maintenance services and building automation solutions. The Refrigeration segment offers transport refrigeration and monitoring products and services, as well as digital solutions for trucks, trailers, shipping containers, intermodal applications, food retail, and warehouse cooling; and commercial refrigeration solutions, such as refrigerated cabinets, freezers, systems, and controls. The Fire & Security segment provides various residential, commercial, and industrial technologies, including fire, flame, gas, smoke, and carbon monoxide detection; portable fire extinguishers; fire suppression systems; intruder alarms; access control systems; video management systems; and electronic controls. Its other fire and security service offerings comprise audit, design, installation, and system integration, as well as aftermarket maintenance and repair and monitoring services. The company offers its products under the Autronica, Det-Tronics, Edwards, Fireye, GST, Kidde, LenelS2, Marioff, Onity, and Supra; Carrier, Automated Logic, Bryant, CIAT, Day & Night, Heil, NORESCO, and Riello; and Carrier Commercial Refrigeration, Carrier Transicold, and Sensitech brands. The company was incorporated in 2019 and is headquartered in Palm Beach Gardens, Florida.",https://www.corporate.carrier.com
10,MMM,3M Company,Industrials,Conglomerates,"3M Company develops, manufactures, and markets various products worldwide. It operates through four business segments: Safety and Industrial, Transportation and Electronics, Health Care, and Consumer. The Safety and Industrial segment offers personal safety products, industrial adhesives and tapes, abrasives, closure and masking systems, electrical markets, automotive aftermarket, and roofing granules to industrial, electrical, and safety markets. The Transportation and Electronics provides electronics, such as display materials and systems, electronic materials solutions; automotive and aerospace, and commercial solutions; advanced materials; and transportation safety products to transportation and electronic original equipment manufacturer customers. The Health Care segment offers medical and surgical supplies, skin health and infection prevention products, oral care, separation and purification sciences, health information systems, drug delivery systems, and food safety products to healthcare industry. The Consumer segment provides home improvement, home care, and consumer health care products, as well as stationery and office supplies to various consumers. This segment is also involved in the retail auto care business. It offers its products through various e-commerce and traditional wholesalers, retailers, jobbers, distributors, and dealers, as well as directly to users. 3M Company has a strategic collaboration with Merry Maids in residential cleaning sector; and collaborations with The Infectious Disease Research Institute and Duke Human Vaccine Institute to create a vaccine candidate with potential to provide protection against multiple variants of SARS-CoV-2. The company was founded in 1902 and is headquartered in St. Paul, Minnesota.",https://www.3m.com
11,HSY,The Hershey Company,Consumer Defensive,Confectioners,"The Hershey Company, together with its subsidiaries, engages in the manufacture and sale of confectionery products and pantry items in the United States and internationally. The company operates through three segments: North America Confectionery, North America Salty Snacks, and International. It offers chocolate and non-chocolate confectionery products; gum and mint refreshment products, including mints, chewing gums, and bubble gums; pantry items, such as baking ingredients, toppings, beverages, and sundae syrups; and snack items comprising spreads, meat snacks, bars and snack bites, mixes, popcorn, and protein bars. The company provides its products primarily under the Hershey's, Reese's, Kisses, Jolly Rancher, Almond Joy, Brookside, barkTHINS, Cadbury, Good & Plenty, Heath, Kit Kat, Payday, Rolo, Twizzlers, Whoppers, York, Ice Breakers, Breath Savers, Bubble Yum, Lily's, SkinnyPop, Pirates Booty, Paqui, Dot's Homestyle Pretzels, and ONE Bar brands, as well as under the Pelon Pelo Rico, IO-IO, and Sofit brands. It markets and sells its products to wholesale distributors, chain grocery stores, mass merchandisers, chain drug stores, vending companies, wholesale clubs, convenience stores, dollar stores, concessionaires, and department stores. The company was founded in 1894 and is headquartered in Hershey, Pennsylvania.",https://www.thehersheycompany.com


In [348]:
df3 = df2.merge(df_yfinance3, left_on='ticker', right_on='ticker2')

In [460]:
df3.head(3)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high,ticker2,name,sector,industry,longbusinesssummary,website,stock_description
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0,NEE,"NextEra Energy, Inc.",Utilities,Utilities—Regulated Electric,"NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.",https://www.nexteraenergy.com,"Utilities, Utilities—Regulated Electric, NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida."
1,01/14/2022,02/14/2022,Thomas H Tuberville,Joint,NEE,15001 - 50000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Full),--,https://efdsearch.senate.gov/search/view/ptr/c9da6bea-fa14-4a3a-9d8b-1745e834da59/,,,15001,50000.0,NEE,"NextEra Energy, Inc.",Utilities,Utilities—Regulated Electric,"NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.",https://www.nexteraenergy.com,"Utilities, Utilities—Regulated Electric, NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida."
2,12/21/2018,01/08/2019,Susan M Collins,Spouse,NEE,15001 - 50000,"NextEra Energy, Inc.",Stock,Purchase,--,https://efdsearch.senate.gov/search/view/ptr/bb3e64d2-1a93-46ac-ac1d-47508fbb199f/,,,15001,50000.0,NEE,"NextEra Energy, Inc.",Utilities,Utilities—Regulated Electric,"NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.",https://www.nexteraenergy.com,"Utilities, Utilities—Regulated Electric, NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida."


In [350]:
len(df3)

23119

In [353]:
df3['stock_description'] = df3['sector'] + ', ' + df3['industry'] + ', ' + df3['longbusinesssummary']

In [355]:
df3.head(3)

Unnamed: 0,transaction_date,disclosure_date,politician,owner,ticker,amount,asset_description,asset_type,transaction_type,comment,ptr_link,location,cap_gains,amount_low,amount_high,ticker2,name,sector,industry,longbusinesssummary,website,stock_description
0,02/24/2022,03/11/2022,Shelley M Capito,Spouse,NEE,1001 - 15000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Partial),--,https://efdsearch.senate.gov/search/view/ptr/e7893c34-0761-4c2b-ac52-e303f166517f/,,,1001,15000.0,NEE,"NextEra Energy, Inc.",Utilities,Utilities—Regulated Electric,"NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.",https://www.nexteraenergy.com,"Utilities, Utilities—Regulated Electric, NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida."
1,01/14/2022,02/14/2022,Thomas H Tuberville,Joint,NEE,15001 - 50000,"NextEra Energy, Inc. Common Stock",Stock,Sale (Full),--,https://efdsearch.senate.gov/search/view/ptr/c9da6bea-fa14-4a3a-9d8b-1745e834da59/,,,15001,50000.0,NEE,"NextEra Energy, Inc.",Utilities,Utilities—Regulated Electric,"NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.",https://www.nexteraenergy.com,"Utilities, Utilities—Regulated Electric, NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida."
2,12/21/2018,01/08/2019,Susan M Collins,Spouse,NEE,15001 - 50000,"NextEra Energy, Inc.",Stock,Purchase,--,https://efdsearch.senate.gov/search/view/ptr/bb3e64d2-1a93-46ac-ac1d-47508fbb199f/,,,15001,50000.0,NEE,"NextEra Energy, Inc.",Utilities,Utilities—Regulated Electric,"NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida.",https://www.nexteraenergy.com,"Utilities, Utilities—Regulated Electric, NextEra Energy, Inc., through its subsidiaries, generates, transmits, distributes, and sells electric power to retail and wholesale customers in North America. The company generates electricity through wind, solar, nuclear, and fossil fuel, such as coal and natural gas facilities. It also develops, constructs, and operates long-term contracted assets with a focus on renewable generation facilities, electric transmission facilities, and battery storage projects; and owns, develops, constructs, manages and operates electric generation facilities in wholesale energy markets. As of December 31, 2020, the company operated approximately 28,400 megawatts of net generating capacity. It serves approximately 11 million people through approximately 5.6 million customer accounts in the east and lower west coasts of Florida with approximately 76,200 circuit miles of transmission and distribution lines and 673 substations. The company was formerly known as FPL Group, Inc. and changed its name to NextEra Energy, Inc. in 2010. NextEra Energy, Inc. was founded in 1925 and is headquartered in Juno Beach, Florida."


In [357]:
# df3.to_csv('..//data//processed//stock_watchers_w_yfinance_03_12_2022.csv', index = False)

----

##### Identifying Sectors and Industries to Compare with Congressional Committee Assignments

In [450]:
df3.sector.unique()

array(['Utilities', 'Technology', 'Industrials', 'Consumer Cyclical',
       'Energy', nan, 'Basic Materials', 'Financial Services',
       'Consumer Defensive', 'Healthcare', 'Communication Services',
       'Real Estate'], dtype=object)

In [451]:
df3.industry.unique()

array(['Utilities—Regulated Electric', 'Software—Infrastructure',
       'Aerospace & Defense', 'Home Improvement Retail',
       'Consumer Electronics', 'Oil & Gas E&P', nan, 'Steel',
       'Credit Services', 'Software—Application', 'Auto Manufacturers',
       'Engineering & Construction', 'Household & Personal Products',
       'Apparel Manufacturing', 'Specialty Industrial Machinery',
       'Medical Distribution', 'Building Products & Equipment',
       'Conglomerates', 'Confectioners', 'Semiconductors',
       'Specialty Chemicals', 'Internet Retail', 'Gold',
       'Pharmaceutical Retailers', 'Telecom Services',
       'Drug Manufacturers—General', 'Information Technology Services',
       'Beverages—Non-Alcoholic', 'Communication Equipment',
       'Oil & Gas Integrated', 'Chemicals', 'Computer Hardware',
       'Medical Devices', 'Specialty Business Services', 'Tobacco',
       'Farm Products', 'Leisure',
       'Drug Manufacturers—Specialty & Generic',
       'Utilities—Regu

In [453]:
df3_subset = df3[['sector' , 'industry']]

In [455]:
len(df3_subset.drop_duplicates())

144

In [456]:
df3_subset = df3_subset.drop_duplicates()

In [457]:
df3_subset.head(10)

Unnamed: 0,sector,industry
0,Utilities,Utilities—Regulated Electric
57,Technology,Software—Infrastructure
369,Industrials,Aerospace & Defense
387,Consumer Cyclical,Home Improvement Retail
419,Technology,Consumer Electronics
762,Energy,Oil & Gas E&P
791,,
795,Basic Materials,Steel
811,Financial Services,Credit Services
900,Technology,Software—Application


In [459]:
# df3_subset.to_csv('..//data//processed//stock_watchers_w_yfinance_sectors_info_03_12_2022.csv', index = False)