# Sector and Industry

In [2]:
import yfinance as yf
import pandas as pd
from datetime import datetime

In [3]:
tickers_names = {
    "XLC": "Communication Services",
    "XLY": "Consumer Discretionary",
    "XLP": "Consumer Staples",
    "XLE": "Energy",
    "XLF": "Financials",
    "XLV": "Health Care",
    "XLI": "Industrials",
    "XLB": "Materials",
    "XLRE": "Real Estate",
    "XLK": "Technology",
    "XLU": "Utilities",
}
def create_sector_dataframe(tickers_dict):
    df = pd.DataFrame(tickers_dict.items(), columns=['Ticker', 'Sector'])
    extraction_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    df["Extraction_Timestamp"] = extraction_time  
    return df

sector_dataframe = create_sector_dataframe(tickers_names)

df= sector_dataframe
file_name = f"ticker_sector{datetime.now().strftime('%Y%m%d')}.csv"
df.to_csv(file_name)

In [4]:
sectores = {
    "Technology": ["AAPL", "MSFT", "GOOGL"],
    "Financial Services": ["JPM", "BAC", "WFC"],
    "Healthcare": ["JNJ", "PFE", "MRK"],
    "Consumer Cyclical": ["AMZN", "TSLA", "NKE"],
    "Industrials": ["BA", "CAT", "GE"],
    "Communication Services": ["GOOGL", "FB", "NFLX"],
    "Consumer Defensive": ["KO", "PG", "WMT"],
    "Energy": ["XOM", "CVX", "BP"],
    "Basic Materials": ["BA", "NEM", "FCX"],
    "Real Estate": ["PLD", "SPG", "DLR"],
    "Utilities": ["DUK", "SO", "NEE"],
}

data = []

for sector, tickers in sectores.items():
    for ticker in tickers:
        try:
            stock = yf.Ticker(ticker)
            info = stock.info

            data.append({
                "Sector": sector,
                "Name": info.get('longName', 'N/A'),
                "Symbol": info.get('symbol', 'N/A'),
                "Ticker": ticker
            })
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")

df = pd.DataFrame(data)

extraction_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
df["Extraction_Timestamp"] = extraction_time

file_name = f"company_sector{datetime.now().strftime('%Y%m%d')}.csv"
df.to_csv(file_name)

In [5]:
tech = yf.Sector("technology")

In [6]:
data = {
    'Tech Sector Info': {
        'Key': tech.key,
        'Name': tech.name,
        'Symbol': tech.symbol,
        'Ticker': tech.ticker,
        'Overview': tech.overview,
        'Top Companies': tech.top_companies,
        'Research Reports': tech.research_reports,
        'Top ETFs': tech.top_etfs,
        'Top Mutual Funds': tech.top_mutual_funds,
        'Industries': tech.industries,
    }}
data

{'Tech Sector Info': {'Key': 'technology',
  'Name': 'Technology',
  'Symbol': '^YH311',
  'Ticker': yfinance.Ticker object <^YH311>,
  'Overview': {'companies_count': 809,
   'market_cap': 19178459234304,
   'message_board_id': 'INDEXYH311',
   'description': 'Companies engaged in the design, development, and support of computer operating systems and applications. This sector also includes companies that make computer equipment, data storage products, networking products, semiconductors, and components. Companies in this sector include Apple, Microsoft, and IBM.',
   'industries_count': 12,
   'market_weight': 0.29183847,
   'employee_count': 7339636},
  'Top Companies':                                                 name rating  market weight
  symbol                                                                    
  AAPL                                      Apple Inc.    Buy       0.189459
  NVDA                              NVIDIA Corporation    Buy       0.186012
  MSFT       

In [12]:
top_companies= yf.Sector("technology").top_companies
df = pd.DataFrame(top_companies)
extraction_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
df["Extraction_Timestamp"] = extraction_time
#filename = f"stocks{datetime.now().strftime('%Y%m%d')}.csv"
#stocks.to_csv(file_name)


#df.to_csv("top_companies_tech.csv", index=False)
df

Unnamed: 0_level_0,name,rating,market weight,Extraction_Timestamp
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,Apple Inc.,Buy,0.189459,2024-10-23 08:26:16
NVDA,NVIDIA Corporation,Buy,0.186012,2024-10-23 08:26:16
MSFT,Microsoft Corporation,Buy,0.16785,2024-10-23 08:26:16
AVGO,Broadcom Inc.,Buy,0.044254,2024-10-23 08:26:16
ORCL,Oracle Corporation,Buy,0.02566,2024-10-23 08:26:16
CRM,"Salesforce, Inc.",Buy,0.01456,2024-10-23 08:26:16
AMD,"Advanced Micro Devices, Inc.",Buy,0.013172,2024-10-23 08:26:16
ACN,Accenture plc,Buy,0.012271,2024-10-23 08:26:16
CSCO,"Cisco Systems, Inc.",Buy,0.011931,2024-10-23 08:26:16
ADBE,Adobe Inc.,Buy,0.011466,2024-10-23 08:26:16


In [14]:
df["name"]

symbol
AAPL                                      Apple Inc.
NVDA                              NVIDIA Corporation
MSFT                           Microsoft Corporation
AVGO                                   Broadcom Inc.
ORCL                              Oracle Corporation
CRM                                 Salesforce, Inc.
AMD                     Advanced Micro Devices, Inc.
ACN                                    Accenture plc
CSCO                             Cisco Systems, Inc.
ADBE                                      Adobe Inc.
IBM      International Business Machines Corporation
QCOM                           QUALCOMM Incorporated
NOW                                 ServiceNow, Inc.
TXN                   Texas Instruments Incorporated
INTU                                     Intuit Inc.
UBER                         Uber Technologies, Inc.
AMAT                         Applied Materials, Inc.
ANET                           Arista Networks, Inc.
PANW                        Palo Alto N

In [16]:
top_ETFs = yf.Sector("technology").top_etfs
print(type(top_ETFs))
print(top_ETFs)
if isinstance(top_ETFs, dict):
    df = pd.DataFrame.from_dict(top_ETFs, orient='index').reset_index()
elif isinstance(top_ETFs, list):
    df = pd.DataFrame(top_ETFs)
else:
    raise ValueError("top_ETFs no es un formato compatible para crear un DataFrame.")


extraction_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
df["Extraction_Timestamp"] = extraction_time

df.to_csv("top_etfs_tech.csv", index=False)
df


<class 'dict'>
{'VGT': 'Vanguard Information Tech ETF', 'XLK': 'SPDR Select Sector Fund - Techn', 'SMH': 'VanEck Semiconductor ETF', 'IYW': 'iShares U.S. Technology ETF', 'SOXX': 'iShares Semiconductor ETF', 'FTEC': 'Fidelity MSCI Information Techn', 'SOXL': 'Direxion Daily Semiconductor Bu', 'IGV': 'iShares Expanded Tech-Software ', 'CIBR': 'First Trust NASDAQ Cybersecurit', 'IGM': 'iShares Expanded Tech Sector ET'}


Unnamed: 0,index,0,Extraction_Timestamp
0,VGT,Vanguard Information Tech ETF,2024-10-23 09:09:12
1,XLK,SPDR Select Sector Fund - Techn,2024-10-23 09:09:12
2,SMH,VanEck Semiconductor ETF,2024-10-23 09:09:12
3,IYW,iShares U.S. Technology ETF,2024-10-23 09:09:12
4,SOXX,iShares Semiconductor ETF,2024-10-23 09:09:12
5,FTEC,Fidelity MSCI Information Techn,2024-10-23 09:09:12
6,SOXL,Direxion Daily Semiconductor Bu,2024-10-23 09:09:12
7,IGV,iShares Expanded Tech-Software,2024-10-23 09:09:12
8,CIBR,First Trust NASDAQ Cybersecurit,2024-10-23 09:09:12
9,IGM,iShares Expanded Tech Sector ET,2024-10-23 09:09:12


In [18]:
industry =yf.Sector("technology").industries
df=pd.DataFrame(industry)
extraction_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
df["Extraction_Timestamp"] = extraction_time
file_name = f"industry{datetime.now().strftime('%Y%m%d')}.csv"
df.to_csv(file_name)