In [49]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import wikipedia as wp
import yfinance as yf 
from ta.momentum import rsi 
from ta.trend import macd

In [50]:
# Fetch the S&P 500 constituents table from Wikipedia.
# Several page titles are tried in order; the first one that loads wins.
candidate_titles = ("List of S&P 500 companies", "S&P 500 Index", "S&P 500")
for attempt, title in enumerate(candidate_titles, start=1):
    try:
        html = wp.page(title).html().encode("UTF-8")
        break
    except Exception:
        # `except Exception` (not a bare `except`) so Ctrl-C / kernel interrupts
        # still propagate. If the last candidate fails too, surface its error.
        if attempt == len(candidate_titles):
            raise

tables = pd.read_html(html)
print(f"Found {len(tables)} tables")

# Select the constituents table by content (it must expose a "Symbol" column)
# instead of trusting that it is always the first table on the page.
constituents = next((table for table in tables if "Symbol" in table.columns), None)
if constituents is None:
    raise KeyError("No table with a 'Symbol' column was found on the Wikipedia page")

stocks = (
    constituents
    .set_index("Symbol")
    # Drop the GOOG row when present; errors='ignore' keeps this a no-op otherwise.
    .drop("GOOG", errors='ignore')
    # Re-key BRK.B as BRK-B (presumably the ticker spelling Yahoo Finance expects -- TODO confirm).
    .rename(index={"BRK.B": "BRK-B"}, errors='ignore')
)
stocks.head()

Found 2 tables


Unnamed: 0_level_0,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
ABT,Abbott Laboratories,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
ABBV,AbbVie,Health Care,Biotechnology,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [53]:
# Persist the GICS sector of every constituent, keyed by the Symbol index.
sector_by_symbol = stocks[['GICS Sector']]
sector_by_symbol.to_csv('../input/sp500_2009_2025/domain.csv')

In [None]:
# Collect the `info` record of every constituent, one yfinance request per ticker.
info_records = []
for ticker in stocks.index:
    info_records.append(yf.Ticker(ticker).info)

# One row per record, indexed by the "symbol" field of the record itself.
fundamentals = pd.DataFrame(info_records).set_index("symbol")
# Name the index "Symbol" for consistency with the stocks dataframe.
fundamentals.index = fundamentals.index.rename("Symbol")

In [20]:
# List the columns that the per-ticker `info` records produced.
fundamentals.columns

Index(['address1', 'city', 'state', 'zip', 'country', 'phone', 'website',
       'industry', 'industryKey', 'industryDisp',
       ...
       'regularMarketChangePercent', 'regularMarketPrice', 'trailingPegRatio',
       'address2', 'displayName', 'fax', 'ipoExpectedDate', 'prevName',
       'nameChangeDate', 'industrySymbol'],
      dtype='object', length=185)

In [21]:
# Display the raw fundamentals frame (indexed by Symbol) for a visual sanity check.
fundamentals

Unnamed: 0_level_0,address1,city,state,zip,country,phone,website,industry,industryKey,industryDisp,sector,sectorKey,sectorDisp,longBusinessSummary,fullTimeEmployees,companyOfficers,auditRisk,boardRisk,compensationRisk,shareHolderRightsRisk,overallRisk,governanceEpochDate,compensationAsOfEpochDate,irWebsite,executiveTeam,maxAge,priceHint,previousClose,open,dayLow,dayHigh,regularMarketPreviousClose,regularMarketOpen,regularMarketDayLow,regularMarketDayHigh,dividendRate,dividendYield,exDividendDate,payoutRatio,fiveYearAvgDividendYield,...,regularMarketChange,regularMarketDayRange,fullExchangeName,averageDailyVolume3Month,fiftyTwoWeekLowChange,fiftyTwoWeekLowChangePercent,fiftyTwoWeekRange,fiftyTwoWeekHighChange,fiftyTwoWeekHighChangePercent,fiftyTwoWeekChangePercent,dividendDate,earningsTimestamp,earningsTimestampStart,earningsTimestampEnd,earningsCallTimestampStart,earningsCallTimestampEnd,isEarningsDateEstimate,epsTrailingTwelveMonths,epsForward,epsCurrentYear,priceEpsCurrentYear,fiftyDayAverageChange,fiftyDayAverageChangePercent,twoHundredDayAverageChange,twoHundredDayAverageChangePercent,sourceInterval,exchangeDataDelayedBy,averageAnalystRating,cryptoTradeable,marketState,regularMarketChangePercent,regularMarketPrice,trailingPegRatio,address2,displayName,fax,ipoExpectedDate,prevName,nameChangeDate,industrySymbol
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
MMM,3M Center,Saint Paul,MN,55144-1000,United States,651 733 1110,https://www.3m.com,Conglomerates,conglomerates,Conglomerates,Industrials,industrials,Industrials,3M Company provides diversified technology ser...,61500.0,"[{'maxAge': 1, 'name': 'Mr. William M. Brown',...",1.0,7.0,5.0,4.0,4.0,1.754006e+09,1.735603e+09,http://phx.corporate-ir.net/phoenix.zhtml?c=80...,[],86400.0,2,154.34,154.12,153.2050,155.3050,154.34,154.12,153.2050,155.3050,2.92,1.89,1.756080e+09,0.3972,4.12,...,0.410004,153.205 - 155.305,NYSE,3413033.0,32.769997,0.268651,121.98 - 164.15,-9.399994,-0.057265,20.334375,1.757635e+09,1.752842e+09,1.752842e+09,1.752842e+09,1.752844e+09,1.752844e+09,False,7.20,7.90,7.88836,19.617512,3.333404,0.022015,11.444046,0.079857,15,0,2.1 - Buy,False,PRE,0.265650,154.75,3.5243,,,,,,,
AOS,11270 West Park Place,Milwaukee,WI,53224,United States,414 359 4000,https://www.aosmith.com,Specialty Industrial Machinery,specialty-industrial-machinery,Specialty Industrial Machinery,Industrials,industrials,Industrials,A. O. Smith Corporation manufactures and marke...,12700.0,"[{'maxAge': 1, 'name': 'Mr. Stephen M. Shafer'...",4.0,10.0,7.0,10.0,10.0,1.754006e+09,1.735603e+09,http://investor.shareholder.com/aosmith/,[],86400.0,2,72.82,72.64,71.8450,73.0400,72.82,72.64,71.8450,73.0400,1.36,1.89,1.753920e+09,0.3733,1.71,...,-0.970001,71.845 - 73.04,NYSE,1684848.0,13.019997,0.221316,58.83 - 92.06,-20.210000,-0.219531,-11.438429,1.755216e+09,1.753360e+09,1.761050e+09,1.761050e+09,1.753366e+09,1.753366e+09,True,3.59,4.07,3.82958,18.761848,3.067795,0.044602,3.378151,0.049336,15,0,2.6 - Hold,False,PRE,-1.332050,71.85,1.9581,Suite 170,A. O. Smith,,,,,
ABT,100 Abbott Park Road,North Chicago,IL,60064-6400,United States,224 667 6100,https://www.abbott.com,Medical Devices,medical-devices,Medical Devices,Healthcare,healthcare,Healthcare,"Abbott Laboratories, together with its subsidi...",114000.0,"[{'maxAge': 1, 'name': 'Mr. Robert B. Ford', '...",9.0,7.0,9.0,2.0,7.0,1.754006e+09,1.672445e+09,http://www.abbottinvestor.com/phoenix.zhtml?c=...,[],86400.0,2,131.25,132.05,131.7200,133.8100,131.25,132.05,131.7200,133.8100,2.36,1.78,1.752538e+09,0.2861,1.68,...,1.190000,131.72 - 133.81,NYSE,6518717.0,21.580002,0.194660,110.86 - 141.23,-8.789993,-0.062239,18.144512,1.755216e+09,1.752755e+09,1.760531e+09,1.760531e+09,1.752757e+09,1.752757e+09,True,7.97,5.16,5.14258,25.753610,1.133408,0.008632,5.699455,0.044969,15,0,1.8 - Buy,False,PRE,0.906668,132.44,,Abbott Park,,,,,,
ABBV,1 North Waukegan Road,North Chicago,IL,60064-6400,United States,847 932 7900,https://www.abbvie.com,Drug Manufacturers - General,drug-manufacturers-general,Drug Manufacturers - General,Healthcare,healthcare,Healthcare,"AbbVie Inc., a research-based biopharmaceutica...",55000.0,"[{'maxAge': 1, 'name': 'Mr. Scott T. Reents', ...",6.0,5.0,2.0,9.0,7.0,1.754006e+09,1.735603e+09,,[],86400.0,2,206.19,206.65,206.6500,211.6938,206.19,206.65,206.6500,211.6938,6.56,3.14,1.752538e+09,3.0381,3.89,...,2.959990,206.65 - 211.6938,NYSE,5664393.0,45.339996,0.276784,163.81 - 218.66,-9.510010,-0.043492,6.508124,1.755216e+09,1.753965e+09,1.753965e+09,1.753965e+09,1.753967e+09,1.753967e+09,False,2.09,12.13,11.97246,17.469260,16.878387,0.087784,20.569840,0.109077,15,0,1.9 - Buy,False,PRE,1.435560,209.15,0.4416,,AbbVie,,,,,
ACN,1 Grand Canal Square,Dublin,,D02 P820,Ireland,353 1 646 2000,https://www.accenture.com,Information Technology Services,information-technology-services,Information Technology Services,Technology,technology,Technology,Accenture plc provides strategy and consulting...,791000.0,"[{'maxAge': 1, 'name': 'Ms. Julie T. Spellman ...",1.0,4.0,4.0,4.0,2.0,1.754006e+09,1.735603e+09,http://investor.accenture.com/phoenix.zhtml?c=...,[],86400.0,2,255.88,257.04,254.9900,260.3900,255.88,257.04,254.9900,260.3900,5.92,2.32,1.752106e+09,0.4558,1.41,...,-0.830002,254.99 - 260.39,NYSE,3973979.0,18.380005,0.077661,236.67 - 398.35,-143.300000,-0.359734,-22.845387,1.755216e+09,1.750423e+09,1.758717e+09,1.759149e+09,1.750421e+09,1.750421e+09,True,12.57,14.07,12.87902,19.803526,-25.600204,-0.091217,-69.282880,-0.213617,15,0,2.0 - Buy,False,PRE,-0.324372,255.05,1.9660,Grand Canal Harbour,Accenture,353 1 646 2020,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XYL,301 Water Street SE,Washington,DC,20003,United States,202 869 9150,https://www.xylem.com,Specialty Industrial Machinery,specialty-industrial-machinery,Specialty Industrial Machinery,Industrials,industrials,Industrials,"Xylem Inc., together with its subsidiaries, en...",23000.0,"[{'maxAge': 1, 'name': 'Mr. Matthew Francis Pi...",1.0,2.0,5.0,4.0,2.0,1.754006e+09,1.735603e+09,,[],86400.0,2,141.83,141.19,140.9000,142.6900,141.83,141.19,140.9000,142.6900,1.60,1.13,1.756339e+09,0.3958,1.13,...,0.039993,140.9 - 142.69,NYSE,1202440.0,41.399994,0.412063,100.47 - 145.33,-3.460007,-0.023808,4.762960,1.758758e+09,1.753965e+09,1.761827e+09,1.761827e+09,1.753967e+09,1.753967e+09,True,3.84,4.75,4.81587,29.458853,8.262802,0.061844,16.877647,0.135029,15,0,2.0 - Buy,False,PRE,0.028198,141.87,2.5648,Suite 200,Xylem,,,,,
YUM,1441 Gardiner Lane,Louisville,KY,40213,United States,502 874 8300,https://www.yum.com,Restaurants,restaurants,Restaurants,Consumer Cyclical,consumer-cyclical,Consumer Cyclical,"Yum! Brands, Inc., together with its subsidiar...",40000.0,"[{'maxAge': 1, 'name': 'Mr. David W. Gibbs', '...",2.0,2.0,7.0,7.0,4.0,1.754006e+09,1.735603e+09,http://www.yum.com/investors/,[],86400.0,2,152.01,152.98,147.5500,152.9800,152.01,152.98,147.5500,152.9800,2.84,1.90,1.748304e+09,0.5444,1.80,...,-2.670000,147.55 - 152.98,NYSE,1997662.0,27.210000,0.222795,122.13 - 163.3,-13.960007,-0.085487,9.198594,1.749168e+09,1.754397e+09,1.762259e+09,1.762259e+09,1.754396e+09,1.754396e+09,True,5.06,6.08,5.99337,24.917534,3.437195,0.023558,5.973694,0.041667,15,0,2.4 - Buy,False,PRE,-1.756460,149.34,2.1131,,Yum! Brands,,,,,
ZBRA,3 Overlook Point,Lincolnshire,IL,60069,United States,847 634 6700,https://www.zebra.com,Communication Equipment,communication-equipment,Communication Equipment,Technology,technology,Technology,"Zebra Technologies Corporation, together with ...",9900.0,"[{'maxAge': 1, 'name': 'Mr. William J. Burns',...",1.0,8.0,4.0,9.0,8.0,1.754006e+09,1.735603e+09,http://www.zebra.com/id/zebra/na/en/index/abou...,[],86400.0,2,312.06,310.24,302.8301,310.5250,312.06,310.24,302.8301,310.5250,,,,0.0000,,...,-7.320010,302.8301 - 310.525,NasdaqGS,519091.0,99.009995,0.481262,205.73 - 427.76,-123.020020,-0.287591,-10.936403,,1.761655e+09,1.761655e+09,1.761655e+09,1.754397e+09,1.754397e+09,False,10.60,16.04,15.59584,19.539825,-12.121002,-0.038253,-23.516998,-0.071642,15,0,1.9 - Buy,False,PRE,-2.345710,304.74,1.1835,,Zebra,,,,,
ZBH,345 East Main Street,Warsaw,IN,46580,United States,574 373 3333,https://www.zimmerbiomet.com,Medical Devices,medical-devices,Medical Devices,Healthcare,healthcare,Healthcare,"Zimmer Biomet Holdings, Inc., together with it...",17000.0,"[{'maxAge': 1, 'name': 'Mr. Ivan Tornos', 'ag...",5.0,7.0,6.0,4.0,5.0,1.754006e+09,1.735603e+09,http://investor.zimmer.com/,[],86400.0,2,104.31,104.54,104.3500,106.6700,104.31,104.54,104.3500,106.6700,0.96,0.91,1.750896e+09,0.2341,0.78,...,1.400000,104.35 - 106.67,NYSE,1923540.0,16.489998,0.184824,89.22 - 116.71,-11.000000,-0.094251,-6.797743,1.753920e+09,1.754539e+09,1.761827e+09,1.761827e+09,1.754570e+09,1.754570e+09,True,4.10,8.60,8.13519,12.994164,10.636597,0.111878,3.480698,0.034048,15,0,2.5 - Buy,False,PRE,1.342150,105.71,1.3736,,Zimmer Biomet,,,,,


In [22]:
# Restrict the frame to the metric columns listed below and replace any
# missing value with 0.
selected_metrics = [
    "marketCap",
    "trailingPE",
    "forwardPE",
    "priceToBook",
    "trailingEps",
    "forwardEps",
    "bookValue",
    "payoutRatio",
    "beta",
    "fiveYearAvgDividendYield",
    "52WeekChange",
    "averageVolume",
    "enterpriseToRevenue",
    "profitMargins",
]
fundamentals = fundamentals[selected_metrics].fillna(0)
fundamentals.head()

Unnamed: 0_level_0,marketCap,trailingPE,forwardPE,priceToBook,trailingEps,forwardEps,bookValue,payoutRatio,beta,fiveYearAvgDividendYield,52WeekChange,averageVolume,enterpriseToRevenue,profitMargins
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
MMM,82424500000.0,21.493055,19.588608,19.214056,7.2,7.9,8.054,0.3972,1.104,4.12,0.203344,3413033.0,3.742,0.16011
AOS,10068340000.0,20.013927,17.65356,5.454338,3.59,4.07,13.173,0.3733,1.22,1.71,-0.114384,1684848.0,5.354,0.13683
ABT,230506500000.0,16.617315,25.666668,4.558566,7.97,5.16,29.053,0.2861,0.705,1.68,0.181445,6518717.0,5.497,0.32427
ABBV,369476000000.0,100.07177,17.242374,-2011.0576,2.09,12.13,-0.104,3.0381,0.503,3.89,0.065081,5664393.0,7.434,0.06451
ACN,158858700000.0,20.290375,18.127222,5.198207,12.57,14.07,49.065,0.4558,1.29,1.41,-0.228454,3973979.0,2.313,0.11607


In [23]:
# Check the (rows, columns) size of the fundamentals frame.
fundamentals.shape

(502, 14)

In [24]:
# Z-score each column: subtract the column mean, then divide by the column
# standard deviation.
column_means = fundamentals.mean()
column_stds = fundamentals.std()
normalized_fundamentals = (fundamentals - column_means) / column_stds

In [None]:
# Save the normalised fundamentals so later cells can reload them from disk.
fundamentals_csv_path = "../input/sp500_2009_2025/fundamentals.csv"
normalized_fundamentals.to_csv(fundamentals_csv_path)

In [54]:
# Reload the saved fundamentals and use their index as the list of tickers
# to download price history for.
# NOTE: this rebinds `stocks` from the constituents DataFrame built earlier to
# a plain list of ticker strings, so earlier cells that use `stocks.index`
# cannot be re-run after this one without rebuilding the DataFrame.
fundamentals_df = pd.read_csv('../input/sp500_2009_2025/fundamentals.csv', index_col=0)

# Tickers skipped because they do not have relevant data.
# NOTE(review): 'BF.B' presumably fails because the data source spells it
# 'BF-B' (compare the BRK.B -> BRK-B rename) -- TODO confirm.
excluded_tickers = {'BF.B'}

# Filter rather than call list.remove(): remove() raises ValueError when the
# ticker is not in the list (e.g. after the CSV is regenerated without it).
stocks = [
    ticker
    for ticker in fundamentals_df.index.to_list()
    if ticker not in excluded_tickers
]

In [55]:
# Download the last 15 years of price history for every ticker, one request
# each, with actions=False. The result list is kept in the same order as `stocks`.
values = []
for ticker in stocks:
    price_history = yf.Ticker(ticker).history(period="15y", actions=False)
    values.append(price_history)

values

[                                 Open        High         Low       Close  \
 Date                                                                        
 2010-08-23 00:00:00-04:00   43.121715   43.808380   43.063162   43.158974   
 2010-08-24 00:00:00-04:00   42.690560   43.116403   42.301981   42.823635   
 2010-08-25 00:00:00-04:00   42.589401   43.169607   42.301957   42.983303   
 2010-08-26 00:00:00-04:00   43.254800   43.292059   42.317950   42.466995   
 2010-08-27 00:00:00-04:00   42.647954   43.164285   42.158235   43.116379   
 ...                               ...         ...         ...         ...   
 2025-08-14 00:00:00-04:00  159.600006  160.070007  156.630005  156.649994   
 2025-08-15 00:00:00-04:00  157.369995  157.369995  151.610001  152.389999   
 2025-08-18 00:00:00-04:00  152.360001  155.429993  152.190002  152.860001   
 2025-08-19 00:00:00-04:00  152.839996  155.720001  152.050003  154.339996   
 2025-08-20 00:00:00-04:00  154.119995  155.339996  153.169998  

In [56]:
# Stack the per-ticker frames into one long frame; the ticker becomes the
# outer index level above each frame's own Date index.
sp500_df = pd.concat(
    objs=values,
    keys=stocks,
)
sp500_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume
Unnamed: 0_level_1,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MMM,2010-08-23 00:00:00-04:00,43.121715,43.808380,43.063162,43.158974,3879106
MMM,2010-08-24 00:00:00-04:00,42.690560,43.116403,42.301981,42.823635,5353176
MMM,2010-08-25 00:00:00-04:00,42.589401,43.169607,42.301957,42.983303,4278212
MMM,2010-08-26 00:00:00-04:00,43.254800,43.292059,42.317950,42.466995,4278092
MMM,2010-08-27 00:00:00-04:00,42.647954,43.164285,42.158235,43.116379,4121296
...,...,...,...,...,...,...
ZTS,2025-08-14 00:00:00-04:00,151.570007,153.360001,151.169998,153.059998,2565100
ZTS,2025-08-15 00:00:00-04:00,153.720001,155.100006,152.979996,154.110001,3705700
ZTS,2025-08-18 00:00:00-04:00,154.000000,154.940002,151.600006,152.320007,2158500
ZTS,2025-08-19 00:00:00-04:00,152.919998,155.699997,152.509995,155.669998,2594100


In [57]:
# Count the missing values in each price column of the combined frame.
sp500_df.isnull().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [58]:
# Write the combined price history to disk.
prices_csv_path = '../input/sp500_2010_2025/sp500_prices_newversion.csv'
sp500_df.to_csv(prices_csv_path)