## Analyzing the Performance of the Dow Jones Industrial Average

The main goal of this project is to analyze the historical performance of the Dow Jones Industrial Average (DJIA) and of its constituent stocks, and to draw informed conclusions about its future trends.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline 
plt.style.use('seaborn')

import yfinance as yf
import warnings
# Installs a blanket 'ignore' filter: warnings raised after this line are
# hidden, whatever their category.
warnings.filterwarnings('ignore')

In [2]:
url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'

# Column headers that the following cells read from the components table.
required_columns = {'Symbol', 'Date added', 'Index weighting'}

# Select the components table by its headers instead of by its position on
# the page, so an extra table inserted above it cannot silently shift the
# selection to the wrong data.
dataframe = next(
    (table for table in pd.read_html(url) if required_columns.issubset(table.columns)),
    None,
)
if dataframe is None:
    raise ValueError(
        f'No table with columns {sorted(required_columns)} was found at {url}'
    )

dataframe

Unnamed: 0,Company,Exchange,Symbol,Industry,Date added,Notes,Index weighting
0,3M,NYSE,MMM,Conglomerate,1976-08-09,As Minnesota Mining and Manufacturing,2.41%
1,American Express,NYSE,AXP,Financial services,1982-08-30,,3.02%
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,,5.48%
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,,2.84%
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,,3.36%
5,Caterpillar,NYSE,CAT,Construction and mining,1991-05-06,,4.52%
6,Chevron,NYSE,CVX,Petroleum industry,2008-02-19,Also 1930-07-18 to 1999-11-01,3.50%
7,Cisco,NASDAQ,CSCO,Information technology,2009-06-08,,0.96%
8,Coca-Cola,NYSE,KO,Drink industry,1987-03-12,Also 1932-05-26 to 1935-11-20,1.22%
9,Disney,NYSE,DIS,Broadcasting and entertainment,1991-05-06,,1.89%


In [3]:
# Give the two awkward headers identifier-friendly names in a single pass.
column_aliases = {
    'Date added': 'Date_Added',
    'Index weighting': 'Weights',
}
dataframe.rename(columns=column_aliases, inplace=True)

In [4]:
# Show the table again to confirm the renamed column headers.
dataframe

Unnamed: 0,Company,Exchange,Symbol,Industry,Date_Added,Notes,Weights
0,3M,NYSE,MMM,Conglomerate,1976-08-09,As Minnesota Mining and Manufacturing,2.41%
1,American Express,NYSE,AXP,Financial services,1982-08-30,,3.02%
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,,5.48%
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,,2.84%
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,,3.36%
5,Caterpillar,NYSE,CAT,Construction and mining,1991-05-06,,4.52%
6,Chevron,NYSE,CVX,Petroleum industry,2008-02-19,Also 1930-07-18 to 1999-11-01,3.50%
7,Cisco,NASDAQ,CSCO,Information technology,2009-06-08,,0.96%
8,Coca-Cola,NYSE,KO,Drink industry,1987-03-12,Also 1932-05-26 to 1935-11-20,1.22%
9,Disney,NYSE,DIS,Broadcasting and entertainment,1991-05-06,,1.89%


In [5]:
# Convert the 'Date_Added' strings to pandas datetimes.
dataframe['Date_Added'] = pd.to_datetime(dataframe['Date_Added'])

In [6]:
# Strip the percent sign and convert the weights to numbers. The dtype guard
# makes the cell safe to re-run: once the column is numeric it no longer has a
# .str accessor, so a second execution would otherwise raise AttributeError.
if not pd.api.types.is_numeric_dtype(dataframe['Weights']):
    dataframe['Weights'] = pd.to_numeric(
        dataframe['Weights'].str.replace('%', '', regex=False)
    )

In [7]:
# Show the table after the date and weight conversions.
dataframe

Unnamed: 0,Company,Exchange,Symbol,Industry,Date_Added,Notes,Weights
0,3M,NYSE,MMM,Conglomerate,1976-08-09,As Minnesota Mining and Manufacturing,2.41
1,American Express,NYSE,AXP,Financial services,1982-08-30,,3.02
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,,5.48
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,,2.84
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,,3.36
5,Caterpillar,NYSE,CAT,Construction and mining,1991-05-06,,4.52
6,Chevron,NYSE,CVX,Petroleum industry,2008-02-19,Also 1930-07-18 to 1999-11-01,3.5
7,Cisco,NASDAQ,CSCO,Information technology,2009-06-08,,0.96
8,Coca-Cola,NYSE,KO,Drink industry,1987-03-12,Also 1932-05-26 to 1935-11-20,1.22
9,Disney,NYSE,DIS,Broadcasting and entertainment,1991-05-06,,1.89


In [8]:
# Print each column's dtype and non-null count to verify the conversions.
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Company     30 non-null     object        
 1   Exchange    30 non-null     object        
 2   Symbol      30 non-null     object        
 3   Industry    30 non-null     object        
 4   Date_Added  30 non-null     datetime64[ns]
 5   Notes       5 non-null      object        
 6   Weights     30 non-null     float64       
dtypes: datetime64[ns](1), float64(1), object(5)
memory usage: 1.8+ KB


In [9]:
# List the current column labels.
dataframe.columns

Index(['Company', 'Exchange', 'Symbol', 'Industry', 'Date_Added', 'Notes',
       'Weights'],
      dtype='object')

In [10]:
# Remove the free-text 'Notes' column. errors='ignore' makes the cell safe to
# re-run: without it a second execution raises KeyError because the column is
# already gone.
dataframe.drop(columns = 'Notes', inplace = True, errors = 'ignore')

In [11]:
# Preview the first five rows now that 'Notes' has been removed.
dataframe.head(n=5)

Unnamed: 0,Company,Exchange,Symbol,Industry,Date_Added,Weights
0,3M,NYSE,MMM,Conglomerate,1976-08-09,2.41
1,American Express,NYSE,AXP,Financial services,1982-08-30,3.02
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,5.48
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,2.84
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,3.36


In [12]:
# Use the ticker symbol as the row index. The membership check makes the cell
# safe to re-run: after the first execution 'Symbol' is the index rather than
# a column, so calling set_index again would raise KeyError.
if 'Symbol' in dataframe.columns:
    dataframe.set_index('Symbol', inplace = True)

In [13]:
# Show the table now that it is indexed by ticker symbol.
dataframe

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MMM,3M,NYSE,Conglomerate,1976-08-09,2.41
AXP,American Express,NYSE,Financial services,1982-08-30,3.02
AMGN,Amgen,NASDAQ,Biopharmaceutical,2020-08-31,5.48
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84
BA,Boeing,NYSE,Aerospace and defense,1987-03-12,3.36
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5
CSCO,Cisco,NASDAQ,Information technology,2009-06-08,0.96
KO,Coca-Cola,NYSE,Drink industry,1987-03-12,1.22
DIS,Disney,NYSE,Broadcasting and entertainment,1991-05-06,1.89


In [14]:
# Collect the ticker symbols from the index as a plain Python list.
symbols = list(dataframe.index)
symbols

['MMM',
 'AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CVX',
 'CSCO',
 'KO',
 'DIS',
 'DOW',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'MCD',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'CRM',
 'TRV',
 'UNH',
 'VZ',
 'V',
 'WBA',
 'WMT']

In [18]:
# Latest 'Date_Added' value in the table; used as the start of the price history.
last_update = dataframe.Date_Added.max()
last_update

Timestamp('2020-08-31 00:00:00')

In [19]:
# Download the price history of every constituent from `last_update` onwards.
# auto_adjust is pinned to False because its default differs between yfinance
# releases; the explicit value keeps 'Close' unadjusted and alongside a
# separate 'Adj Close' column, whichever version is installed.
time_series = yf.download(tickers = symbols, start = last_update, auto_adjust = False)
time_series

[*********************100%%**********************]  30 of 30 completed


Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-08-31,126.748962,230.460190,97.282425,171.820007,132.653931,272.649994,38.233528,73.758080,131.869995,39.024300,...,7929273,28774200,3519800,5596800,2297900,4341600,9326900,14555500,7940100,15078800
2020-09-01,131.797699,228.222168,98.125122,172.100006,136.140167,281.250000,38.061459,73.011078,133.550003,40.883835,...,6797433,25725500,4912400,5671800,969900,2487100,5642900,13358400,10728300,35599400
2020-09-02,129.067032,234.827026,99.973297,174.779999,139.141678,276.690002,38.414639,73.107765,135.389999,42.570393,...,11571597,34080800,6775400,8384800,1391400,2846500,9863800,21711900,7672400,17222000
2020-09-03,118.733818,225.538376,100.030739,168.770004,136.801971,265.010010,37.083447,72.308044,133.240005,41.852524,...,12983672,58400300,7264400,7277900,1299400,3872500,11310200,23126800,8278000,16005000
2020-09-04,118.812424,225.984161,101.189438,171.050003,138.125641,254.699997,36.965721,72.000465,131.990005,42.034149,...,9359688,59664100,5157600,7437700,1460700,2901000,13243600,22250100,6994400,11327400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-23,173.000000,273.059998,144.589996,181.029999,247.320007,202.000000,52.200001,160.679993,83.099998,48.240002,...,8493000,24374700,5966100,5981800,1503500,2357700,5071700,31725800,19921100,5699000
2023-10-24,173.440002,276.119995,144.419998,182.360001,249.550003,204.220001,53.009998,156.649994,82.559998,49.240002,...,6347900,31153600,9252200,6322000,2201800,1978200,6844100,61617900,10157700,5845000
2023-10-25,171.100006,272.160004,143.520004,177.729996,244.940002,197.059998,52.400002,155.869995,80.589996,47.540001,...,7849200,55053800,7054700,6172900,2083800,2380100,10365300,30832800,10408400,5540600
2023-10-26,166.889999,269.709991,143.339996,179.089996,242.429993,196.250000,51.369999,154.750000,79.779999,48.020000,...,11335700,37828500,11222600,7582100,1972300,2675800,6414500,34367000,8832500,5406700


In [21]:
# Closing prices only: one column per ticker.
time_series['Close']

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08-31,129.039993,253.320007,101.589996,171.820007,142.309998,272.649994,42.220001,83.930000,131.869995,45.119999,...,81.364502,225.529999,111.889999,138.330002,116.040001,312.549988,211.990005,59.270000,38.020000,138.850006
2020-09-01,134.179993,250.860001,102.470001,172.100006,146.050003,281.250000,42.029999,83.080002,133.550003,47.270000,...,80.562981,227.270004,114.839996,138.179993,114.690002,312.829987,213.350006,59.160000,36.759998,147.589996
2020-09-02,131.399994,258.119995,104.400002,174.779999,149.270004,276.690002,42.419998,83.190002,135.389999,49.220001,...,82.948471,231.649994,116.800003,140.509995,117.480003,320.239990,216.479996,60.529999,37.169998,147.679993
2020-09-03,120.879997,247.910004,104.459999,168.770004,146.759995,265.010010,40.950001,82.279999,133.240005,48.389999,...,81.574425,217.300003,112.849998,138.259995,115.349998,316.230011,208.960007,60.610001,37.090000,144.539993
2020-09-04,120.959999,248.399994,105.669998,171.050003,148.179993,254.699997,40.820000,81.930000,131.990005,48.599998,...,81.335876,214.250000,112.400002,137.960007,116.570000,312.000000,204.660004,60.480000,36.889999,142.830002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-23,173.000000,273.059998,144.589996,181.029999,247.320007,202.000000,52.200001,160.679993,83.099998,48.240002,...,103.349998,329.320007,102.809998,148.149994,159.190002,521.570007,231.529999,31.389999,21.959999,161.009995
2023-10-24,173.440002,276.119995,144.419998,182.360001,249.550003,204.220001,53.009998,156.649994,82.559998,49.240002,...,103.029999,330.529999,105.180000,149.899994,157.919998,525.000000,234.649994,34.299999,21.370001,163.250000
2023-10-25,171.100006,272.160004,143.520004,177.729996,244.940002,197.059998,52.400002,155.869995,80.589996,47.540001,...,103.629997,340.670013,103.540001,150.600006,164.729996,530.210022,236.850006,33.689999,21.670000,162.759995
2023-10-26,166.889999,269.709991,143.339996,179.089996,242.429993,196.250000,51.369999,154.750000,79.779999,48.020000,...,105.550003,327.890015,100.019997,149.800003,164.460007,528.359985,231.279999,34.470001,21.700001,161.770004


In [22]:
# Discard, in place, every date on which at least one field is missing.
time_series.dropna(axis=0, how='any', inplace=True)

In [23]:
# Return of each ticker over the whole window: last close divided by first
# close, minus one, ranked from best to worst. This is based on 'Close', not
# on 'Adj Close'.
performance = (
    (time_series['Close'].iloc[-1] / time_series['Close'].iloc[0] - 1)
    .sort_values(ascending=False)
)
performance

CVX     0.719886
UNH     0.678643
CAT     0.676340
MSFT    0.462378
GS      0.415093
AXP     0.390983
TRV     0.384695
JPM     0.354327
AAPL    0.303627
MRK     0.263696
CSCO    0.221222
IBM     0.208952
MCD     0.197827
WMT     0.160749
KO      0.115284
V       0.081513
HON     0.069163
DOW     0.066268
PG      0.063833
BA      0.045804
AMGN    0.032568
HD     -0.030101
JNJ    -0.050909
NKE    -0.124318
CRM    -0.279039
INTC   -0.302453
DIS    -0.398423
VZ     -0.435802
WBA    -0.443977
MMM    -0.463133
dtype: float64

In [24]:
# Label the index 'Symbol', matching the index name of the constituents table.
performance.index.rename('Symbol', inplace=True)

In [25]:
# Show the returns again, now with the index labelled 'Symbol'.
performance

Symbol
CVX     0.719886
UNH     0.678643
CAT     0.676340
MSFT    0.462378
GS      0.415093
AXP     0.390983
TRV     0.384695
JPM     0.354327
AAPL    0.303627
MRK     0.263696
CSCO    0.221222
IBM     0.208952
MCD     0.197827
WMT     0.160749
KO      0.115284
V       0.081513
HON     0.069163
DOW     0.066268
PG      0.063833
BA      0.045804
AMGN    0.032568
HD     -0.030101
JNJ    -0.050909
NKE    -0.124318
CRM    -0.279039
INTC   -0.302453
DIS    -0.398423
VZ     -0.435802
WBA    -0.443977
MMM    -0.463133
dtype: float64

In [26]:
# Show the constituents table before the performance column is attached.
dataframe

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MMM,3M,NYSE,Conglomerate,1976-08-09,2.41
AXP,American Express,NYSE,Financial services,1982-08-30,3.02
AMGN,Amgen,NASDAQ,Biopharmaceutical,2020-08-31,5.48
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84
BA,Boeing,NYSE,Aerospace and defense,1987-03-12,3.36
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5
CSCO,Cisco,NASDAQ,Information technology,2009-06-08,0.96
KO,Coca-Cola,NYSE,Drink industry,1987-03-12,1.22
DIS,Disney,NYSE,Broadcasting and entertainment,1991-05-06,1.89


In [27]:
# Attach the returns as a new column. Both objects are indexed by ticker
# symbol, so each value lands on its matching row even though `performance`
# is sorted in a different order.
dataframe['performance'] = performance

In [28]:
# Show the table with the new 'performance' column.
dataframe

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights,performance
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MMM,3M,NYSE,Conglomerate,1976-08-09,2.41,-0.463133
AXP,American Express,NYSE,Financial services,1982-08-30,3.02,0.390983
AMGN,Amgen,NASDAQ,Biopharmaceutical,2020-08-31,5.48,0.032568
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84,0.303627
BA,Boeing,NYSE,Aerospace and defense,1987-03-12,3.36,0.045804
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52,0.67634
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5,0.719886
CSCO,Cisco,NASDAQ,Information technology,2009-06-08,0.96,0.221222
KO,Coca-Cola,NYSE,Drink industry,1987-03-12,1.22,0.115284
DIS,Disney,NYSE,Broadcasting and entertainment,1991-05-06,1.89,-0.398423


In [29]:
# Rank the constituents from best to worst return (display only; `dataframe`
# itself is left unsorted).
dataframe.sort_values('performance', ascending=False)

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights,performance
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5,0.719886
UNH,UnitedHealth Group,NYSE,Managed health care,2012-09-24,10.29,0.678643
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52,0.67634
MSFT,Microsoft,NASDAQ,Information technology,1999-11-01,4.88,0.462378
GS,Goldman Sachs,NYSE,Financial services,2019-04-02,7.36,0.415093
AXP,American Express,NYSE,Financial services,1982-08-30,3.02,0.390983
TRV,Travelers,NYSE,Insurance,2009-06-08,3.62,0.384695
JPM,JPMorgan Chase,NYSE,Financial services,1991-05-06,2.61,0.354327
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84,0.303627
MRK,Merck,NYSE,Pharmaceutical industry,1979-06-29,2.1,0.263696


This project will provide you with a comprehensive understanding of the DJIA's historical performance, help you develop data analysis and modeling skills, and potentially offer insights for making informed investment decisions.





