In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline
import plotly.express as px

# Importing the dataset and first review

In [None]:
# Read the S&P 500 financials CSV into `df` and preview its first five rows.
df = pd.read_csv(
    '../input/sp-500-companies-with-financial-information/financials.csv'
)
df.head(5)

In [None]:
# List the column labels of the dataset to see which fields are available.
df.columns

In [None]:
# Size of the dataset as a (number of rows, number of columns) tuple.
df.shape

In [None]:
# Summary statistics for every column; include='all' makes describe() cover
# the non-numeric columns as well as the numeric ones.

df.describe(include='all')

In [None]:
# Count the missing (NaN) values in each column.
# The resulting Series is left as the cell's last expression so that Jupyter
# displays it directly instead of routing it through print().
df.isnull().sum()

In [None]:
# Display every row whose Price/Earnings value is missing (NaN).
df.loc[df['Price/Earnings'].isna(), :]

# Analysis

In [None]:
# Total Market Cap per sector, largest first, rounded to 1 decimal place.
# Only the "Market Cap" column is aggregated. Summing the whole frame would
# also aggregate every other column (including non-numeric ones such as Name)
# only for the result to be discarded on the next line.
dfsec = (
    df.groupby("Sector")["Market Cap"]
    .sum()
    .sort_values(ascending=False)
    .head(20)       # keep at most the 20 largest sectors
    .round(1)
    .reset_index()  # turn the Sector index back into a regular column
)
dfsec

In [None]:
# Bar chart of the total Market Cap per sector.
# Reads the "Sector" and "Market Cap" columns of dfsec.
fig, ax = plt.subplots(figsize=(15, 10))  # figure width and height in inches
ax.bar(
    dfsec["Sector"],
    dfsec["Market Cap"],
    color='#227d3d',
    edgecolor='yellow',
    linewidth=1,
)
ax.set_title("Market cap by sector", fontsize=18)
ax.set_xlabel("Sector", fontsize=15)      # x axis: one bar per sector
ax.set_ylabel("Market Cap", fontsize=15)  # y axis: summed Market Cap of the sector
# Sector names are long, so the x tick labels are drawn vertically.
ax.tick_params(axis='x', labelsize=12, labelrotation=90)
ax.tick_params(axis='y', labelsize=12)
plt.show()  # render the figure without echoing the last call's return value


We see that the IT sector is the leader, followed by the Financials and Health Care sectors. Let's take a closer look at these three sectors.

In [None]:
# Total EBITDA per sector, rounded to 1 decimal place; only the five sectors
# with the highest EBITDA are kept (head() defaults to 5 rows).
# Only the "EBITDA" column is aggregated. Summing the whole frame would also
# aggregate every other column (including non-numeric ones such as Name)
# only for the result to be discarded on the next line.
dfebitda = (
    df.groupby("Sector")["EBITDA"]
    .sum()
    .sort_values(ascending=False)
    .head()
    .round(1)
    .reset_index()  # turn the Sector index back into a regular column
)
dfebitda

In [None]:
# Donut chart (hole=.3) of EBITDA for the sectors held in dfebitda.
# names='Sector' labels every slice and legend entry with its sector name, so
# the reader no longer has to hover to find out which slice is which.
# The previous labels={'Sector'} argument is dropped: it was a set, whereas
# plotly expects a {column name: display label} dict for `labels`.
fig = px.pie(
    dfebitda,
    names='Sector',
    values='EBITDA',
    title='EBITDA by Sector (hover to see details)',
    hole=.3,
)
fig.show()

The IT sector leads in both Market Cap and EBITDA, and Health Care again holds third place, while second place is now taken by Consumer Discretionary.

# Splitting Sectors

Let's take a closer look at the dataset, sector by sector.

In [None]:
# Slice the full dataset into one frame per sector of interest.
dfit = df.loc[df['Sector'].eq('Information Technology')]
dffin = df.loc[df['Sector'].eq('Financials')]
dfhc = df.loc[df['Sector'].eq('Health Care')]

In [None]:
# Preview the first three rows of the Information Technology slice.
dfit.head(3)

In [None]:
# Preview the first three rows of the Financials slice.
dffin.head(3)

In [None]:
# Preview the first three rows of the Health Care slice.
dfhc.head(3)

Let's review the top five companies by Price in each of these sectors.

In [None]:
# Keep only the five highest-priced Information Technology companies.
# Note: dfit is overwritten, so from here on it holds just these five rows.
dfit = (
    dfit
    .sort_values('Price', ascending=False)
    .head(5)
)
dfit

In [None]:
# Keep only the five highest-priced Financials companies.
# Note: dffin is overwritten, so from here on it holds just these five rows.
dffin = (
    dffin
    .sort_values('Price', ascending=False)
    .head(5)
)
dffin

In [None]:
# Keep only the five highest-priced Health Care companies.
# Note: dfhc is overwritten, so from here on it holds just these five rows.
dfhc = (
    dfhc
    .sort_values('Price', ascending=False)
    .head(5)
)
dfhc

In [None]:
# Alternative to sort_values(...).head(5): DataFrame.nlargest picks the rows
# with the highest values of a column directly.
# It is applied to the complete Health Care slice of df rather than to dfhc,
# because an earlier cell cuts dfhc down to its five top-priced rows, which
# would leave nlargest(5) with nothing to select.
dfhc1 = df[df['Sector'] == 'Health Care'].nlargest(5, 'Price')

In [None]:
# Pick the columns to present, then attach a caption through the Styler
# (the caption belongs to the rendered table, not to the DataFrame itself).
dfhc2 = dfhc1.loc[:, ['Sector', 'Name', 'Price']]
(
    dfhc2.style
    .set_table_attributes("style='display:inline'")
    .set_caption('Top five companies by Price in the Health Care Sector')
)

In [None]:
# Reduce dfit to the columns to present, then attach a caption through the
# Styler (the caption belongs to the rendered table, not to the DataFrame).
dfit = dfit.loc[:, ['Name', 'Price']]
(
    dfit.style
    .set_table_attributes("style='display:inline'")
    .set_caption('Top five companies by Price in the IT Sector')
)

In [None]:
# Reduce dffin to the columns to present, then attach a caption through the
# Styler (the caption belongs to the rendered table, not to the DataFrame).
dffin = dffin.loc[:, ['Name', 'Price']]
(
    dffin.style
    .set_table_attributes("style='display:inline'")
    .set_caption('Top five companies by Price in the Financials Sector')
)

# Plots

In [None]:
# Distribution of Price across all companies, split into 50 bins.
ax = sns.histplot(data=df['Price'], bins=50, kde=False)
# Title and axis labels are set explicitly so the figure can be read on its own.
ax.set(title='Distribution of Price', xlabel='Price', ylabel='Count')
plt.show()  # render the figure without echoing the Axes repr

In [None]:
# Scatter of Price for every company. Because a single Series is passed,
# each value is plotted against its row index in df.
ax = sns.scatterplot(data=df['Price'])
# Label the axes explicitly: the x axis is only the row position, not a variable.
ax.set(title='Price by row position', xlabel='Row index', ylabel='Price')
plt.show()  # render the figure without echoing the Axes repr

In [None]:
# Distribution of Earnings/Share across all companies, split into 10 bins.
ax = sns.histplot(data=df['Earnings/Share'], bins=10, kde=False)
# Title and axis labels are set explicitly so the figure can be read on its own.
ax.set(title='Distribution of Earnings/Share', xlabel='Earnings/Share', ylabel='Count')
plt.show()  # render the figure without echoing the Axes repr