# Some important information about data frame variables 

Price/Earnings - financial indicator equal to the ratio of the market value of a share to the annual earnings per share.

Dividend yield is the ratio of the annual dividend per share to the share price. This value is most often expressed as a percentage.

Earnings/Share - financial indicator equal to the ratio of the company's net profit available for distribution to the average annual number of ordinary shares.

EBITDA is an analytical indicator equal to the amount of profit before deduction of expenses on payment of interest, taxes, depreciation and amortization.

Price/Sales - financial indicator equal to the ratio of a company's market capitalization to its annual revenue.

Price/Book - financial ratio equal to the ratio of the company's current market capitalization to its book value.


# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline

from operator import itemgetter
from collections import defaultdict

In [None]:
# Load the financials CSV into a DataFrame (the path is relative to the
# working directory) and preview its first rows.
df = pd.read_csv('../input/sp-500-companies-with-financial-information/financials.csv')
df.head()

In [None]:
# Column labels of the loaded frame.
df.columns

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Data type of every column.
df.dtypes

In [None]:
# Summary statistics; include='all' also covers the non-numeric columns.
df.describe(include='all')

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Pull the columns used by the later analyses out of the frame as numpy arrays.
# The names on the left line up one-to-one with the column labels on the right.
(sector, price, company, market_cap,
 dividend_yield, price_sales, price_earnings, EBITDA) = (
    np.asarray(df[column])
    for column in ('Sector', 'Price', 'Name', 'Market Cap',
                   'Dividend Yield', 'Price/Sales', 'Price/Earnings', 'EBITDA')
)

In [None]:
# Lookup tables keyed by company name, one per metric
def _keyed_by_company(values):
    """Map each company name to the entry of *values* at the same position."""
    return dict(zip(company, values))


company_sectors = _keyed_by_company(sector)
# NOTE: the misspelled name `copmany_prices` is kept on purpose; later cells read it.
copmany_prices = _keyed_by_company(price)
company_market_cap = _keyed_by_company(market_cap)
company_dividend_yields = _keyed_by_company(dividend_yield)
company_price_sales = _keyed_by_company(price_sales)
company_price_earnings = _keyed_by_company(price_earnings)
company_ebitda = _keyed_by_company(EBITDA)

In [None]:
# Count how many companies belong to each sector
res = defaultdict(int)
for sector_name in company_sectors.values():
    res[sector_name] += 1

# Plain dict copy of the tallies; the 'Company' column of the table holds the counts.
sectors = dict(res)
pd.DataFrame(list(sectors.items()), columns=['Sector', 'Company'])

### Pie Charts 

In [None]:
# Pie chart of the number of companies per sector, saved next to the notebook.
sector_names = list(sectors.keys())
sector_values = list(sectors.values())  # the original author recorded [67, 61, 70, 84, 28, 68, 25, 33, 34, 32, 3]

pie, ax = plt.subplots(figsize=[15, 10])
ax.pie(
    x=sector_values,
    labels=sector_names,
    explode=[0.05] * len(sector_values),  # pull every wedge slightly outwards
    autopct="%.1f%%",
    pctdistance=0.5,
)
ax.set_title("S&P 500 sector breakdown (by July 2020)", fontsize=14)
pie.savefig("DeliveryPieChart.png")

In [None]:
# Total market capitalization per sector, largest first.
# Only 'Market Cap' is selected before aggregating, so the string columns of df
# (names, symbols, links) are never fed into sum().
dfsec = (
    df.groupby("Sector")[["Market Cap"]]
    .sum()
    .sort_values("Market Cap", ascending=False)
    .round(1)
    .reset_index()  # turn the 'Sector' index back into a regular column
)
dfsec

In [None]:
# Pie chart of each sector's share of the total market capitalization.
sector_names = list(dfsec['Sector'])
sector_values = list(dfsec['Market Cap'])

pie, ax = plt.subplots(figsize=[15,10])
plt.pie(x=sector_values, autopct="%.1f%%", explode=[0.07]*len(sector_values), labels=sector_names, pctdistance=0.5)
plt.title("Market capitalization structure of each sector (by July 2020)", fontsize=14);
# Saved under its own file name: the sector-count pie chart is written to
# "DeliveryPieChart.png", and reusing that name here would overwrite it.
pie.savefig("MarketCapPieChart.png")

## Bar plots and scatter plots (determining the top-10 companies according to various criteria)

In [None]:
# Helper that attaches the sector to every company of a ranking
def defining_sectors(top_companies_dict):
    """Tabulate a ranking together with each company's sector.

    Parameters
    ----------
    top_companies_dict : dict
        Maps a company name to the value it was ranked by.

    Returns
    -------
    pandas.DataFrame
        Columns ``Company``, ``Sector`` and ``Value``, with rows in the
        iteration order of ``top_companies_dict``.

    Raises
    ------
    KeyError
        If a company is not a key of the module-level ``company_sectors``.
    """
    names = list(top_companies_dict)
    return pd.DataFrame({
        'Company': names,
        'Sector': [company_sectors[name] for name in names],
        'Value': list(top_companies_dict.values()),
    })

## Top 10 companies by market capitalization

In [None]:
# Horizontal bar chart of the N companies with the largest market capitalization.
N = 10
top_10_by_market_cap = dict(sorted(company_market_cap.items(), key=itemgetter(1), reverse=True)[:N])

x_axis = list(top_10_by_market_cap)
y_axis = list(top_10_by_market_cap.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="crimson")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
plt.xlabel("Market capitalization", fontsize=18)
plt.title("Top 10 Companies (By Market capitalization)", fontsize=20)
plt.grid(alpha=0.3)

In [None]:
# Table of the top-10 market-cap companies with the sector of each one.
defining_sectors(top_10_by_market_cap)

## Top 50 companies by market capitalization

In [None]:
# Bubble chart of the N largest companies by market capitalization;
# both the height and the size of a bubble encode the market cap.
# NOTE: the variable is called top_20_... but holds N (= 50) entries.
N = 50
top_20_by_market_cap = dict(
    sorted(company_market_cap.items(), key=itemgetter(1), reverse=True)[:N]
)
data = pd.DataFrame({
    'Name': list(top_20_by_market_cap),
    'Market Cap': list(top_20_by_market_cap.values()),
})
fig = px.scatter(data_frame=data, x="Name", y="Market Cap", color="Name", size="Market Cap")
fig.show()

## Top 10 companies by stock price values

In [None]:
# Horizontal bar chart of the N companies with the highest stock price.
N = 10
top_10_by_price_values = dict(sorted(copmany_prices.items(), key=itemgetter(1), reverse=True)[:N])

x_axis = list(top_10_by_price_values)
y_axis = list(top_10_by_price_values.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="darkcyan")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
plt.xlabel("Price Values", fontsize=18)
plt.title("Top 10 Companies (By Stock Price Values)", fontsize=20)
plt.grid(alpha=0.3)

In [None]:
# Table of the top-10 companies by stock price with the sector of each one.
defining_sectors(top_10_by_price_values)

## Top 100 companies by stock price values

In [None]:
# Bubble chart of the N highest stock prices; both the height and the size of
# a bubble encode the price. (A stray bare `res` statement that did nothing
# in the middle of this cell was removed.)
N = 100
top_100_by_price_values = dict(sorted(copmany_prices.items(), key=itemgetter(1), reverse=True)[:N])
data = pd.DataFrame(top_100_by_price_values.items(), columns=['Name', 'Price Value'])
fig = px.scatter(data, x="Name", y="Price Value", color="Name", size='Price Value')
fig.show()

## Top 10 companies by dividend yields

In [None]:
# Horizontal bar chart of the N companies with the highest dividend yield.
N = 10
top_10_by_dividend_yield = dict(sorted(company_dividend_yields.items(), key=itemgetter(1), reverse=True)[:N])

x_axis = list(top_10_by_dividend_yield)
y_axis = list(top_10_by_dividend_yield.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="darkorange")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
plt.xlabel("Dividend Yields", fontsize=18)
plt.title("Top 10 Companies (By Dividend Yields)", fontsize=20)
plt.grid(alpha=0.3)

In [None]:
# Table of the top-10 companies by dividend yield with the sector of each one.
defining_sectors(top_10_by_dividend_yield)

## Top 100 companies by dividend yields

In [None]:
# Bubble chart of the N highest dividend yields; both the height and the size
# of a bubble encode the yield.
N = 100
top_100_by_dividend_yield = dict(
    sorted(company_dividend_yields.items(), key=itemgetter(1), reverse=True)[:N]
)
data = pd.DataFrame({
    'Company': list(top_100_by_dividend_yield),
    'Dividend Yield': list(top_100_by_dividend_yield.values()),
})
fig = px.scatter(data_frame=data, x="Company", y="Dividend Yield", color="Company", size="Dividend Yield")
fig.show()

## Top 10 companies by price/sales

In [None]:
# Horizontal bar chart of the N companies with the highest Price/Sales ratio.
N = 10
top_10_by_price_sales = dict(sorted(company_price_sales.items(), key=itemgetter(1), reverse=True)[:N])

x_axis = list(top_10_by_price_sales)
y_axis = list(top_10_by_price_sales.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="springgreen")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
plt.xlabel("Price sales", fontsize=18)
plt.title("Top 10 Companies (Price/sales)", fontsize=20)
plt.grid(alpha=0.3)

In [None]:
# Table of the top-10 companies by Price/Sales with the sector of each one.
defining_sectors(top_10_by_price_sales)

## Top 50 companies by price/sales

In [None]:
# Bubble chart of the N highest Price/Sales ratios; both the height and the
# size of a bubble encode the ratio.
# NOTE: the variable is called top_100_... but holds N (= 50) entries.
N = 50
top_100_by_price_sales = dict(
    sorted(company_price_sales.items(), key=itemgetter(1), reverse=True)[:N]
)
data = pd.DataFrame({
    'Company': list(top_100_by_price_sales),
    'Price/Sales': list(top_100_by_price_sales.values()),
})
fig = px.scatter(data_frame=data, x="Company", y="Price/Sales", color="Company", size="Price/Sales")
fig.show()

## Top 10 companies by price/earnings

In [None]:
# Horizontal bar chart of the N companies with the highest Price/Earnings ratio.
N = 10
# Missing ratios are dropped before ranking: NaN compares False against every
# number, so leaving one in would give sorted() an only partially ordered list.
# NOTE(review): presumably some companies have no Price/Earnings value; check
# the output of the df.isnull().sum() cell.
top_10_by_price_earnings = dict(sorted(
    ((name, ratio) for name, ratio in company_price_earnings.items() if pd.notna(ratio)),
    key=itemgetter(1),
    reverse=True,
)[:N])

x_axis = list(top_10_by_price_earnings)
y_axis = list(top_10_by_price_earnings.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="deepskyblue")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
plt.xlabel("Price Earnings", fontsize=18)
plt.title("Top 10 Companies (Price/Earnings)", fontsize=20)
plt.grid(alpha=0.3)

In [None]:
# Table of the top-10 companies by Price/Earnings with the sector of each one.
defining_sectors(top_10_by_price_earnings)

## Top 50 companies by price/earnings

In [None]:
# Bubble chart of the N highest Price/Earnings ratios; both the height and the
# size of a bubble encode the ratio.
N = 50
# Missing ratios are dropped before ranking: NaN compares False against every
# number, so leaving one in would give sorted() an only partially ordered list.
# NOTE(review): presumably some companies have no Price/Earnings value; check
# the output of the df.isnull().sum() cell.
top_50_by_price_earnings = dict(sorted(
    ((name, ratio) for name, ratio in company_price_earnings.items() if pd.notna(ratio)),
    key=itemgetter(1),
    reverse=True,
)[:N])
data = pd.DataFrame(top_50_by_price_earnings.items(), columns=['Company', 'Price/Earnings'])
fig = px.scatter(data, x="Company", y="Price/Earnings", color="Company", size='Price/Earnings')
fig.show()

## Top 10 companies by EBITDA

In [None]:
# Horizontal bar chart of the N companies with the highest EBITDA.
N = 10
top_10_by_EBITDA = dict(sorted(company_ebitda.items(), key=itemgetter(1), reverse=True)[:N])

x_axis = list(top_10_by_EBITDA)
y_axis = list(top_10_by_EBITDA.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="cyan")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
plt.xlabel("EBITDA", fontsize=18)
plt.title("Top 10 Companies (EBITDA)", fontsize=20)
plt.grid(alpha=0.3)

In [None]:
# Table of the top-10 companies by EBITDA with the sector of each one.
defining_sectors(top_10_by_EBITDA)

## Top 50 companies by EBITDA

In [None]:
# Bubble chart of the N highest EBITDA values; both the height and the size of
# a bubble encode EBITDA.
# NOTE: the variable is called top_100_... but holds N (= 50) entries.
N = 50
top_100_ebitda = dict(
    sorted(company_ebitda.items(), key=itemgetter(1), reverse=True)[:N]
)
data = pd.DataFrame({
    'Company': list(top_100_ebitda),
    'EBITDA': list(top_100_ebitda.values()),
})
fig = px.scatter(data_frame=data, x="Company", y="EBITDA", color="Company", size="EBITDA")
fig.show()

In [None]:
# Build a per-company frame holding the 52-week price band and its width
Low = np.asarray(df['52 Week Low'])
High = np.asarray(df['52 Week High'])

# NOTE(review): the width is computed as '52 Week Low' minus '52 Week High',
# although the original comment described it as "max - min". Confirm against
# the data which column actually holds the larger value.
difference = np.asarray(df['52 Week Low'] - df['52 Week High'])

new_df = pd.DataFrame(dict(zip(
    ['Company', 'Sector', 'Price', '52 Week Low', '52 Week High', 'Stock Growth'],
    [company, sector, price, Low, High, difference],
)))

new_df.head()

## Top 10 companies by stock price growth in 52 weeks

In [None]:
# Collapse new_df to one 'Stock Growth' figure per (Company, Sector) pair,
# ordered from the largest value down, and show the first ten rows.
new_df = (
    new_df.groupby(["Company", "Sector"])[["Stock Growth"]]
    .sum()
    .sort_values("Stock Growth", ascending=False)
    .round(1)
    .reset_index()  # Company and Sector become regular columns again
)
new_df.head(10)

In [None]:
# Horizontal bar chart of the N companies with the largest 'Stock Growth' value.
arr1, arr2 = np.asarray(new_df['Company']), np.asarray(new_df['Stock Growth'])
D = dict(zip(arr1, arr2))  # company name -> 'Stock Growth'

N = 10
top_10_stock_grouth = dict(sorted(D.items(), key=itemgetter(1), reverse=True)[:N])

x_axis = list(top_10_stock_grouth)
y_axis = list(top_10_stock_grouth.values())

# One Axes only, created with axisbelow=True so the grid is drawn behind the bars.
# (A separate plt.axes(axisbelow=True) call would stack a second Axes on the figure.)
f = plt.figure(figsize=(10, 5))
f.add_subplot(111, axisbelow=True)

plt.barh(x_axis, y_axis, color="brown")
plt.gca().invert_yaxis()  # first (largest) entry at the top
plt.tick_params(size=5, labelsize=13)
# In a horizontal bar chart the x-axis carries the values and the companies sit
# on the y-axis, so the x label names the metric rather than "Companies".
plt.xlabel("Stock price growth", fontsize=18)
plt.title("Top 10 companies by stock price growth in 52 weeks", fontsize=20)
plt.grid(alpha=0.3)

## Top 50 companies by stock price growth in 52 weeks

In [None]:
# Bubble chart of the N largest 'Stock Growth' values taken from D; both the
# height and the size of a bubble encode the value.
N = 50

top_50_stock_grouth = dict(
    sorted(D.items(), key=itemgetter(1), reverse=True)[:N]
)
data = pd.DataFrame({
    'Company': list(top_50_stock_grouth),
    'Stock Grouth': list(top_50_stock_grouth.values()),
})
fig = px.scatter(data_frame=data, x="Company", y="Stock Grouth", color="Company", size="Stock Grouth")
fig.show()

## Deriving new variables from financial formulas and adding them to our dataframe

In [None]:
# Derived metrics, each obtained by rearranging a ratio already in the data.
# Price/Earnings = Price / EPS  ->  EPS = Price / (Price/Earnings)
df['Annual earnings per share'] = df['Price'] / df['Price/Earnings']
# Market Cap = Price * shares  ->  shares = Market Cap / Price
df['Number of Shares Outstanding'] = df['Market Cap'] / df['Price']
df['Net Profit'] = df['Number of Shares Outstanding'] * df['Earnings/Share']
# NOTE(review): computed as Low minus High; confirm which column holds the larger value.
df['Stock Grouth'] = df['52 Week Low'] - df['52 Week High']
# Price/Book = Market Cap / Book Value  ->  Book Value = Market Cap / (Price/Book)
df['Book Value'] = df['Market Cap'] / df['Price/Book']
# Price/Sales = Market Cap / Annual Revenue  ->  Annual Revenue = Market Cap / (Price/Sales)
df['Annual Revenue'] = df['Market Cap'] / df['Price/Sales']
df = df.drop(columns='SEC Filings')
df.head()

In [None]:
# Correlation analysis of our new data frame.
# Only numeric columns are correlated: the frame still holds text columns
# (e.g. 'Name', 'Sector'), which corr() cannot convert to numbers.
df.select_dtypes(include='number').corr().style.background_gradient(cmap='Purples').format("{:.3f}")