## Downloads all Stock Data from a Given Index for 5 Years

In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

# pip install numpy
# conda install -c anaconda pandas
# conda install -c conda-forge matplotlib

import datetime as dt # For defining dates

import time

# In Powershell Prompt : conda install -c conda-forge multitasking
# pip install -i https://pypi.anaconda.org/ranaroussi/simple yfinance

import yfinance as yf

# To show all your output File -> Preferences -> Settings Search for Notebook
# Notebook Output Text Line Limit and set to 100

# Used for file handling like deleting files
import os

# conda install -c conda-forge cufflinks-py
# conda install -c plotly plotly
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

from plotly.subplots import make_subplots

# New Imports
# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

import warnings
warnings.simplefilter("ignore")

## Constants

In [26]:
PATH = "/Users/oppoudel/dev/Python4Finance/Stocks/"

# Start end date defaults
S_DATE = "2017-12-12"
E_DATE = "2022-12-10"
S_DATE_DT = pd.to_datetime(S_DATE)
E_DATE_DT = pd.to_datetime(E_DATE)

## Holds Stocks Not Downloaded

In [42]:
stocks_not_downloaded = []
missing_stocks = []

## Function that Returns a Stock Dataframe from a CSV

In [12]:
# Reads a dataframe from the CSV file, changes index to date and returns it
def get_stock_df_from_csv(ticker):
    
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(PATH + ticker + '.csv', index_col=0)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df

## Returns a Named Columns Data from a CSV

In [3]:
def get_column_from_csv(file, col_name):
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df[col_name]

## Get Stock Tickers

In [45]:
# Total of 3482 tickers
tickers = get_column_from_csv("D:/Python for Finance/Wilshire-5000-Stocks.csv", "Ticker")
tickers

# for x in tickers:
#     print(x, end=", ")

print(len(tickers))

3481


## Function that Saves Stock Data to CSV

In [4]:
# Function that gets a dataframe by providing a ticker and starting date
def save_to_csv_from_yahoo(folder, ticker):
    
    stock = yf.Ticker(ticker)
    
    try:
        print("Get Data for : ", ticker)
        # Get historical closing price data
        df = stock.history(period="5y")
    
        # Wait 2 seconds
        time.sleep(2)
        
        if df.empty:
            stocks_not_downloaded.append(ticker)
        
        # Remove the period for saving the file name
        # Save data to a CSV file
        # File to save to 
        the_file = folder + ticker.replace(".", "_") + '.csv'
        print(the_file, " Saved")
        df.to_csv(the_file)
    except Exception as ex:
        stocks_not_downloaded.append(ticker)
        print("Couldn't Get Data for :", ticker)

## Get 5 Years of Data for the 1st 20 Stocks

In [47]:
# # Folder used to store stock data
# folder = "D:/Python for Finance/Wilshire_Stocks/"

# for x in range(20):
#   save_to_csv_from_yahoo(folder, tickers[x])
# print("Finished")

## Stocks Not Downloaded

In [48]:
# stocks_not_downloaded

## Get Next 80 Stocks

In [49]:
# for x in range(20, 100):
#   save_to_csv_from_yahoo(folder, tickers[x])
# print("Finished")
# stocks_not_downloaded

## Get Rest of Stocks

In [50]:
# for x in range(3001, 3481):
#   save_to_csv_from_yahoo(folder, tickers[x])
# print("Finished")
# stocks_not_downloaded

## Delete Empty Files

In [51]:
try:
  for x in missing_stocks:
    os.remove("/Users/oppoudel/dev/Python4Finance/Stocks/" + x + ".csv")
except FileNotFoundError:
  print("Couldn't Find " + x)

print("Finished")

Finished


## Are FAANG Stocks Worth Investing in?

In [5]:
# Download just those stocks were are working with to make them up to date
# Facebook, Amazon, Apple, Netflix, Google
tickers = ["FB", "AMZN", "AAPL", "NFLX", "GOOG"]

for i in tickers:
    save_to_csv_from_yahoo("/Users/oppoudel/dev/Python4Finance/Stocks/", i)
    print("Finished " + i)

Get Data for :  FB
/Users/oppoudel/dev/Python4Finance/Stocks/FB.csv  Saved
Finished FB
Get Data for :  AMZN
/Users/oppoudel/dev/Python4Finance/Stocks/AMZN.csv  Saved
Finished AMZN
Get Data for :  AAPL
/Users/oppoudel/dev/Python4Finance/Stocks/AAPL.csv  Saved
Finished AAPL
Get Data for :  NFLX
/Users/oppoudel/dev/Python4Finance/Stocks/NFLX.csv  Saved
Finished NFLX
Get Data for :  GOOG
/Users/oppoudel/dev/Python4Finance/Stocks/GOOG.csv  Saved
Finished GOOG


In [16]:
fig = go.Figure()

# Get the dataframe with all FB's data
fb_df = get_stock_df_from_csv("FB")

# fb_df.head()

# Get closing price for the rest
amzn_df = get_stock_df_from_csv("AMZN")
aapl_df = get_stock_df_from_csv("AAPL")
nflx_df = get_stock_df_from_csv("NFLX")
goog_df = get_stock_df_from_csv("GOOG")

amzn_df

fb_plot = go.Scatter(x=fb_df.index, y=fb_df['Close'], name="Facebook")
amzn_plot = go.Scatter(x=amzn_df.index, y=amzn_df['Close'], name="Amazon")
aapl_plot = go.Scatter(x=aapl_df.index, y=aapl_df['Close'], name="Apple")
nflx_plot = go.Scatter(x=nflx_df.index, y=nflx_df['Close'], name="Netflix")
goog_plot = go.Scatter(x=goog_df.index, y=goog_df['Close'], name="Google")

# Plot price changes
fig.add_trace(fb_plot)
fig.add_trace(amzn_plot)
fig.add_trace(aapl_plot)
fig.add_trace(nflx_plot)
fig.add_trace(goog_plot)

fig.update_xaxes(title="Date", rangeslider_visible=True)
fig.update_yaxes(title="Price")
fig.update_layout(height=800, width=1300, 
                  showlegend=True)
fig.show()

# This data isn't useful for our purposes because the scales are different
# We must calculate the daily returns for these stocks to get data we can
# work with



## Daily Returns

For single stocks to find the daily return we subtract opening price from the closing price. Then you could multiply by the number of shares owned.

We calculate a percentage rate of return for each day to compare investments.
Simple Rate of Return = (End Price - Beginning Price) / Beginning Price OR (EP / BP) - 1

In [17]:
# Shift provides the value from the previous day
# NaN is displayed because there was no previous day price for the 1st calculation
def add_daily_return_to_df(df, ticker):
    df['daily_return'] = (df['Close'] / df['Close'].shift(1)) - 1
    # Save data to a CSV file
    df.to_csv(PATH + ticker + '.csv')
    return df  

In [18]:
add_daily_return_to_df(fb_df, "FB")
add_daily_return_to_df(amzn_df, "AMZN")
add_daily_return_to_df(aapl_df, "AAPL")
add_daily_return_to_df(nflx_df, "NFLX")
add_daily_return_to_df(goog_df, "GOOG")
goog_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,daily_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-04-24,851.200012,863.450012,849.859985,862.76001,1372500,0,0,
2017-04-25,865.0,875.0,862.809998,872.299988,1672000,0,0,0.011058
2017-04-26,874.22998,876.049988,867.747986,871.72998,1237200,0,0,-0.000653
2017-04-27,873.599976,875.400024,870.380005,874.25,2026800,0,0,0.002891
2017-04-28,910.659973,916.849976,905.77002,905.960022,3276300,0,0,0.036271


## Get Cumulative Return

In [19]:
def add_cum_return_to_df(df, ticker):
    df['cum_return'] = (1 + df['daily_return']).cumprod()
    df.to_csv(PATH + ticker + '.csv')
    return df

In [20]:
add_cum_return_to_df(fb_df, "FB")
add_cum_return_to_df(amzn_df, "AMZN")
add_cum_return_to_df(aapl_df, "AAPL")
add_cum_return_to_df(nflx_df, "NFLX")
add_cum_return_to_df(goog_df, "GOOG")

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,daily_return,cum_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-04-24,851.200012,863.450012,849.859985,862.760010,1372500,0,0,,
2017-04-25,865.000000,875.000000,862.809998,872.299988,1672000,0,0,0.011058,1.011058
2017-04-26,874.229980,876.049988,867.747986,871.729980,1237200,0,0,-0.000653,1.010397
2017-04-27,873.599976,875.400024,870.380005,874.250000,2026800,0,0,0.002891,1.013318
2017-04-28,910.659973,916.849976,905.770020,905.960022,3276300,0,0,0.036271,1.050072
...,...,...,...,...,...,...,...,...,...
2022-04-14,2612.989990,2614.205078,2542.229980,2545.060059,1171400,0,0,-0.023280,2.949905
2022-04-18,2548.199951,2574.239990,2531.569092,2559.219971,745900,0,0,0.005564,2.966317
2022-04-19,2561.540039,2618.074951,2549.030029,2610.620117,1136000,0,0,0.020084,3.025894
2022-04-20,2625.679932,2638.469971,2557.881104,2564.909912,1130500,0,0,-0.017509,2.972912


## Merge Multiple Stocks in One Dataframe by Column Name

In [27]:
def merge_df_by_column_name(col_name, sdate, edate, *tickers):
    # Will hold data for all dataframes with the same column name
    mult_df = pd.DataFrame()
    
    for x in tickers:
        df = get_stock_df_from_csv(x)
        
        # NEW Check if your dataframe has duplicate indexes
        if not df.index.is_unique:
            # Delete duplicates 
            df = df.loc[~df.index.duplicated(), :]
        
        mask = (df.index >= sdate) & (df.index <= edate)
        mult_df[x] = df.loc[mask][col_name]
        
    return mult_df

In [28]:
mult_df = merge_df_by_column_name('cum_return',  S_DATE, 
                                  E_DATE, *tickers)
mult_df

Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-12-12,1.216471,1.283962,1.209328,1.291316,1.205990
2017-12-13,1.225682,1.282915,1.213343,1.306125,1.206141
2017-12-14,1.226301,1.294079,1.212991,1.317945,1.216039
2017-12-15,1.238606,1.299457,1.225317,1.321838,1.233472
2017-12-18,1.243005,1.312064,1.242573,1.323924,1.248482
...,...,...,...,...,...
2022-04-14,1.444834,3.343726,4.875123,2.371758,2.949905
2022-04-18,1.448890,3.367497,4.868635,2.349023,2.966317
2022-04-19,1.493847,3.484985,4.937356,2.423764,3.025894
2022-04-20,1.377741,3.394232,4.932342,1.572620,2.972912


In [29]:
# Plot out cumulative returns on $1 in each stock since beginning of 2017
fig = px.line(mult_df, x=mult_df.index, y=mult_df.columns)
fig.update_xaxes(title="Date", rangeslider_visible=True)
fig.update_yaxes(title="Price")
fig.update_layout(height=800, width=1300, 
                  showlegend=True)
fig.show()

## Create a Price / Volume Chart

In [30]:
fig = go.Figure()
nflx_plot = go.Scatter(x=nflx_df.index, y=nflx_df['Close'], name="Netflix")

# Plot price changes
fig.add_trace(nflx_plot)

# Plot volume as bar graph
fig.add_trace(go.Bar(x=nflx_df.index, y=nflx_df['Volume']/200000, name='Volume Traded'))

fig.update_xaxes(title="Date", rangeslider_visible=True)
fig.update_yaxes(title="Price")
fig.update_layout(height=800, width=1300, 
                  showlegend=True)
fig.show()


## Adding Bollinger Bands

Bollinger Bands plot 2 lines using a moving average and the standard deviation defines how far apart the lines are. They also are used to define if prices are to high or low. When bands tighten it is believed a sharp price move in some direction. Prices tend to bounce off of the bands which provides potential market actions.

A strong trend should be noted if the price moves outside the band. If prices go over the resistance line it is in overbought territory and if it breaks through support it is a sign of an oversold position.

You normally use 20 sessions when using them.

In [62]:
# Here we will add a middle band (20 days), upper band (20 days + 1.96 std),
# and lower band (20 days - 1.96 std)

In [31]:
# Here we will add a middle band (20 days), upper band (20 days + 1.96 std),
# and lower band (20 days - 1.96 std)
def add_bollinger_bands(df):
    df['middle_band'] = df['Close'].rolling(window=20).mean()
    df['upper_band'] = df['middle_band'] + 1.96 * df['Close'].rolling(window=20).std()
    df['lower_band'] = df['middle_band'] - 1.96 * df['Close'].rolling(window=20).std()

In [32]:
stk_dfs = [fb_df, amzn_df, aapl_df, nflx_df, goog_df]

In [33]:
for x in stk_dfs:
    add_bollinger_bands(x)

In [34]:
fb_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,daily_return,cum_return,middle_band,upper_band,lower_band
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-04-24,144.960007,145.669998,144.339996,145.470001,14407200,0,0,,,,,
2017-04-25,145.789993,147.149994,145.789993,146.490005,17767500,0,0,0.007012,1.007012,,,
2017-04-26,147.089996,147.589996,146.089996,146.559998,12395000,0,0,0.000478,1.007493,,,
2017-04-27,146.669998,147.750000,146.139999,147.699997,11275100,0,0,0.007778,1.015330,,,
2017-04-28,149.500000,151.529999,149.070007,150.250000,30652200,0,0,0.017265,1.032859,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-14,214.889999,214.990005,210.000000,210.179993,18332700,0,0,-0.022373,1.444834,220.903502,234.038276,207.768728
2022-04-18,210.000000,213.380005,208.300003,210.770004,16580800,0,0,0.002807,1.448890,220.617502,234.365791,206.869212
2022-04-19,210.520004,218.429993,209.000000,217.309998,20089900,0,0,0.031029,1.493847,220.908501,234.100933,207.716070
2022-04-20,213.610001,214.160004,198.919998,200.419998,43036500,0,0,-0.077723,1.377741,220.097002,235.989939,204.204064


## Plot with Bollinger Bands

In [38]:
def plot_with_boll_bands(ticker):
    save_to_csv_from_yahoo("/Users/oppoudel/dev/Python4Finance/Stocks/", ticker)

    df = get_stock_df_from_csv(ticker)

    add_bollinger_bands(df)
    
    fig = go.Figure()

    candle = go.Candlestick(x=df.index, open=df['Open'],
    high=df['High'], low=df['Low'],
    close=df['Close'], name="Candlestick")

    upper_line = go.Scatter(x=df.index, y=df['upper_band'], 
    line=dict(color='rgba(250, 0, 0, 0.75)', 
    width=1), name="Upper Band")

    mid_line = go.Scatter(x=df.index, y=df['middle_band'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=0.7), name="Middle Band")

    lower_line = go.Scatter(x=df.index, y=df['lower_band'], 
    line=dict(color='rgba(0, 250, 0, 0.75)', 
    width=1), name="Lower Band")

    fig.add_trace(candle)
    fig.add_trace(upper_line)
    fig.add_trace(mid_line)
    fig.add_trace(lower_line)

    fig.update_xaxes(title="Date", rangeslider_visible=True)
    fig.update_yaxes(title="Price")
    fig.update_layout(title=ticker + " Bollinger Bands", 
    height=800, width=1300, showlegend=True)
    fig.show()

In [39]:
plot_with_boll_bands("AMD")

Get Data for :  AMD
/Users/oppoudel/dev/Python4Finance/Stocks/AMD.csv  Saved


## Support & Resistance

We normally use multiple moving averages to develop our support and resistance lines. 50 day for medium, 100 for long and 200 for very long terms are commonly used.

## Download S&P Data for 5 years

In [40]:
save_to_csv_from_yahoo(PATH, "^GSPC")
gspc_df = get_stock_df_from_csv("^GSPC")
gspc_df

Get Data for :  ^GSPC
/Users/oppoudel/dev/Python4Finance/Stocks/^GSPC.csv  Saved


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-04-24,2370.330078,2376.979980,2369.189941,2374.149902,3690650000,0,0
2017-04-25,2381.510010,2392.479980,2381.149902,2388.610107,3995240000,0,0
2017-04-26,2388.979980,2398.159912,2386.780029,2387.449951,4105920000,0,0
2017-04-27,2389.699951,2392.100098,2382.679932,2388.770020,4098460000,0,0
2017-04-28,2393.679932,2393.679932,2382.360107,2384.199951,3718270000,0,0
...,...,...,...,...,...,...,...
2022-04-14,4449.120117,4460.459961,4390.770020,4392.589844,3634740000,0,0
2022-04-18,4385.629883,4410.310059,4370.299805,4391.689941,3509340000,0,0
2022-04-19,4390.629883,4471.029785,4390.629883,4462.209961,3197930000,0,0
2022-04-20,4472.259766,4488.290039,4448.759766,4459.450195,3678040000,0,0


## Calculate Moving Averages

Moving averages are used to mitigate short term flucuations in a stock price. We create them by calculating the mean of a set of prices over a specified number of time periods. The Simple moving average (SMA) is just a simple mean. An Exponential Moving Average (EMA) is a weighted average that put more emphasis on more recent data.

In [41]:
# SMA
gspc_df['MA50'] = gspc_df['Close'].rolling(50).mean()
gspc_df['MA100'] = gspc_df['Close'].rolling(100).mean()

# EMA If we set adjust to False the weighted function is calculated recursively
gspc_df['EMA20'] = gspc_df['Close'].ewm(span=20, adjust=False).mean()
gspc_df['EMA50'] = gspc_df['Close'].ewm(span=50, adjust=False).mean()
gspc_df['EMA100'] = gspc_df['Close'].ewm(span=100, adjust=False).mean()

gspc_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,MA50,MA100,EMA20,EMA50,EMA100
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-04-24,2370.330078,2376.979980,2369.189941,2374.149902,3690650000,0,0,,,2374.149902,2374.149902,2374.149902
2017-04-25,2381.510010,2392.479980,2381.149902,2388.610107,3995240000,0,0,,,2375.527065,2374.716969,2374.436243
2017-04-26,2388.979980,2398.159912,2386.780029,2387.449951,4105920000,0,0,,,2376.662578,2375.216302,2374.693940
2017-04-27,2389.699951,2392.100098,2382.679932,2388.770020,4098460000,0,0,,,2377.815667,2375.747820,2374.972674
2017-04-28,2393.679932,2393.679932,2382.360107,2384.199951,3718270000,0,0,,,2378.423694,2376.079276,2375.155393
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-14,4449.120117,4460.459961,4390.770020,4392.589844,3634740000,0,0,4418.384600,4522.684502,4461.339525,4458.158233,4474.011658
2022-04-18,4385.629883,4410.310059,4370.299805,4391.689941,3509340000,0,0,4416.669600,4519.772002,4454.706232,4455.551633,4472.381525
2022-04-19,4390.629883,4471.029785,4390.629883,4462.209961,3197930000,0,0,4415.903203,4517.487100,4455.420873,4455.812744,4472.180108
2022-04-20,4472.259766,4488.290039,4448.759766,4459.450195,3678040000,0,0,4415.414805,4515.067002,4455.804618,4455.955389,4471.928030


## Plotting Moving Averages

In [71]:
fig = go.Figure()

candle = go.Candlestick(x=gspc_df.index, open=gspc_df['Open'],
    high=gspc_df['High'], low=gspc_df['Low'],
    close=gspc_df['Close'], name="Candlestick")

ema50_line = go.Scatter(x=gspc_df.index, y=gspc_df['EMA50'], 
    line=dict(color='rgba(250, 0, 0, 0.75)', 
    width=1), name="EMA50")

ema100_line = go.Scatter(x=gspc_df.index, y=gspc_df['EMA100'], 
    line=dict(color='rgba(0, 250, 0, 0.75)', 
    width=1), name="EMA100")

fig.add_trace(candle)
# 1st support line
fig.add_trace(ema50_line)
# 2nd support line
fig.add_trace(ema100_line)

fig.show()

# What we see with the support line is that if we hit it the market bounces back. When candles are green that means we have more buyers than sellers and vice versa.
# Support levels are good indicators and the market normally only breaks support when an event occurs that is external to the market.

## Analyzing Bitcoin

In [42]:
btc_df = yf.download(tickers='BTC-USD', period='3d', interval='15m')
btc_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-19 00:00:00+00:00,40828.175781,40906.359375,40825.128906,40904.992188,40904.992188,131694592
2022-04-19 00:15:00+00:00,41022.519531,41022.519531,40911.394531,40934.261719,40934.261719,208801792
2022-04-19 00:30:00+00:00,40936.355469,41146.886719,40936.355469,41036.796875,41036.796875,401954816
2022-04-19 00:45:00+00:00,41036.046875,41036.046875,40933.039062,40939.136719,40939.136719,50886656
2022-04-19 01:00:00+00:00,40931.214844,40931.214844,40894.179688,40906.390625,40906.390625,11370496
...,...,...,...,...,...,...
2022-04-21 22:30:00+00:00,40688.503906,40688.503906,40063.828125,40288.910156,40288.910156,994777088
2022-04-21 22:45:00+00:00,40310.593750,40443.187500,40310.593750,40400.535156,40400.535156,616816640
2022-04-21 23:00:00+00:00,40397.136719,40463.800781,40397.136719,40463.800781,40463.800781,281980928
2022-04-21 23:15:00+00:00,40459.761719,40459.761719,40370.968750,40416.406250,40416.406250,205160448


In [43]:
# We'll create a short term moving average using 5 periods
# and another using 20 periods
# When the longterm MA goes above the short term that is a sell sign
# and vice versa
btc_df['MA5'] = btc_df['Close'].rolling(5).mean()
btc_df['MA20'] = btc_df['Close'].rolling(20).mean()

## Plot

In [44]:
fig = go.Figure()

candle = go.Candlestick(x=btc_df.index, open=btc_df['Open'],
    high=btc_df['High'], low=btc_df['Low'],
    close=btc_df['Close'], name="Candlestick")

ma5_line = go.Scatter(x=btc_df.index, y=btc_df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

ma20_line = go.Scatter(x=btc_df.index, y=btc_df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

fig.add_trace(candle)
fig.add_trace(ma5_line)
fig.add_trace(ma20_line)

fig.show()

# When the MA20 (Blue) crosses above the MA5 (Orange) that is a Death Cross (Sell)
# When the MA5 (Orange) crosses above the MA20 (Blue)that is a Golden Cross (Buy)

## Longterm Bitcoin

In [75]:
# btcl_df = yf.download(tickers='BTC-USD', period='3mo', interval='1d')
# btcl_df

In [76]:
# btcl_df['MA5'] = btcl_df['Close'].rolling(5).mean()
# btcl_df['MA20'] = btcl_df['Close'].rolling(20).mean()

In [77]:
# fig = go.Figure()

# candle = go.Candlestick(x=btcl_df.index, open=btcl_df['Open'],
#     high=btcl_df['High'], low=btcl_df['Low'],
#     close=btcl_df['Close'], name="Candlestick")

# ma5_line = go.Scatter(x=btcl_df.index, y=btcl_df['MA5'], 
#     line=dict(color='rgba(255,165,0, 0.75)', 
#     width=1), name="MA5")

# ma20_line = go.Scatter(x=btcl_df.index, y=btcl_df['MA20'], 
#     line=dict(color='rgba(0, 0, 250, 0.75)', 
#     width=1), name="MA20")

# fig.add_trace(candle)
# fig.add_trace(ma5_line)
# fig.add_trace(ma20_line)

# fig.show()

# We see here that we are in a down trend long term

## Short Term Plot Function

In [45]:
def get_short_ma(ticker):
    df = yf.download(tickers=ticker, period='3d', interval='15m')
    df['MA5'] = df['Close'].rolling(5).mean()
    df['MA20'] = df['Close'].rolling(20).mean()
    fig = go.Figure()

    candle = go.Candlestick(x=df.index, open=df['Open'],
        high=df['High'], low=df['Low'],
        close=df['Close'], name="Candlestick")

    ma5_line = go.Scatter(x=df.index, y=df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

    ma20_line = go.Scatter(x=df.index, y=df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

    fig.add_trace(candle)
    fig.add_trace(ma5_line)
    fig.add_trace(ma20_line)

    fig.show()

## Long Term Plot Function

In [46]:
def get_long_ma(ticker):
    df = yf.download(tickers=ticker, period='3mo', interval='1d')
    df['MA5'] = df['Close'].rolling(5).mean()
    df['MA20'] = df['Close'].rolling(20).mean()
    fig = go.Figure()

    candle = go.Candlestick(x=df.index, open=df['Open'],
        high=df['High'], low=df['Low'],
        close=df['Close'], name="Candlestick")

    ma5_line = go.Scatter(x=df.index, y=df['MA5'], 
    line=dict(color='rgba(255,165,0, 0.75)', 
    width=1), name="MA5")

    ma20_line = go.Scatter(x=df.index, y=df['MA20'], 
    line=dict(color='rgba(0, 0, 250, 0.75)', 
    width=1), name="MA20")

    fig.add_trace(candle)
    fig.add_trace(ma5_line)
    fig.add_trace(ma20_line)

    fig.show()

## Long Term Ethereum Plot

In [47]:
get_long_ma('ETH-USD')

[*********************100%***********************]  1 of 1 completed


## Short Term Ethereum Plot

In [48]:
get_short_ma('ETH-USD')

[*********************100%***********************]  1 of 1 completed


## Long Term Litecoin

In [49]:
get_long_ma('LTC-USD')

[*********************100%***********************]  1 of 1 completed


## Long Term Cardano

In [50]:
get_long_ma('ADA-USD')

[*********************100%***********************]  1 of 1 completed


## Get Stock File Names in a List

In [51]:
files = [x for x in listdir(PATH) if isfile(join(PATH, x))]
tickers = [os.path.splitext(x)[0] for x in files]
tickers
# tickers.remove('.ds_Store')
tickers.sort()
len(tickers)

2908

## Add Daily & Cumulative Return to All Stock Data

In [52]:
for x in tickers:
    try:
        print("Working on :", x)
        new_df = get_stock_df_from_csv(x)
        new_df = add_daily_return_to_df(new_df, x)
        new_df = add_cum_return_to_df(new_df, x)
        new_df.to_csv(PATH + x + '.csv')
    except Exception as ex:
        print(ex)


Working on : A
File Doesn't Exist
'NoneType' object is not subscriptable
Working on : AA
'Close'
Working on : AAL
'Close'
Working on : AAME
'Close'
Working on : AAOI
'Close'
Working on : AAON
'Close'
Working on : AAP
'Close'
Working on : AAPL
Working on : AAT
'Close'
Working on : AAWW
'Close'
Working on : ABBV
'Close'
Working on : ABC
'Close'
Working on : ABCB
'Close'
Working on : ABEO
'Close'
Working on : ABG
'Close'
Working on : ABIO
'Close'
Working on : ABM
'Close'
Working on : ABMD
'Close'
Working on : ABR
'Close'
Working on : ABT
'Close'
Working on : ABTX
'Close'
Working on : AC
'Close'
Working on : ACAD
'Close'
Working on : ACBI
'Close'
Working on : ACC
'Close'
Working on : ACCO
'Close'
Working on : ACER
'Close'
Working on : ACGL
'Close'
Working on : ACHC
'Close'
Working on : ACHV
'Close'
Working on : ACIW
'Close'
Working on : ACLS
'Close'
Working on : ACM
'Close'
Working on : ACMR
'Close'
Working on : ACN
'Close'
Working on : ACNB
'Close'
Working on : ACOR
'Close'
Working on : A

## 11 Stock Market Sectors

We want to invest in stocks that are not correlated with other stocks. This means we want stocks that tend to go down when the others go up. Because all our stocks are expected to do well over the course of the year, this smoothes out the performance of our portfolio.

One way to easily find stocks that are not correlated is to create portfolios using stocks from different sectors of the market.

**Technology** : Manufacturing of electronics, software, or related to information technology 

**Health Care** : Biotech, hospitals, medical devices, drugs 

**Financials** : Banks, investment funds, and insurance

**Real Estate** : Residential, industrial, and retail real estate 

**Energy** : Production and supply of energy 

**Materials** : Mining, refining, chemical, and forestry 

**Consumer Discretionary** : Retailers, apparel, media, durables and services 

**Industrials** : Construction, machinery, fabrication, manufacturing, defense, and aerospace 

**Utilities** : Direct providers of electric, gas, and water 

**Consumer Staples** : Food, beverage as well as products consumers deem essential 

**Telecommunication** : Cable, internet providers, wireless, and satellite

## Get Sector Stocks

In [54]:
sec_df = pd.read_csv("/Users/oppoudel/dev/Python4Finance/Original/stock_sectors.csv")
sec_df

indus_df = sec_df.loc[sec_df['Sector'] == "Industrials"]
health_df = sec_df.loc[sec_df['Sector'] == "Health Care"]
it_df = sec_df.loc[sec_df['Sector'] == "Information Technology"]
comm_df = sec_df.loc[sec_df['Sector'] == "Communication Services"]
staple_df = sec_df.loc[sec_df['Sector'] == "Consumer Staples"]
discretion_df = sec_df.loc[sec_df['Sector'] == "Consumer Discretionary"]
utility_df = sec_df.loc[sec_df['Sector'] == "Utilities"]
financial_df = sec_df.loc[sec_df['Sector'] == "Financials"]
material_df = sec_df.loc[sec_df['Sector'] == "Materials"]
restate_df = sec_df.loc[sec_df['Sector'] == "Real Estate"]
energy_df = sec_df.loc[sec_df['Sector'] == "Energy"]


## Returns a DF with Cumulative Return for all Stocks

In [57]:
def get_cum_ret_for_stocks(stock_df):
    tickers = []
    cum_rets = []

    for index, row in stock_df.iterrows():
        df = get_stock_df_from_csv(row['Symbol'])
        if df is None:
            pass
        else:
            tickers.append(row['Symbol'])
            cum = df['cum_return'].iloc[-1]
            cum_rets.append(cum)
    return pd.DataFrame({'Ticker':tickers, 'CUM_RET':cum_rets})

In [58]:
industrial = get_cum_ret_for_stocks(indus_df)
health_care = get_cum_ret_for_stocks(health_df)
it = get_cum_ret_for_stocks(it_df)
commun = get_cum_ret_for_stocks(comm_df)
staple = get_cum_ret_for_stocks(staple_df)
discretion = get_cum_ret_for_stocks(discretion_df)
utility = get_cum_ret_for_stocks(utility_df)
finance = get_cum_ret_for_stocks(financial_df)
material = get_cum_ret_for_stocks(material_df)
restate = get_cum_ret_for_stocks(restate_df)
energy = get_cum_ret_for_stocks(energy_df)

KeyError: 'cum_return'

## Ichimoku Kinko Hyo

The Ichimoku (One Look) is considered an all in one indicator. It provides information on momentum, support and resistance. It is made up of 5 lines. If you are a short term trader you create 1 minute or 6 hour. Long term traders focus on day or weekly data.

 - Conversion Line (Tenkan-sen) : Represents support, resistance and reversals. Used to measure short term trends.
 - Baseline (Kijun-sen) : Represents support, resistance and confirms trend changes. Allows you to evaluate the strength of medium term trends. Called the baseline because it lags the price.
 - Leading Span A (Senkou A) : Used to identify future areas of support and resistance
 - Leading Span B (Senkou B) : Other line used to identify suture support and resistance
 - Lagging Span (Chikou) : Shows possible support and resistance. It is used to confirm signals obtained from other lines.
 - Cloud (Kumo) : Space between Span A and B. Represents the divergence in price evolution.
 
Formulas

 - Lagging Span = Price shifted back 26 periods
 - Base Line = (Highest Value in period + Lowest value in period)/2 (26 Sessions)
 - Conversion Line = (Highest Value in period + Lowest value in period)/2 (9 Sessions)
 - Leading Span A = (Conversion Value + Base Value)/2 (26 Sessions)
 - Leading Span B = (Conversion Value + Base Value)/2 (52 Sessions)

## Get Ichimoku Function

In [59]:
def get_fill_color(label):
    if label >= 1:
        return 'rgba(0,250,0,0.4)'
    else:
        return 'rgba(250,0,0,0.4)'

In [62]:
def get_Ichimoku(ticker):
    df = yf.download(tickers=ticker, period='1y', interval='1d')

    # Conversion
    hi_val = df['High'].rolling(window=9).max()
    low_val = df['Low'].rolling(window=9).min()
    df['Conversion'] = (hi_val + low_val) / 2

    # Baseline
    hi_val2 = df['High'].rolling(window=26).max()
    low_val2 = df['Low'].rolling(window=26).min()
    df['Baseline'] = (hi_val2 + low_val2) / 2

    # Spans
    df['SpanA'] = ((df['Conversion'] + df['Baseline']) / 2).shift(26)
    hi_val3 = df['High'].rolling(window=52).max()
    low_val3 = df['Low'].rolling(window=52).min()
    df['SpanB'] = ((hi_val3 + low_val3) / 2).shift(26)
    df['Lagging'] = df['Close'].shift(-26)

    candle = go.Candlestick(x=df.index, open=df['Open'],
    high=df['High'], low=df["Low"], close=df['Close'], name="Candlestick")

    df1 = df.copy()
    fig = go.Figure()
    df['label'] = np.where(df['SpanA'] > df['SpanB'], 1, 0)
    df['group'] = df['label'].ne(df['label'].shift()).cumsum()

    df = df.groupby('group')

    dfs = []
    for name, data in df:
        dfs.append(data)

    for df in dfs:
        fig.add_traces(go.Scatter(x=df.index, y=df.SpanA,
        line=dict(color='rgba(0,0,0,0)')))

        fig.add_traces(go.Scatter(x=df.index, y=df.SpanB,
        line=dict(color='rgba(0,0,0,0)'),
        fill='tonexty',
        fillcolor=get_fill_color(df['label'].iloc[0])))

    baseline = go.Scatter(x=df1.index, y=df1['Baseline'], 
    line=dict(color='pink', width=2), name="Baseline")

    conversion = go.Scatter(x=df1.index, y=df1['Conversion'], 
    line=dict(color='black', width=1), name="Conversion")

    lagging = go.Scatter(x=df1.index, y=df1['Lagging'], 
    line=dict(color='purple', width=2), name="Lagging")

    span_a = go.Scatter(x=df1.index, y=df1['SpanA'], 
    line=dict(color='green', width=2, dash='dot'), name="Span A")

    span_b = go.Scatter(x=df1.index, y=df1['SpanB'], 
    line=dict(color='red', width=1, dash='dot'), name="Span B")

    fig.add_trace(candle)
    fig.add_trace(baseline)
    fig.add_trace(conversion)
    fig.add_trace(lagging)
    fig.add_trace(span_a)
    fig.add_trace(span_b)
    
    fig.update_layout(height=800, width=1300, showlegend=True)

    fig.show()


    

## What the Lines Mean

 - Lagging Span : When above the price it is bullish and when below bearish. It is used with other indicators because it is mainly a filter.
 - Baseline : When below price this is considered support. When above price this is considered resistance. We are in an uptrend when the slope increases and vice versa. The slope of the curve tells us the strength of the trend.
 - Conversion : We focus on its position versus the Baseline. When the Conversion crosses above the Baseline we are in an upward trend and vice versa. This is considered a strong indicator when above the Cloud and weak when below.
 - Cloud : The thicker the Cloud, the stronger the trend and vice versa. When the Spans cross many times we are in a range. When they cross this is a sign of a reversal of trend.

## Top Industrials

In [99]:
industrial.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
24,GNRC,8.172729
26,GE,6.217026
42,ODFL,6.137047
10,CPRT,5.350906
16,ETN,4.727456
9,CTAS,4.009012
13,DE,3.816768
30,INFO,3.775151
35,KSU,3.709996
55,TDY,3.444655


## Analyze Best Performers with Ichimoku

In [65]:
get_Ichimoku('GE')

[*********************100%***********************]  1 of 1 completed


## Top Healthcare

In [103]:
health_care.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
25,DXCM,8.678003
4,ALGN,6.780879
59,WST,5.22383
33,IDXX,5.053621
16,CTLT,4.888889
19,CRL,4.766054
61,ZTS,4.687021
11,TECH,4.586297
54,TMO,4.488118
49,RMD,4.426924


## Top Information Tech

In [104]:
it.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
21,ENPH,172.016003
2,AMD,12.785159
26,FTNT,11.098766
50,PAYC,9.118401
57,NOW,8.307653
70,ZBRA,7.422101
39,LRCX,7.366411
14,CDNS,7.123404
44,MPWR,6.569404
7,AAPL,6.523364


## Top Communications

In [105]:
commun.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
15,NFLX,4.863241
14,LYV,4.160283
2,GOOG,3.723259
1,GOOGL,3.663857
20,TTWO,3.310014
10,FB,2.943717
22,TWTR,2.406762
4,CHTR,2.198079
19,TMUS,2.044718
16,NWSA,1.950593


## Top Staples

In [106]:
staple.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
10,EL,4.850015
9,COST,3.904326
3,CHD,2.302912
30,WMT,2.1891
25,PG,2.105584
27,HSY,2.069291
22,MNST,2.022815
23,PEP,1.868988
18,LW,1.661314
4,CLX,1.647645


## Top Discretionary

In [107]:
discretion.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
17,ETSY,18.495894
43,POOL,5.58769
7,CZR,5.56125
52,TSLA,5.285195
10,CMG,4.500627
1,AMZN,4.418258
11,DHI,3.989342
32,LOW,3.836499
51,TGT,3.567896
40,NVR,3.563927


## Top Utilities

In [108]:
utility.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
20,NRG,3.344245
4,AWK,2.610672
0,AES,2.447139
2,AEE,1.951904
27,XEL,1.902183
26,WEC,1.879371
15,ES,1.864813
16,EXC,1.853503
1,LNT,1.839575
13,ETR,1.829929


## Top Finance

In [109]:
finance.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
39,MSCI,8.216773
49,SPGI,4.3774
37,MCO,4.265219
51,SIVB,4.240737
6,AJG,3.696233
40,NDAQ,3.277685
45,PGR,3.157559
53,TROW,2.999332
4,AMP,2.910101
38,MS,2.774116


## Top Materials

In [110]:
material.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
2,AVY,3.200383
1,ALB,3.093036
14,LIN,2.97387
11,FCX,2.594122
3,BLL,2.557432
5,CF,2.458485
10,FMC,2.295981
0,APD,2.235855
4,CE,2.148049
16,MLM,2.048859


## Top Real Estate

In [111]:
restate.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
21,SBAC,3.659762
17,PLD,3.442297
11,EXR,3.3313
4,CBRE,3.205564
1,AMT,2.894263
7,DRE,2.720885
16,MAA,2.698131
5,CCI,2.652901
8,EQIX,2.448353
0,ARE,2.220472


## Top Energy

In [112]:
energy.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
2,COP,1.632314
11,MPC,1.565467
14,OKE,1.515579
8,HES,1.382084
18,VLO,1.336799
1,CVX,1.26232
19,WMB,1.19886
4,FANG,1.131944
16,PXD,1.049206
3,DVN,1.035698
