project inspiration from https://www.youtube.com/watch?v=4jaBKXDqg9U 'How to Invest with Data Science' by Derek Banas

In [1]:
# import libraries

import numpy as np
import pandas as pd
import os
from os import listdir
from os.path import isfile, join

import datetime as dt
import time

import yfinance as yf

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
# make plotly work in jupyter notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
from plotly.subplots import make_subplots

import warnings
warnings.simplefilter('ignore')

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
PATH = "C://Users//sophi//Documents//Portfolio Projects//DS_for_Investing//Wilshire_Stocks//"

# Start end date defaults
S_DATE = "2017-02-01"
E_DATE = "2022-12-06"
S_DATE_DT = pd.to_datetime(S_DATE)
E_DATE_DT = pd.to_datetime(E_DATE)

In [3]:
# Get Dataframe from CSV
def get_stock_df_from_csv(ticker):
    '''
    Reads a dataframe from the CSV file, changes index to date and returns it.
    Try to get the file and if it doesn't exist issue a warning
    '''
    try:
        df = pd.read_csv(PATH + ticker + '.csv', index_col=0)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df

## Function for plotting Bollinger Bands Using Data Already Calculated and Saved in the CSVs

In [4]:
def plot_bollinger_bands(df, ticker):
    
    # using plotly to plot candlesticks
    fig = go.Figure()
    
    candle = go.Candlestick(x=df.index, 
                            open=df['Open'], 
                            high=df['High'], 
                            low=df['Low'], 
                            close=df['Close'], 
                           name='Candlestick')
    
    upper_line = go.Scatter(x=df.index, y=df['upper_band'],
                           line=dict(color='rgba(250, 0, 0, 0.75)',
                            width=1), name='Upper Band')
    mid_line = go.Scatter(x=df.index, y=df['middle_band'],
                           line=dict(color='rgba(0, 0, 250, 0.75)',
                            width=0.7), name='Middle Band')
    lower_line = go.Scatter(x=df.index, y=df['lower_band'],
                           line=dict(color='rgba(0, 250, 0, 0.75)',
                            width=1), name='Lower Band')
    
    fig.add_trace(candle)
    fig.add_trace(upper_line)
    fig.add_trace(mid_line)
    fig.add_trace(lower_line)
    
    fig.update_xaxes(title="Date", rangeslider_visible=True)
    fig.update_yaxes(title="Price")
    
    fig.update_layout(title=ticker + "Bollinger Bands", 
                      # might need to change the height and width
                     height=1200, width=1200, showlegend=True)
    fig.show()

## Function for plotting the Ichimoku

In [5]:
def get_fill_color(label):
    '''
    The Ichimoku cloud is green when the stock is showing upward
    buying potential, and red when the stock it is time to sell.
    Uses Span A and Span B calculations. 
    '''
    
    if label >= 1:
        return 'rgba(0,250,0,0.4)' # cloud is green
    else:
        return 'rgba(250,0,0,0.4)' # cloud is red
    
    
def get_Ichimoku(df):
    candle = go.Candlestick(x=df.index, 
                            open=df['Open'], 
                            high=df['High'], 
                            low=df['Low'], 
                            close=df['Close'], 
                           name='Candlestick')
    
    # the below code will change create the Ichimoku cloud and change its colors
    df1 = df.copy()
    fig = go.Figure()
    df['label'] = np.where(df['SpanA']>df['SpanB'], 1, 0)
    df['group'] = df['label'].ne(df['label'].shift()).cumsum()
    df = df.groupby('group')
    
    list_dfs = []
    for name, data in df:
        list_dfs.append(data)
        
    for df in list_dfs:
        fig.add_traces(go.Scatter(x=df.index, y=df['SpanA'], 
                                  # the line is 'empty' bc we don't need a line
                                  # we're actually filling in the area below the line
                                  line=dict(color='rgba(0,0,0,0)')))
        fig.add_traces(go.Scatter(x=df.index, y=df['SpanB'],
                                 # again, the line for Span B is blank
                                 line=dict(color='rgba(0,0,0,0)'),
                                 # now fill the area between the 2 lines
                                 fill='tonexty',
                                 # fill based on get_fill_color function (green or red)
                                 fillcolor=get_fill_color(df['label'].iloc[0])))
        
    baseline = go.Scatter(x=df1.index, y=df1['Baseline'],
                         line=dict(color='pink', width=3), name='Baseline')

    conversion = go.Scatter(x=df1.index, y=df1['Conversion'],
                           line=dict(color='black', width=1), name='Conversion')

    lagging = go.Scatter(x=df1.index, y=df1['Lagging'],
                           line=dict(color='purple', width=2), name='Lagging')

    span_a = go.Scatter(x=df1.index, y=df1['SpanA'],
                           line=dict(color='green', width=2, dash='dot'), name='Span A')

    span_b = go.Scatter(x=df1.index, y=df1['SpanB'],
                           line=dict(color='red', width=1, dash='dot'), name='Span B')

    fig.add_trace(candle)
    fig.add_trace(baseline)
    fig.add_trace(conversion)
    fig.add_trace(lagging)
    fig.add_trace(span_a)
    fig.add_trace(span_b)
    fig.update_layout(height=1200, width=1200, showlegend=True
                     # might need to change the height and width
                     )
    fig.show()

        

# Plots

In [6]:
test_df = get_stock_df_from_csv("AMZN")
plot_bollinger_bands(test_df, "AMZN")

In [7]:
get_Ichimoku(test_df)

Notes on the Ichimoku

 - Lagging Span: When above the price it is bullish and when below bearish. It is used with other indicators because it is mainly a filter. 
 
 - Baseline: When below price this is considered support. When above price this is considered resistance. We are in an uptrend when the slope increases and vice versa. The slope of the curve tells us the strength of the trend.
 
 - Conversion: Position is more important vs the Baseline. When the Conversion crosses above the Baseline we are in an upward trend and vice versa. This is considered a strong indicator when above the Cloud and weak when below. 
  
 - Cloud: The thicker the Cloud, the stronger the trend and vice versa. When the Spans cross many times we are in a range. When they cross this is a sign of a reversal of trend. 

In [9]:
sec_df = pd.read_csv('big_stock_sectors.csv')
sec_df.head()

# create a new df for each industry/sector
indus_df = sec_df.loc[sec_df['Sector'] == 'Industrial']
health_df = sec_df.loc[sec_df['Sector'] == 'Healthcare']
it_df = sec_df.loc[sec_df['Sector'] == 'Information Technology']
comm_df = sec_df.loc[sec_df['Sector'] == 'Communication']
staple_df = sec_df.loc[sec_df['Sector'] == 'Staples']
discretion_df = sec_df.loc[sec_df['Sector'] == 'Discretionary']
utility_df = sec_df.loc[sec_df['Sector'] == 'Utilities']
financial_df = sec_df.loc[sec_df['Sector'] == 'Financials']
material_df = sec_df.loc[sec_df['Sector'] == 'Materials']
restate_df = sec_df.loc[sec_df['Sector'] == 'Real Estate']
energy_df = sec_df.loc[sec_df['Sector'] == 'Energy']

energy_df.head()

Unnamed: 0,Ticker,Name,Description,Mrkt Cap,Sector
121,AE,Adams Resources & Energy,"Oil, Gas & Consumable Fuels",121.11M,Energy
140,AES,The AES Corporation,Independent Power and Renewable Elect...,16.20B,Energy
273,ALTM,Altus Midstream Company,"Oil, Gas & Consumable Fuels",229.70M,Energy
274,ALTO,Alto Ingredients,"Oil, Gas & Consumable Fuels",350.17M,Energy
283,AM,Antero Midstream,"Oil, Gas & Consumable Fuels",4.62B,Energy


## Find the Cumulative Return for all Stocks

In [10]:
def get_cum_return_for_stocks(sector_df):
    tickers = []
    cum_returns = []
    for index, row in sector_df.iterrows():
        df  = get_stock_df_from_csv(row['Ticker'])
        if df is None:
            pass
        else:
            tickers.append(row['Ticker'])
            cum = df['cum_return'].iloc[-1]
            cum_returns.append(cum)
    return pd.DataFrame({'Ticker': ticker, 
                         'Cum_return': cum_returns})

In [11]:
health_care = get_cum_return_for_stocks(health_df)


File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't

IndexError: single positional indexer is out-of-bounds

In [None]:
health_care