In [1]:
# import libraries

import numpy as np
import pandas as pd
import os
from os import listdir
from os.path import isfile, join
from IPython.display import clear_output

import datetime as dt
import time

import yfinance as yf

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
# make plotly work in jupyter notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
from plotly.subplots import make_subplots

import warnings
warnings.simplefilter('ignore')

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Functions

In [2]:
# Get Dataframe from CSV
def get_stock_df_from_csv(ticker, folder=None):
    '''
    Read the saved data for one ticker from `<ticker>.csv` inside a folder.

    Parameters
    ----------
    ticker : str
        Symbol whose CSV should be read (the file name without '.csv').
    folder : str, optional
        Directory that contains the CSV files. When omitted, the
        notebook-level global `path` is used, so existing single-argument
        calls keep working.

    Returns
    -------
    pandas.DataFrame or None
        The file contents with the first CSV column as the index (the index
        values are kept as read; they are not converted to datetimes), or
        None when the file does not exist, in which case a warning is printed.
    '''
    # Resolve the directory at call time so `path` may be assigned in a later
    # cell than the one defining this function.
    base_dir = path if folder is None else folder
    try:
        # os.path.join works whether or not base_dir ends with a separator.
        return pd.read_csv(os.path.join(base_dir, ticker + '.csv'), index_col=0)
    except FileNotFoundError:
        # Replace any earlier warning instead of stacking one per missing file.
        clear_output(wait=True)
        print("File Doesn't Exist")
        return None

In [3]:
# Functions for plotting the Ichimoku

def get_fill_color(label):
    '''
    Choose the translucent fill color for one segment of the Ichimoku cloud.

    A label of 1 or more yields green; any other value yields red.
    get_Ichimoku passes 1 for segments where Span A is above Span B.
    '''
    green_fill = 'rgba(0,250,0,0.4)'
    red_fill = 'rgba(250,0,0,0.4)'
    return green_fill if label >= 1 else red_fill
    
    
def get_Ichimoku(df):
    '''
    Draw an Ichimoku chart (shaded cloud, candlesticks and indicator lines)
    for one stock and display it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Open', 'High', 'Low', 'Close', 'Baseline',
        'Conversion', 'Lagging', 'SpanA' and 'SpanB'. The index is used as
        the x axis. The frame is left unmodified.

    Returns
    -------
    None
        The figure is rendered with `fig.show()`.
    '''
    fig = go.Figure()

    # Build the cloud on a private copy so the helper columns below never
    # leak into the caller's DataFrame.
    cloud = df[['SpanA', 'SpanB']].copy()
    # 1 where Span A is above Span B (green cloud), otherwise 0 (red cloud).
    cloud['label'] = np.where(cloud['SpanA'] > cloud['SpanB'], 1, 0)
    # Consecutive rows sharing a label get the same group id, so each group
    # is one uninterrupted stretch of a single cloud color.
    cloud['group'] = cloud['label'].ne(cloud['label'].shift()).cumsum()

    for _, segment in cloud.groupby('group'):
        # Invisible Span A line: it only serves as the upper/lower boundary
        # that the next trace fills towards.
        fig.add_trace(go.Scatter(x=segment.index, y=segment['SpanA'],
                                 line=dict(color='rgba(0,0,0,0)'),
                                 # keep the helper traces out of the legend
                                 showlegend=False))
        # Invisible Span B line, filled back to the Span A trace added just
        # above; the color depends on which span is on top in this segment.
        fig.add_trace(go.Scatter(x=segment.index, y=segment['SpanB'],
                                 line=dict(color='rgba(0,0,0,0)'),
                                 fill='tonexty',
                                 fillcolor=get_fill_color(segment['label'].iloc[0]),
                                 showlegend=False))

    candle = go.Candlestick(x=df.index,
                            open=df['Open'],
                            high=df['High'],
                            low=df['Low'],
                            close=df['Close'],
                            name='Candlestick')

    baseline = go.Scatter(x=df.index, y=df['Baseline'],
                          line=dict(color='pink', width=3), name='Baseline')

    conversion = go.Scatter(x=df.index, y=df['Conversion'],
                            line=dict(color='black', width=1), name='Conversion')

    lagging = go.Scatter(x=df.index, y=df['Lagging'],
                         line=dict(color='purple', width=2), name='Lagging')

    span_a = go.Scatter(x=df.index, y=df['SpanA'],
                        line=dict(color='green', width=2, dash='dot'), name='Span A')

    span_b = go.Scatter(x=df.index, y=df['SpanB'],
                        line=dict(color='red', width=1, dash='dot'), name='Span B')

    # Named traces are added after the cloud so they are drawn on top of it.
    for trace in (candle, baseline, conversion, lagging, span_a, span_b):
        fig.add_trace(trace)

    fig.update_layout(height=1200, width=1200, showlegend=True
                      # might need to change the height and width
                      )
    fig.show()

# Looking at top performing stocks by Industry

In [4]:
# Load the table that maps each ticker to its sector; the file name is
# relative, so it is resolved against the current working directory.
sec_df = pd.read_csv('big_stock_sectors.csv')
sec_df.head()

Unnamed: 0,Ticker,Name,Description,Mrkt Cap,Sector
0,A,Agilent Technologies,Life Sciences Tools & Services,48.21B,Healthcare
1,AA,Alcoa,Metals & Mining,11.15B,Materials
2,AAC,Ares Acquisition,Blank Check / SPAC,1.22B,SPAC
3,AACG,ATA Creativity Global,Diversified Consumer Services,33.59M,Discretionary
4,AACI,Armada Acquisition I,Blank Check / SPAC,202.75M,SPAC


In [5]:
# Split the sector table into one DataFrame per sector
def _rows_for_sector(sector_name):
    '''Return the rows of sec_df whose 'Sector' column equals sector_name.'''
    return sec_df.loc[sec_df['Sector'] == sector_name]


indus_df = _rows_for_sector('Industrial')
health_df = _rows_for_sector('Healthcare')
it_df = _rows_for_sector('Information Technology')
comm_df = _rows_for_sector('Communication')
staple_df = _rows_for_sector('Staples')
discretion_df = _rows_for_sector('Discretionary')
utility_df = _rows_for_sector('Utilities')
financial_df = _rows_for_sector('Financials')
material_df = _rows_for_sector('Materials')
restate_df = _rows_for_sector('Real Estate')
energy_df = _rows_for_sector('Energy')

energy_df.head()

Unnamed: 0,Ticker,Name,Description,Mrkt Cap,Sector
121,AE,Adams Resources & Energy,"Oil, Gas & Consumable Fuels",121.11M,Energy
140,AES,The AES Corporation,Independent Power and Renewable Elect...,16.20B,Energy
273,ALTM,Altus Midstream Company,"Oil, Gas & Consumable Fuels",229.70M,Energy
274,ALTO,Alto Ingredients,"Oil, Gas & Consumable Fuels",350.17M,Energy
283,AM,Antero Midstream,"Oil, Gas & Consumable Fuels",4.62B,Energy


In [6]:
print(len(energy_df))

308


## Find the Cumulative Return for all Stocks

In [7]:
def get_cum_return_for_stocks(sector_df):
    '''
    Collect the final cumulative return of every ticker listed in sector_df.

    Each row's 'Ticker' is loaded with get_stock_df_from_csv; tickers whose
    file is missing (None) or has no rows are left out. For the rest, the
    last value of the 'cum_return' column is recorded.

    Returns a DataFrame with the columns 'Ticker' and 'Cum_return'.
    '''
    symbols, final_returns = [], []
    for _, stock in sector_df.iterrows():
        history = get_stock_df_from_csv(stock['Ticker'])
        # Only keep tickers that actually have at least one row of data.
        if history is not None and len(history) >= 1:
            symbols.append(stock['Ticker'])
            final_returns.append(history['cum_return'].iloc[-1])
    return pd.DataFrame({'Ticker': symbols,
                         'Cum_return': final_returns})

In [8]:
# Directory holding one '<Ticker>.csv' file per stock; get_stock_df_from_csv
# reads this module-level name when it builds the file path.
# NOTE(review): absolute, machine-specific location -- adjust before running
# this notebook on another machine.
path = "C://Users//sophi//Documents//Portfolio_Projects//DS_for_Investing//Wilshire_Stocks//"

# Final cumulative return of every ticker, one table per sector. Tickers
# without a CSV file (or with an empty one) are skipped, so each table can be
# shorter than its source sector frame.
industrial = get_cum_return_for_stocks(indus_df)
health_care = get_cum_return_for_stocks(health_df)
it = get_cum_return_for_stocks(it_df)
commun = get_cum_return_for_stocks(comm_df)
staple = get_cum_return_for_stocks(staple_df)
discretion = get_cum_return_for_stocks(discretion_df)
utility = get_cum_return_for_stocks(utility_df)
finance = get_cum_return_for_stocks(financial_df)
material = get_cum_return_for_stocks(material_df)
restate = get_cum_return_for_stocks(restate_df)
energy = get_cum_return_for_stocks(energy_df)
energy

File Doesn't Exist


Unnamed: 0,Ticker,Cum_return
0,AE,0.865774
1,AES,2.306908
2,AM,0.706918
3,AMTX,3.687023
4,APA,0.791245
...,...,...
134,WMB,1.372500
135,WTI,2.133663
136,WTTR,0.566364
137,WWR,0.014694


In [9]:
# Top Industrial Stocks

industrial.sort_values(by=['Cum_return'], ascending=False).head(10)

Unnamed: 0,Ticker,Cum_return
250,PLUG,8.380488
27,AMRC,6.075496
146,GNRC,5.94452
71,CALX,5.066666
339,WMS,4.909012
81,CLFD,4.716854
99,CWST,4.479854
241,PAR,4.427907
226,NSSC,4.376344
40,ARNC,4.092485


In [10]:
# Top Healthcare Stocks

health_care.sort_values(by=['Cum_return'], ascending=False).head(10)

Unnamed: 0,Ticker,Cum_return
63,ARWR,21.793939
117,CDNA,20.372727
319,MRTX,19.237838
514,ZYXI,15.113048
380,PRPH,11.692072
356,OPRX,9.006473
84,AXSM,7.667924
201,FATE,7.641337
465,TNDM,7.621455
483,VCEL,7.594117


In [11]:
df_plug = get_stock_df_from_csv('PLUG')
get_Ichimoku(df_plug)

In [12]:
# top healthcare stocks

health_care.sort_values(by=['Cum_return'], ascending=False).head(10)

Unnamed: 0,Ticker,Cum_return
63,ARWR,21.793939
117,CDNA,20.372727
319,MRTX,19.237838
514,ZYXI,15.113048
380,PRPH,11.692072
356,OPRX,9.006473
84,AXSM,7.667924
201,FATE,7.641337
465,TNDM,7.621455
483,VCEL,7.594117


In [13]:
df_arwr = get_stock_df_from_csv('ARWR')
get_Ichimoku(df_arwr)