<a href="https://colab.research.google.com/github/saifmdev/stock_scraper_ta/blob/main/stock_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Simple OHLC Chart with Moving Average Band -Pandas and Plotly

##Run this command if 'ta' library now installed

In [None]:
pip install ta

In [None]:
pip install plotly 

##Import Libraries

In [None]:
from ta.trend import ema_indicator as ema
from ta.volume import volume_weighted_average_price as vwap
from ta.momentum import rsi
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from datetime import date, datetime
import time


In [None]:
#Sample list of tickers
tickers = ['AA','MSFT', 'FB', 'TSLA', 'GOOG', 'AMZN']

#create from and to timestamps in epoch
current  = date.today().strftime('%s')
past = int(datetime(2019,12,1,0,0).timestamp())

#loop through tickers and query yahoo finance website to get .csv 
for ticker in tickers:
  link = f"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={past}&period2={current}&interval=1d&events=history&includeAdjustedClose=true"
  df = pd.read_csv(link)
  df['EMA13'] = ema(df['Close'],13)
  df['EMA21'] = ema(df['Close'],21)
  df['EMA34'] = ema(df['Close'],34)
  df['EMA55'] = ema(df['Close'],55)
  df['EMA89'] = ema(df['Close'],89)
  df['EMA144'] = ema(df['Close'],144)
  df['VWAP'] = vwap(df['High'], df['Low'],df['Close'], df['Volume'],5)
  fig = px.line(df, x="Date", y="EMA13", title=ticker, labels={ticker})
  fig.add_scatter(x=df['Date'], y=df['EMA21'], mode='lines', name="EMA21")
  fig.add_scatter(x=df['Date'], y=df['EMA34'], mode='lines', name="EMA34")
  fig.add_scatter(x=df['Date'], y=df['EMA55'], mode='lines', name="EMA55")
  fig.add_scatter(x=df['Date'], y=df['EMA89'], mode='lines', name="EMA89")
  fig.add_scatter(x=df['Date'], y=df['EMA144'], mode='lines', name="EMA144")
  fig.add_scatter(x=df['Date'], y=df['VWAP'], mode='lines', name="VWAP")
  fig.add_ohlc(open=df['Open'], high=df['High'], low=df['Low'], close=df['Close'], x=df["Date"],name="OHLC")
  fig.update_layout(
    yaxis_title='Price in $USD'
  )

  fig.show()

#YAHOO! Finance Web Scraper + INDICATOR CALCULATIONS

##Run this command if 'ta' library not installed

In [1]:
pip install ta

Collecting ta
  Downloading ta-0.9.0.tar.gz (25 kB)
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.9.0-py3-none-any.whl size=28908 sha256=f14ca527a96b76a0fd5223aac40710eb4e7b46f032847b1da4ec438de3454ec3
  Stored in directory: /root/.cache/pip/wheels/72/78/64/cc1c01506a1010a9845e9bd7c69333730f7174661228ea4f98
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.9.0


##Run this command if 'plotly' library not installed

In [2]:
pip install plotly



##Import Libraries

In [3]:
from datetime import date, datetime
from ta.trend import ema_indicator as ema, sma_indicator as sma, wma_indicator as wma, macd,macd_diff,macd_signal
from ta.volume import volume_weighted_average_price as vwap, on_balance_volume as obv
from ta.momentum import rsi
from ta.volatility import average_true_range as atr
from urllib.error import HTTPError
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from scipy.stats import linregress
import os
import shutil
import time

In [4]:
#Hull Moving Average Calculation using Weighted Moving Average (Fibonacci Band - 13,34,55,89,144)

def hma(s, period):
  return wma(wma(s, period//2).multiply(2).sub(wma(s, period)), int(np.sqrt(period)))

In [None]:
#Calculate Slope of a given line

def slope_calc(x,y):
  slope, intercept, r_value, p_value, std_err = linregress(x, y)
  return slope

In [None]:
#Set from and to date in epoch format (int)
current  = date.today().strftime('%s')
past = int(datetime(2019,1,1,0,0).timestamp())

#Get tickers from csv and convert to numpy array
ticker_list = 'https://drive.google.com/uc?export=download&id=1xzFbp9nmKQL0vz2F_BkVzdZercgLfEsq'
tickers_df = pd.read_csv(ticker_list)
tickers = tickers_df['Symbol'].to_numpy()

#Set OPENCLOSEDIFF_AVG Period
open_close_diff_period = 5

#remove existing directory and create new empty one for update
try:
    p = Path('/tickers')
    if p.exists():
      shutil.rmtree('./tickers')
    time.sleep(0.5)
    os.makedirs('tickers', exist_ok=True)  
except OSError as e:
    print("Error: %s - %s." % (e.filename, e.strerror))

#Loop through array of tickers to make queries to Yahoo Finance and save results as .csv
for ticker in tickers:
  try:
    link = f"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={past}&period2={current}&interval=1d&events=history&includeAdjustedClose=true"
    time.sleep(0.2)
    data = pd.read_csv(link)
    #Calculate Difference between Close and Open and Moving Average for smoothing (Period: 5)
    data['OPENCLOSEDIFF'] = (data['Close'] - data['Open']).fillna(0)
    data['OPENCLOSEDIFF_EMA_5'] = ema(data['OPENCLOSEDIFF'],open_close_diff_period).fillna(0)

    #Calculate Exponential Moving Average Fibonacci Band and Calculate Average of all values combined - TREND IND.
    data['EMA13'] = ema(data['Close'],13).fillna(0)
    data['EMA21'] = ema(data['Close'],21).fillna(0)
    data['EMA34'] = ema(data['Close'],34).fillna(0)
    data['EMA55'] = ema(data['Close'],55).fillna(0)
    data['EMA89'] = ema(data['Close'],89).fillna(0)
    data['EMA144'] = ema(data['Close'],144).fillna(0)
    # data['EMA_AVG'] = (data['EMA13'] + data['EMA34'] + data['EMA55'] + data['EMA89'] + data['EMA144'])/5
    
    #Calculate Hull Moving Average Fibonacci Band and Calculate Average of all values combined - TREND IND.
    data['HMA13'] = hma(data['Close'],13).fillna(0)
    data['HMA21'] = hma(data['Close'],21).fillna(0)
    data['HMA34'] = hma(data['Close'],34).fillna(0)
    data['HMA55'] = hma(data['Close'],55).fillna(0)
    data['HMA89'] = hma(data['Close'],89).fillna(0)
    data['HMA144'] = hma(data['Close'],144).fillna(0)
    # data['HMA_AVG'] = (data['HMA13'] + data['HMA34'] + data['HMA55'] + data['HMA89'] + data['HMA144'])/5

    #Calculate MACD, MACD Signal Line and MACD Histogram - TREND IND.
    data['MACD'] = macd(data['Close'],26,12).fillna(0)
    data['MACD_Signal'] = macd_signal(data['Close'],26,12).fillna(0)
    data['MACD_HIST'] = macd_diff(data['Close'],26,12).fillna(0)

    #Calculate OBV
    data['OBV'] = obv(data['Close'], data['Volume']).fillna(0)
    data['OBV_EMA_13'] = ema(data['OBV'],13).fillna(0)
    data['OBV_EMA_34'] = ema(data['OBV'],34).fillna(0)
    data['OBV_EMA_55'] = ema(data['OBV'],55).fillna(0)

    #Calculate VWAP - VOLUME IND.
    data['VWAP'] = vwap(data['High'], data['Low'],data['Close'], data['Volume'],5).fillna(0)

    #Calculate RSI Fibonacci Band and Average of values combined - MOMENTUM IND.
    data['RSI8'] = rsi(data['Close'],8).fillna(0)
    data['RSI13'] = rsi(data['Close'],13).fillna(0)
    data['RSI21'] = rsi(data['Close'],21).fillna(0)
    # data['RSI_AVG'] = (data['RSI13'] + data['RSI13'] + data['RSI21']) / 3.fillna(0)

    #Write File to tikcers directory
    data.to_csv(f"./tickers/{ticker}.csv")
    print(f"{ticker} Downloaded ✓")
  except HTTPError as err:
    print(f"{ticker} Not Found X")
print("Download Complete")

#Implementing Multiple Linear Regression on Data Collected to Predict Future Price Changes

##Import Libraries

In [None]:
from sklearn.model_selection import train_test_split

In [8]:
df1 = pd.read_csv('./tickers/AA.csv')
#Choose Features to evaluate in Model, this case only picked numerical features
numeric_features_test = ['Open', 'High', 'Low', 'Close', 'Volume', 'EMA13', 'EMA21', 'EMA34', 'EMA55', 'EMA89', 'HMA13', 'HMA21', 'HMA34', 'HMA55', 'HMA89', 'HMA144', 'MACD', 'MACD_SIGNAL', 'MACD_HIST', 'OBV', 'OBV_EMA_13', 'OBV_EMA_34', 'OBV_EMA_55','VWAP', 'RSI8', 'RSI13', 'RSI21']

x_train, x_test, y_train, y_test = train_test_split()

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,OPENCLOSEDIFF,OPENCLOSEDIFF_EMA_5,EMA13,EMA21,EMA34,EMA55,EMA89,EMA144,EMA_AVG,HMA13,HMA34,HMA55,HMA89,HMA144,HMA_AVG,MACD,MACD_Signal,MACD_Histogram,OBV,OBV_EMA_13,OBV_EMA_34,OBV_EMA_55,VWAP,RSI8,RSI13,RSI21,RSI_AVG
0,0,2019-01-02,25.955000,26.847000,25.480000,26.240000,26.182819,3067000,0.285000,,,,,,,,,,,,,,,,,,3067000,,,,,,,,
1,1,2019-01-03,26.120001,26.850000,25.530001,26.240000,26.182819,2979900,0.119999,,,,,,,,,,,,,,,,,,6046900,,,,,,,,
2,2,2019-01-04,26.840000,28.610001,26.650000,28.340000,28.278244,5653300,1.500000,,,,,,,,,,,,,,,,,,11700200,,,,,,,,
3,3,2019-01-07,28.530001,28.910000,28.146000,28.420000,28.358068,2441000,-0.110001,,,,,,,,,,,,,,,,,,14141200,,,,,,,,
4,4,2019-01-08,28.730000,28.847000,27.650000,28.250000,28.188438,2218300,-0.480000,0.105926,,,,,,,,,,,,,,,,,11922900,,,,27.394924,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,783,2022-02-09,69.519997,72.370003,68.970001,71.910004,71.910004,10244700,2.390007,2.012240,63.362984,61.781690,59.905169,57.501478,54.447745,50.367431,57.116961,68.698207,61.839731,62.683582,63.379404,60.054493,63.331083,2.625120,1.564080,1.061040,64239700,3.988592e+07,2.605786e+07,1.036363e+07,66.914522,80.672769,73.842817,68.850674,72.178769
784,784,2022-02-10,70.940002,73.720001,70.120003,71.169998,71.169998,11437700,0.229996,1.418159,64.478272,62.635172,60.548873,57.989640,54.819351,50.654363,57.698100,71.482053,63.334106,63.375187,63.933088,60.479430,64.520773,3.031829,1.857630,1.174199,52802000,4.173107e+07,2.758610e+07,1.187929e+07,68.881267,76.806363,71.496529,67.390267,70.127775
785,785,2022-02-11,68.959999,74.500000,68.731003,73.519997,73.519997,9942000,4.559998,2.465439,65.769947,63.624702,61.290080,58.544295,55.234921,50.969751,58.361799,73.731869,65.028761,64.231984,64.573528,60.962919,65.705812,3.503390,2.186782,1.316608,62744000,4.473292e+07,2.959512e+07,1.369588e+07,70.191854,80.242948,74.305269,69.544317,72.718285
786,786,2022-02-14,74.540001,75.230003,71.919998,73.860001,73.860001,7808700,-0.680000,1.416959,66.925669,64.555184,62.008362,59.091285,55.648811,51.285479,58.991921,75.370848,66.810622,65.197012,65.278646,61.498750,66.831176,3.860045,2.521435,1.338610,70552700,4.842146e+07,3.193555e+07,1.572648e+07,71.224159,80.715420,74.696092,69.846901,73.079695




#Test Block

In [None]:
import plotly.express as px
import pandas as pd

In [None]:
from numpy.lib import type_check
df = pd.read_csv('./tickers/AA.csv')
print(slope_calc(df.iloc[:,0],df['EMA13']))
# df['EMA13_SLOPE'] = slope_calc(df.iloc[:,0],df['EMA13'])
print(df['EMA13_SLOPE'])
# df.rename_axis('index1').reset_index()
# fig1 = px.line(df, x="Date", y="HMA_AVG", title="HMA_AVG")
# fig1.add_ohlc(open=df['Open'], high=df['High'], low=df['Low'], close=df['Close'], x=df["Date"],name="OHLC")
# fig1.update_layout()

#fig1.show()

# fig2 = px.line(df, x="Date", y="OPENCLOSEDIFF_EMA_5", title="OPENCLOSEDIFF MOVING AVERAGE - 5 DAYS PERIOD")
# fig2.add_hline(y=0, line_width=3, line_dash="dash", line_color="green")

#fig2.show()

nan
0      0.037405
1      0.037405
2      0.037405
3      0.037405
4      0.037405
         ...   
782    0.037405
783    0.037405
784    0.037405
785    0.037405
786    0.037405
Name: EMA13_SLOPE, Length: 787, dtype: float64
