In [1]:
pip install yahoo-fin

Collecting yahoo-fin
  Downloading yahoo_fin-0.8.9.1-py3-none-any.whl (10 kB)
Collecting requests-html
  Downloading requests_html-0.10.0-py3-none-any.whl (13 kB)
Collecting feedparser
  Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting sgmllib3k
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pyquery
  Downloading pyquery-2.0.0-py3-none-any.whl (22 kB)
Collecting fake-useragent
  Downloading fake_useragent-1.3.0-py3-none-any.whl (15 kB)
Collecting parse
  Downloading parse-1.19.1-py2.py3-none-any.whl (18 kB)
Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting w3lib
  Downloading w3lib-2.1.2-py3-none-any.whl (21 kB)
Collecting pyppeteer>=0.0.14
  Downloading pyppeteer-1.0.2-py3-none-any.whl (83 kB)
[2K     [90m━

In [2]:
from yahoo_fin.stock_info import get_data
import pandas as pd
import numpy as np

In [4]:
# Get the full daily price history (first available candle to the most recent one), keeping the date as a column
hist = get_data('AAPL', index_as_date=False)

# Show the last 5 rows of our dataframe
hist.tail()

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker
10821,2023-11-14,187.699997,188.110001,186.300003,187.440002,187.440002,60108400,AAPL
10822,2023-11-15,187.850006,189.5,187.779999,188.009995,188.009995,53790500,AAPL
10823,2023-11-16,189.570007,190.960007,188.649994,189.710007,189.710007,54412900,AAPL
10824,2023-11-17,190.25,190.380005,188.570007,189.690002,189.690002,50922700,AAPL
10825,2023-11-20,189.889999,191.910004,189.880005,191.449997,191.449997,46505100,AAPL


In [6]:
# Check column dtypes and non-null counts of the raw history
hist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10826 entries, 0 to 10825
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      10826 non-null  datetime64[ns]
 1   open      10826 non-null  float64       
 2   high      10826 non-null  float64       
 3   low       10826 non-null  float64       
 4   close     10826 non-null  float64       
 5   adjclose  10826 non-null  float64       
 6   volume    10826 non-null  int64         
 7   ticker    10826 non-null  object        
dtypes: datetime64[ns](1), float64(5), int64(1), object(1)
memory usage: 676.8+ KB


In [8]:
# Discard the adjusted-close column; the trimmed dataframe is named "prices"
prices = hist.drop(columns=['adjclose'])
prices.tail()

Unnamed: 0,date,open,high,low,close,volume,ticker
10821,2023-11-14,187.699997,188.110001,186.300003,187.440002,60108400,AAPL
10822,2023-11-15,187.850006,189.5,187.779999,188.009995,53790500,AAPL
10823,2023-11-16,189.570007,190.960007,188.649994,189.710007,54412900,AAPL
10824,2023-11-17,190.25,190.380005,188.570007,189.690002,50922700,AAPL
10825,2023-11-20,189.889999,191.910004,189.880005,191.449997,46505100,AAPL


In [17]:
# Candle body: closing price minus opening price
prices['O-to-C'] = prices['close'].sub(prices['open'])

In [18]:
# 20-candle rolling mean of the Open-to-Close column (window includes the current candle)
prices['OC-20D-Mean'] = prices['O-to-C'].rolling(window=20).mean()

In [16]:
# Percentage by which the current day's O-to-C deviates from its 20-candle rolling mean
prices['OC-%-from-20D-Mean'] = (
    prices['O-to-C']
    .sub(prices['OC-20D-Mean'])
    .mul(100)
    .div(prices['OC-20D-Mean'])
)

In [19]:
# Largest O-to-C among the 10 most recent candles (the current candle is part of the window)
prices['MaxOC_Prev10'] = prices['O-to-C'].rolling(window=10).max()

In [20]:
# 20-candle rolling mean of traded volume (window includes the current candle)
prices['Volume-20D-Mean'] = prices['volume'].rolling(window=20).mean()

In [21]:
# Percentage by which the current volume deviates from its 20-candle rolling mean
prices['Volume-%-from-20D-Mean'] = (
    prices['volume']
    .sub(prices['Volume-20D-Mean'])
    .mul(100)
    .div(prices['Volume-20D-Mean'])
)


In [22]:
# List the columns currently present on the dataframe
prices.columns

Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker', '0-to-C',
       'OC-20D-Mean', 'O-to-C', 'OC-%-from-20D-Mean', 'MaxOC_Prev10',
       'Volume-20D-Mean', 'Volume-%-from-20D-Mean'],
      dtype='object')

In [26]:
# Rearrange the columns of our dataframe (only the columns listed here are kept)
prices = prices [['ticker', 'date', 'open', 'high', 'low', 'close', 
                 'O-to-C', 'OC-20D-Mean', 'volume', 'Volume-20D-Mean', 
                 'MaxOC_Prev10', 'Volume-%-from-20D-Mean', 'OC-%-from-20D-Mean', 
                ]]

# Show the first 25 rows (the earliest candles, where the rolling columns are still NaN)
prices.head(25)

Unnamed: 0,ticker,date,open,high,low,close,O-to-C,OC-20D-Mean,volume,Volume-20D-Mean,MaxOC_Prev10,Volume-%-from-20D-Mean,OC-%-from-20D-Mean
0,AAPL,1980-12-12,0.128348,0.128906,0.128348,0.128348,0.0,,469033600,,,,
1,AAPL,1980-12-15,0.12221,0.12221,0.121652,0.121652,-0.000558,,175884800,,,,
2,AAPL,1980-12-16,0.113281,0.113281,0.112723,0.112723,-0.000558,,105728000,,,,
3,AAPL,1980-12-17,0.115513,0.116071,0.115513,0.115513,0.0,,86441600,,,,
4,AAPL,1980-12-18,0.118862,0.11942,0.118862,0.118862,0.0,,73449600,,,,
5,AAPL,1980-12-19,0.126116,0.126674,0.126116,0.126116,0.0,,48630400,,,,
6,AAPL,1980-12-22,0.132254,0.132813,0.132254,0.132254,0.0,,37363200,,,,
7,AAPL,1980-12-23,0.137835,0.138393,0.137835,0.137835,0.0,,46950400,,,,
8,AAPL,1980-12-24,0.145089,0.145647,0.145089,0.145089,0.0,,48003200,,,,
9,AAPL,1980-12-26,0.158482,0.15904,0.158482,0.158482,0.0,,55574400,,0.0,,


In [24]:
# Check dtypes and how many rows are missing rolling-window values
prices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10826 entries, 0 to 10825
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   ticker                  10826 non-null  object        
 1   date                    10826 non-null  datetime64[ns]
 2   open                    10826 non-null  float64       
 3   high                    10826 non-null  float64       
 4   low                     10826 non-null  float64       
 5   close                   10826 non-null  float64       
 6   O-to-C                  10826 non-null  float64       
 7   OC-20D-Mean             10807 non-null  float64       
 8   volume                  10826 non-null  int64         
 9   Volume-20D-Mean         10807 non-null  float64       
 10  MaxOC_Prev10            10817 non-null  float64       
 11  Volume-%-from-20D-Mean  10807 non-null  float64       
 12  OC-%-from-20D-Mean      10807 non-null  float6

In [27]:
# Keep only the rows in which every column has a value
prices = prices.dropna(axis=0, how='any')
prices.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10807 entries, 19 to 10825
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   ticker                  10807 non-null  object        
 1   date                    10807 non-null  datetime64[ns]
 2   open                    10807 non-null  float64       
 3   high                    10807 non-null  float64       
 4   low                     10807 non-null  float64       
 5   close                   10807 non-null  float64       
 6   O-to-C                  10807 non-null  float64       
 7   OC-20D-Mean             10807 non-null  float64       
 8   volume                  10807 non-null  int64         
 9   Volume-20D-Mean         10807 non-null  float64       
 10  MaxOC_Prev10            10807 non-null  float64       
 11  Volume-%-from-20D-Mean  10807 non-null  float64       
 12  OC-%-from-20D-Mean      10807 non-null  float64   

### Potential breakout candles — all of the following conditions must hold:
1. Green candle (the closing price is higher than the opening price)
2. Body that is the longest of the most recent 10 candles (including the current candle)
3. Body that is at least 100% longer than the average body of the most recent 20 candles (including the current candle)
4. Volume that is at least 50% higher than the average volume of the most recent 20 candles (including the current candle)

In [30]:
# Boolean mask: green candle, largest body of the last 10 candles,
# body >= 100% above its 20-candle mean, volume >= 50% above its 20-candle mean
conditions = (
    prices['O-to-C'].ge(0.0)
    & prices['O-to-C'].eq(prices['MaxOC_Prev10'])
    & prices['OC-%-from-20D-Mean'].ge(100.0)
    & prices['Volume-%-from-20D-Mean'].ge(50.0)
)

breakouts = prices.loc[conditions]

breakouts

Unnamed: 0,ticker,date,open,high,low,close,O-to-C,OC-20D-Mean,volume,Volume-20D-Mean,MaxOC_Prev10,Volume-%-from-20D-Mean,OC-%-from-20D-Mean
458,AAPL,1982-10-06,0.084263,0.090402,0.084263,0.090402,0.006139,0.000195,173532800,80833760.0,0.006139,114.678619,3042.564013
459,AAPL,1982-10-07,0.090960,0.098214,0.090960,0.097656,0.006696,0.000558,311673600,93237760.0,0.006696,234.278301,1099.891322
460,AAPL,1982-10-08,0.097656,0.105469,0.097098,0.104911,0.007255,0.000921,275542400,104211520.0,0.007255,164.406853,687.902032
477,AAPL,1982-11-02,0.120536,0.131696,0.120536,0.127790,0.007254,0.002121,310844800,199944640.0,0.007254,55.465433,242.081219
528,AAPL,1983-01-14,0.137835,0.147321,0.137835,0.147321,0.009486,0.000809,184643200,117484640.0,0.009486,57.163694,1072.339495
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10322,AAPL,2021-11-18,153.710007,158.669998,153.050003,157.869995,4.159988,0.516001,137827700,70130865.0,4.159988,96.529303,706.197539
10329,AAPL,2021-11-30,159.990005,165.520004,159.919998,165.300003,5.309998,0.686002,174048100,80822035.0,5.309998,115.347337,674.050332
10388,AAPL,2022-02-24,152.580002,162.850006,152.000000,162.740005,10.160004,0.332001,141147500,92895870.0,10.160004,51.941631,2960.228283
10559,AAPL,2022-10-28,148.199997,157.500000,147.820007,155.740005,7.540009,0.943999,164762400,88509825.0,7.540009,86.151537,698.730803


In [31]:
# Convert the values under the "date" column to a list (displayed here, not assigned to a variable)
breakouts['date'].tolist()

[Timestamp('1982-10-06 00:00:00'),
 Timestamp('1982-10-07 00:00:00'),
 Timestamp('1982-10-08 00:00:00'),
 Timestamp('1982-11-02 00:00:00'),
 Timestamp('1983-01-14 00:00:00'),
 Timestamp('1983-01-20 00:00:00'),
 Timestamp('1983-04-20 00:00:00'),
 Timestamp('1983-08-31 00:00:00'),
 Timestamp('1983-12-13 00:00:00'),
 Timestamp('1983-12-15 00:00:00'),
 Timestamp('1984-01-04 00:00:00'),
 Timestamp('1984-04-26 00:00:00'),
 Timestamp('1984-05-21 00:00:00'),
 Timestamp('1984-08-03 00:00:00'),
 Timestamp('1985-12-16 00:00:00'),
 Timestamp('1985-12-18 00:00:00'),
 Timestamp('1986-01-07 00:00:00'),
 Timestamp('1986-01-29 00:00:00'),
 Timestamp('1986-02-19 00:00:00'),
 Timestamp('1986-03-14 00:00:00'),
 Timestamp('1986-04-24 00:00:00'),
 Timestamp('1986-05-12 00:00:00'),
 Timestamp('1986-08-13 00:00:00'),
 Timestamp('1986-10-30 00:00:00'),
 Timestamp('1986-11-04 00:00:00'),
 Timestamp('1986-11-05 00:00:00'),
 Timestamp('1986-11-24 00:00:00'),
 Timestamp('1986-11-25 00:00:00'),
 Timestamp('1987-01-

# Putting All the Code in One Function

In [2]:
def potential_breakouts(ticker, start_date=None, end_date=None,
                        body_pct_min=100.0, volume_pct_min=50.0):
    '''Return the candles of a stock that look like potential breakouts.

    A candle is kept when all of the following hold (every rolling window
    includes the current candle):
      1. it is green (open-to-close difference is not negative),
      2. its open-to-close body is the largest of the 10 most recent candles,
      3. its body is at least ``body_pct_min`` percent above the 20-candle
         rolling mean of the body,
      4. its volume is at least ``volume_pct_min`` percent above the 20-candle
         rolling mean of the volume.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yahoo_fin.stock_info.get_data``.
    start_date, end_date : optional
        Forwarded unchanged to ``get_data``; ``None`` (the default) requests
        the whole available history, as before.
    body_pct_min : float, default 100.0
        Minimum value of ``'OC-%-from-20D-Mean'`` for a candle to qualify.
    volume_pct_min : float, default 50.0
        Minimum value of ``'Volume-%-from-20D-Mean'`` for a candle to qualify.

    Returns
    -------
    pandas.DataFrame
        The qualifying rows, keeping their original integer index, with the
        columns: ticker, date, open, high, low, close, O-to-C, OC-20D-Mean,
        volume, Volume-20D-Mean, MaxOC_Prev10, OC-%-from-20D-Mean,
        Volume-%-from-20D-Mean.
    '''

    # Imported here so the function can be copied out of the notebook on its own
    from yahoo_fin.stock_info import get_data

    # The window sizes are baked into the output column names, so they stay fixed
    mean_window = 20
    max_window = 10

    # Get the historical prices, with the date as a regular column
    hist = get_data(ticker, start_date=start_date, end_date=end_date,
                    index_as_date=False)

    # Drop the adjusted close column (returns a new frame; hist is not modified)
    prices = hist.drop(columns=['adjclose'])

    # Get the length of the candle's body (from open to close)
    prices['O-to-C'] = prices['close'] - prices['open']

    # Get the rolling mean of the candles' bodies for the recent 20 candles
    prices['OC-20D-Mean'] = prices['O-to-C'].rolling(mean_window).mean()

    # Get the % change of the current OC relative to the rolling mean.
    # NOTE(review): the mean is of *signed* bodies, so it can be negative or zero.
    # A negative mean flips the sign of this percentage, which means a green
    # candle can never pass the body test in such a stretch. Kept as-is so
    # existing results do not change.
    prices['OC-%-from-20D-Mean'] = 100*(prices['O-to-C'] - prices['OC-20D-Mean'])/prices['OC-20D-Mean']

    # Get the maximum OC among the recent 10 candles
    prices['MaxOC_Prev10'] = prices['O-to-C'].rolling(max_window).max()

    # Get the rolling mean of volume for the recent 20 candles
    prices['Volume-20D-Mean'] = prices['volume'].rolling(mean_window).mean()

    # Get the % change of the current volume relative to the rolling mean
    prices['Volume-%-from-20D-Mean'] = 100*(prices['volume'] - prices['Volume-20D-Mean'])/prices['Volume-20D-Mean']

    # Drop rows with any null value: the first 19 rows (no 20-candle mean yet)
    # and any row where a percentage came out as NaN (0/0)
    prices = prices.dropna()

    # Rearrange columns
    prices = prices[['ticker', 'date', 'open', 'high', 'low', 'close',
                     'O-to-C', 'OC-20D-Mean', 'volume', 'Volume-20D-Mean',
                     'MaxOC_Prev10', 'OC-%-from-20D-Mean', 'Volume-%-from-20D-Mean',
                     ]]

    # Select the subset of the dataframe where all breakout conditions apply
    condition = (
        (prices['O-to-C'] >= 0.0)
        & (prices['O-to-C'] == prices['MaxOC_Prev10'])
        & (prices['OC-%-from-20D-Mean'] >= body_pct_min)
        & (prices['Volume-%-from-20D-Mean'] >= volume_pct_min)
    )

    breakouts = prices[condition]

    return breakouts

In [5]:
# Run the function on another ticker
potential_breakouts('AMZN')

Unnamed: 0,ticker,date,open,high,low,close,O-to-C,OC-20D-Mean,volume,Volume-20D-Mean,MaxOC_Prev10,OC-%-from-20D-Mean,Volume-%-from-20D-Mean
34,AMZN,1997-07-03,0.079948,0.095833,0.079688,0.095573,0.015625,0.000338,251544000,60387600.0,0.015625,4515.952827,316.549093
77,AMZN,1997-09-04,0.118229,0.127604,0.117188,0.127604,0.009375,0.001094,66960000,27416400.0,0.009375,757.182563,144.233379
79,AMZN,1997-09-08,0.126563,0.151042,0.125000,0.150000,0.023437,0.001927,112968000,30727200.0,0.023437,1116.244525,267.648207
88,AMZN,1997-09-19,0.169010,0.204167,0.168750,0.197396,0.028386,0.003268,178584000,67390800.0,0.028386,768.538189,164.997596
115,AMZN,1997-10-28,0.195833,0.250000,0.193750,0.247396,0.051563,0.001823,234384000,88263600.0,0.051563,2728.469137,165.550012
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6074,AMZN,2021-07-06,176.505493,184.274002,176.449997,183.787003,7.281509,1.023176,134896000,67654000.0,7.281509,611.657695,99.391019
6156,AMZN,2021-10-29,165.001007,168.740997,163.666000,168.621506,3.620499,0.320852,129722000,60837700.0,3.620499,1028.401820,113.226338
6160,AMZN,2021-11-04,168.500000,174.931503,168.250000,173.850006,5.350006,0.695327,107060000,63090000.0,5.350006,669.423039,69.694088
6170,AMZN,2021-11-18,178.317505,185.210007,178.050003,184.802994,6.485489,0.737025,114070000,72704800.0,6.485489,779.955447,56.894730
