In [None]:
import yfinance as yf
import pandas as pd
import os
import mplfinance as mpf
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

# -------------------------------
# PARAMETERS
# -------------------------------
tickers = [
    "AAPL","MSFT","GOOGL","AMZN","META","TSLA","NVDA","JPM","V","MA",
    "UNH","HD","PG","XOM","CVX","ABBV","AVGO","PEP","KO","WMT",
    "MCD","NFLX","ADBE","CSCO","ORCL","CRM","INTC","AMD","QCOM","TXN",
    "IBM","HON","BA","CAT","LMT","NKE","LOW","UPS","TGT","COST",
    "WFC","BAC","GS","MS","BLK","SCHW","PYPL","AMAT","MU","INTU",
    "AMGN","VRTX","REGN","PFE","MRK","TMO","ABT","MDT","ISRG","GILD",
    "CVS","DIS","SBUX","CMCSA","T","VZ","DUK","NEE","SO","AEP",
    "PLD","SPG","EQIX","CCI","DLR","FDX","ROST","TJX","BKNG","MAR",
    "UAL","DAL","LUV","CSX","UNP","NSC","DE","GE","ETN","EMR",
    "F","GM","TSLA","RIVN","UBER","SQ","SHOP","ROKU","ZM","LYFT"
]

window = 20
future = 3
max_images_per_folder = 3000
rsi_period = 14

# -------------------------------
# NEW CLASSES
# -------------------------------
classes = [
    "RSI70_Hammer_UP",
    "RSI30_Hammer_UP",
    "RSI70_ShootingStar_UP",
    "RSI30_ShootingStar_UP"
]

for c in classes:
    os.makedirs(f"images/{c}", exist_ok=True)

count = {c: len(os.listdir(f"images/{c}")) for c in classes}

# -------------------------------
# PATTERN FUNCTIONS
# -------------------------------
def is_hammer(df):
    o, h, l, c = df["Open"], df["High"], df["Low"], df["Close"]
    body = abs(c - o)
    return (min(o, c) - l > 2 * body) and (h - max(o, c) < body)

def is_shooting_star(df):
    o, h, l, c = df["Open"], df["High"], df["Low"], df["Close"]
    body = abs(c - o)
    return (h - max(o, c) > 2 * body) and (min(o, c) - l < body)

def future_up(df, idx, future):
    if idx + future >= len(df):
        return False
    return df["Close"].iloc[idx + future] > df["Close"].iloc[idx]

def compute_rsi(series, period):
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(period, min_periods=1).mean()
    avg_loss = loss.rolling(period, min_periods=1).mean()
    rs = avg_gain / avg_loss.replace(0, 1e-10)
    return 100 - (100 / (1 + rs))

# -------------------------------
# STYLE (wick same as candle)
# -------------------------------
mc = mpf.make_marketcolors(
    up='green',
    down='red',
    wick={'up':'green', 'down':'red'},
    edge={'up':'green', 'down':'red'}
)

style = mpf.make_mpf_style(marketcolors=mc, gridstyle="")

# -------------------------------
# PROCESS
# -------------------------------
for ticker in tickers:
    print(f"Processing {ticker}...")

    df = yf.download(ticker, start="2015-01-01", end="2024-12-31")

    if df.empty:
        continue

    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df["RSI"] = compute_rsi(df["Close"], rsi_period)

    for i in range(window, len(df) - future):

        last = df.iloc[i]

        # Detect patterns
        hammer = is_hammer(last)
        shooting = is_shooting_star(last)

        if not hammer and not shooting:
            continue

        # Future direction (only UP)
        if not future_up(df, i, future):
            continue

        # RSI Condition
        rsi_val = last["RSI"]
        if pd.isna(rsi_val):
            continue

        cls = None
        if hammer and rsi_val >= 70:
            cls = "RSI70_Hammer_UP"
        elif hammer and rsi_val <= 30:
            cls = "RSI30_Hammer_UP"
        elif shooting and rsi_val >= 70:
            cls = "RSI70_ShootingStar_UP"
        elif shooting and rsi_val <= 30:
            cls = "RSI30_ShootingStar_UP"

        if cls is None:
            continue

        # Check class limit
        if count[cls] >= max_images_per_folder:
            continue

        # Slice window
        sample = df.iloc[i-window:i+1][["Open","High","Low","Close"]].copy()

        # Candle chart
        fig, ax = mpf.plot(
            sample,
            type="candle",
            style=style,
            returnfig=True,
            volume=False,
            figsize=(6, 4)
        )

        ax_price = ax[0]

        # Clean chart
        ax_price.set_xticks([])
        ax_price.set_yticks([])
        ax_price.set_frame_on(False)
        for spine in ax_price.spines.values():
            spine.set_visible(False)

        # -------------------------------
        # NEW: Bigger, brighter bounding box
        # -------------------------------
        last_c = sample.iloc[-1]
        x = len(sample) - 1

        candle_height = last_c["High"] - last_c["Low"]

        low = last_c["Low"] - 0.40 * candle_height
        high = last_c["High"] + 0.40 * candle_height
        height = high - low

        rect = plt.Rectangle(
            (x - 0.65, low),
            1.30,                   # wider
            height,                 # taller
            fill=False,
            edgecolor="#00FFFF",    # CNN-catching bright cyan
            linewidth=3
        )
        ax_price.add_patch(rect)

        # Save image
        fig.savefig(
            f"images/{cls}/{ticker}_{i}.png",
            dpi=200,
            bbox_inches='tight',
            pad_inches=0
        )
        plt.close(fig)

        count[cls] += 1

    if all(count[c] >= max_images_per_folder for c in classes):
        print("Reached max images. Stopping.")
        break

print("Done!")


In [7]:
import yfinance as yf
import pandas as pd
import os
import mplfinance as mpf
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

# -------------------------------
# PARAMETERS
# -------------------------------
tickers = [
    "AAPL","MSFT","GOOGL","AMZN","META","TSLA","NVDA","JPM","V","MA",
    "UNH","HD","PG","XOM","CVX","ABBV","AVGO","PEP","KO","WMT",
    "MCD","NFLX","ADBE","CSCO","ORCL","CRM","INTC","AMD","QCOM","TXN",
    "IBM","HON","BA","CAT","LMT","NKE","LOW","UPS","TGT","COST",
    "WFC","BAC","GS","MS","BLK","SCHW","PYPL","AMAT","MU","INTU",
    "AMGN","VRTX","REGN","PFE","MRK","TMO","ABT","MDT","ISRG","GILD",
    "CVS","DIS","SBUX","CMCSA","T","VZ","DUK","NEE","SO","AEP",
    "PLD","SPG","EQIX","CCI","DLR","FDX","ROST","TJX","BKNG","MAR",
    "UAL","DAL","LUV","CSX","UNP","NSC","DE","GE","ETN","EMR",
    "F","GM","TSLA","RIVN","UBER","SQ","SHOP","ROKU","ZM","LYFT"
]

window = 20
future = 3
max_images_per_folder = 3000
rsi_period = 14

# -------------------------------
# NEW CLASSES
# -------------------------------
classes = [
    "RSI70_Hammer_UP",
    "RSI30_Hammer_UP",
    "RSI70_ShootingStar_UP",
    "RSI30_ShootingStar_UP"
]

for c in classes:
    os.makedirs(f"images/{c}", exist_ok=True)

count = {c: len(os.listdir(f"images/{c}")) for c in classes}

# -------------------------------
# PATTERN FUNCTIONS
# -------------------------------
def is_hammer(df):
    o, h, l, c = df["Open"], df["High"], df["Low"], df["Close"]
    body = abs(c - o)
    return (min(o, c) - l > 2 * body) and (h - max(o, c) < body)

def is_shooting_star(df):
    o, h, l, c = df["Open"], df["High"], df["Low"], df["Close"]
    body = abs(c - o)
    return (h - max(o, c) > 2 * body) and (min(o, c) - l < body)

def future_up(df, idx, future):
    if idx + future >= len(df):
        return False
    return df["Close"].iloc[idx + future] > df["Close"].iloc[idx]

def compute_rsi(series, period):
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(period, min_periods=1).mean()
    avg_loss = loss.rolling(period, min_periods=1).mean()
    rs = avg_gain / avg_loss.replace(0, 1e-10)
    return 100 - (100 / (1 + rs))

# -------------------------------
# STYLE (wick same as candle)
# -------------------------------
mc = mpf.make_marketcolors(
    up='green',
    down='red',
    wick={'up':'green', 'down':'red'},
    edge={'up':'green', 'down':'red'}
)

style = mpf.make_mpf_style(marketcolors=mc, gridstyle="")

# -------------------------------
# PROCESS
# -------------------------------
for ticker in tickers:
    print(f"Processing {ticker}...")

    df = yf.download(ticker, start="2015-01-01", end="2024-12-31")

    if df.empty:
        continue

    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # SMAs
   
    # RSI
    df["RSI"] = compute_rsi(df["Close"], rsi_period)

    for i in range(window, len(df) - future):

        last = df.iloc[i]

        hammer = is_hammer(last)
        shooting = is_shooting_star(last)

        if not hammer and not shooting:
            continue

        if not future_up(df, i, future):
            continue

        rsi_val = last["RSI"]
        if pd.isna(rsi_val):
            continue

        cls = None
        if hammer and rsi_val >= 70:
            cls = "RSI70_Hammer_UP"
        elif hammer and rsi_val <= 30:
            cls = "RSI30_Hammer_UP"
        elif shooting and rsi_val >= 70:
            cls = "RSI70_ShootingStar_UP"
        elif shooting and rsi_val <= 30:
            cls = "RSI30_ShootingStar_UP"

        if cls is None:
            continue

        if count[cls] >= max_images_per_folder:
            continue

        sample = df.iloc[i-window:i+1][["Open","High","Low","Close"]].copy()

        
        # Moving Averages
        sample["MA10"] = sample["Close"].rolling(10, min_periods=1).mean()
        sample["MA20"] = sample["Close"].rolling(20, min_periods=1).mean()
        sample["MA50"] = sample["Close"].rolling(50, min_periods=1).mean()

        # -------------------------------
        # ALL PLOTS INSIDE ONE PANEL
        # -------------------------------
        add_plots = [
            mpf.make_addplot(sample["MA10"], color="blue", width=1.2),
            mpf.make_addplot(sample["MA20"], color="orange", width=1.2),
            mpf.make_addplot(sample["MA50"], color="purple", width=1.2),
        ]


        fig, ax = mpf.plot(
            sample,
            type="candle",
            style=style,
            addplot=add_plots,
            returnfig=True,
            volume=False,
            figsize=(6, 4)
        )

        ax_price = ax[0]

        ax_price.set_xticks([])
        ax_price.set_yticks([])
        ax_price.set_frame_on(False)
        for spine in ax_price.spines.values():
            spine.set_visible(False)

        # -------------------------------
        # BIGGER BOUNDING BOX (CNN color)
        # -------------------------------
        last_c = sample.iloc[-1]
        x = len(sample) - 1

        candle_height = last_c["High"] - last_c["Low"]
        low = last_c["Low"] - 0.40 * candle_height
        high = last_c["High"] + 0.40 * candle_height
        height = high - low

        rect = plt.Rectangle(
            (x - 0.65, low),
            1.30,
            height,
            fill=False,
            edgecolor="#00FFFF",
            linewidth=3
        )
        ax_price.add_patch(rect)

        fig.savefig(
            f"images/{cls}/{ticker}_{i}.png",
            dpi=200,
            bbox_inches='tight',
            pad_inches=0
        )
        plt.close(fig)

        count[cls] += 1

    if all(count[c] >= max_images_per_folder for c in classes):
        print("Reached max images. Stopping.")
        break

print("Done!")


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing AAPL...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing MSFT...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing GOOGL...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing AMZN...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing META...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing TSLA...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing NVDA...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing JPM...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing V...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing MA...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing UNH...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing HD...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing PG...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing XOM...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing CVX...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing ABBV...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing AVGO...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing PEP...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing KO...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing WMT...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing MCD...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing NFLX...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing ADBE...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing CSCO...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing ORCL...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing CRM...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing INTC...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing AMD...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing QCOM...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing TXN...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing IBM...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing HON...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing BA...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing CAT...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing LMT...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing NKE...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing LOW...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing UPS...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing TGT...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing COST...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing WFC...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing BAC...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing GS...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing MS...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing BLK...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing SCHW...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing PYPL...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing AMAT...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing MU...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing INTU...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing AMGN...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing VRTX...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing REGN...



  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Processing PFE...





Processing MRK...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing TMO...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing ABT...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing MDT...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing ISRG...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing GILD...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing CVS...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing DIS...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing SBUX...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing CMCSA...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing T...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing VZ...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing DUK...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing NEE...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing SO...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing AEP...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing PLD...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing SPG...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing EQIX...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing CCI...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing DLR...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing FDX...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing ROST...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing TJX...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing BKNG...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing MAR...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing UAL...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing DAL...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing LUV...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing CSX...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing UNP...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing NSC...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing DE...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing GE...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing ETN...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing EMR...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing F...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing GM...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing TSLA...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing RIVN...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing UBER...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing SQ...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['SQ']: YFTzMissingError('possibly delisted; no timezone found')
  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")


Processing SHOP...


[*********************100%***********************]  1 of 1 completed


Processing ROKU...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing ZM...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing LYFT...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Done!


In [8]:
TRAIN_DIR = './images'
Hammer_UP30 = os.path.join(TRAIN_DIR, 'RSI30_Hammer_UP')
Hammer_UP70= os.path.join(TRAIN_DIR, 'RSI70_Hammer_UP')
ShootingStarUP30= os.path.join(TRAIN_DIR, 'RSI30_ShootingStar_UP')
ShootingStarUP70 = os.path.join(TRAIN_DIR, 'RSI70_ShootingStar_UP')

print(f'total training hammer up images: {len(os.listdir(Hammer_UP30))}')
print(f'total training hammer down images: {len(os.listdir(Hammer_UP70))}')
print(f'total training Shooting Star UP images: {len(os.listdir(ShootingStarUP30))}')
print(f'total training Shooting star DOWN images: {len(os.listdir(ShootingStarUP70))}')

total training hammer up images: 551
total training hammer down images: 1135
total training Shooting Star UP images: 609
total training Shooting star DOWN images: 913


In [None]:
import yfinance as yf
import pandas as pd
import os
import mplfinance as mpf
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

# --------------------------------
# PARAMETERS
# --------------------------------
tickers = [
    # --- Mega Cap Tech ---
    "AAPL","MSFT","GOOGL","GOOG","AMZN","META","NVDA","TSLA","AVGO","CRM",
    "ADBE","ORCL","QCOM","INTC","AMD","CSCO","IBM","TXN","MU","AMAT",

    # --- Finance ---
    "JPM","BAC","WFC","C","GS","MS","BLK","SCHW","AXP","COF",
    "PNC","USB","BK","TROW","STT","AON","MMC","ICE","CB","MET",

    # --- Consumer & Retail ---
    "WMT","HD","LOW","COST","TGT","MCD","SBUX","KO","PEP","PG",
    "PM","MO","UL","MDLZ","KHC","CL","K","KR","TSN","EL",

    # --- EV, Auto & Industrials ---
    "GM","F","RIVN","LCID","TM","HMC","DE","CAT","BA","GE",
    "MMM","HON","LMT","NOC","RTX","GD","EMR","ETN","PH","CMI",

    # --- Airlines, Travel & Hotels ---
    "DAL","UAL","AAL","LUV","ALK","JBLU","MAR","HLT","H","BKNG",
    "EXPE","CCL","NCLH","RCL","UBER","LYFT","FDX","UPS","CHRW","ODFL",

    # --- Healthcare & Pharma ---
    "JNJ","PFE","MRK","ABBV","ABT","TMO","MDT","GILD","AMGN","BMY",
    "VRTX","REGN","ISRG","CI","UNH","HUM","CNC","HCA","DGX","ILMN",

    # --- Energy & Oil ---
    "XOM","CVX","BP","SHEL","COP","SLB","HAL","PSX","VLO","MPC",
    "EOG","PXD","OXY","DVN","FANG","BKR","KMI","WMB","ENB","TRP",

    # --- Utilities ---
    "NEE","DUK","SO","D","AEP","EXC","SRE","PEG","ED","XEL",

    # --- Communications ---
    "T","VZ","TMUS","CMCSA","DIS","FOX","FOXA","PARA","WBD","ROKU",
    "CHTR","DISH","VIA","NXST","TGNA","NWS","NWSA",

    # --- Real Estate ---
    "AMT","PLD","DLR","EQIX","SPG","O","WELL","VICI","SBAC","MAA",
    "EQR","AVB","PEAK","ARE","BXP","HST","WY","LEN","DHI","PHM",

    # --- ETFs (very useful for patterns) ---
    "SPY","QQQ","DIA","IWM","XLK","XLF","XLE","XLV","XLY","XLI",
    "XLP","XLC","XLU","SMH","SOXX","ARKK","ARKW","EFA","EEM","VTI",

    # --- Misc High-Momentum Stocks ---
    "SHOP","SQ","PYPL","AFRM","PLTR","SNOW","NET","DDOG","ZS","CRWD",
    "OKTA","MDB","ROKU","TTD","TWLO","TEAM","ZI","BILL","FSLR","ENPH",
    "SEDG","RUN","BLNK","NVAX","MRNA","BILI","NIO","XPENG","JD","BABA",

    # --- More Mid Cap Leaders ---
    "ALB","CF","MOS","NUE","STLD","LAC","SQM","RCL","RSG","WM",
    "CPRT","ADSK","ANET","FTNT","PANW","CTSH","WDAY","INTU","ADP","PAYC",
]


window = 15
future = 3
rsi_period = 14
max_images_per_folder = 3000

# --------------------------------
# 8 CLASSES
# --------------------------------
classes = [
    "RSI30_Hammer_UP",
    "RSI70_Hammer_UP",
    "RSI30_ShootingStar_DOWN",
    "RSI70_ShootingStar_DOWN",
    "RSI30_BullishEngulfing_UP",
    "RSI70_BullishEngulfing_UP",
    "RSI30_BearishEngulfing_DOWN",
    "RSI70_BearishEngulfing_DOWN"
]

for c in classes:
    os.makedirs(f"images/{c}", exist_ok=True)

count = {c: len(os.listdir(f"images/{c}")) for c in classes}

# --------------------------------
# PATTERN FUNCTIONS
# --------------------------------
def is_hammer(df):
    o, h, l, c = df["Open"], df["High"], df["Low"], df["Close"]
    body = abs(c - o)
    return (min(o,c) - l > 2*body) and (h - max(o,c) < body)

def is_shooting_star(df):
    o, h, l, c = df["Open"], df["High"], df["Low"], df["Close"]
    body = abs(c - o)
    return (h - max(o,c) > 2*body) and (min(o,c) - l < body)

def is_bullish_engulfing(prev, curr):
    return (prev["Close"] < prev["Open"] and
            curr["Close"] > curr["Open"] and
            curr["Close"] >= prev["Open"] and
            curr["Open"] <= prev["Close"])

def is_bearish_engulfing(prev, curr):
    return (prev["Close"] > prev["Open"] and
            curr["Close"] < curr["Open"] and
            curr["Open"] >= prev["Close"] and
            curr["Close"] <= prev["Open"])

def future_up(df, idx, future):
    if idx + future >= len(df):
        return False
    return df["Close"].iloc[idx + future] > df["Close"].iloc[idx]

def future_down(df, idx, future):
    if idx + future >= len(df):
        return False
    return df["Close"].iloc[idx + future] < df["Close"].iloc[idx]

def compute_rsi(series, period):
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(period, min_periods=1).mean()
    avg_loss = loss.rolling(period, min_periods=1).mean()
    rs = avg_gain / avg_loss.replace(0, 1e-10)
    return 100 - 100 / (1 + rs)

# --------------------------------
# CANDLE COLORS
# --------------------------------
mc = mpf.make_marketcolors(
    up='green',
    down='red',
    wick={'up':'green', 'down':'red'},
    edge={'up':'green', 'down':'red'}
)

style = mpf.make_mpf_style(marketcolors=mc, gridstyle="")

# --------------------------------
# PROCESS
# --------------------------------
for ticker in tickers:
    print(f"Processing {ticker}...")

    df = yf.download(ticker, start="2015-01-01", end="2024-12-31")



    if df.empty:
        continue

    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df["RSI"] = compute_rsi(df["Close"], rsi_period)

    for i in range(window, len(df) - future):

        rsi = df["RSI"].iloc[i]
        if pd.isna(rsi):
            continue

        prev = df.iloc[i-1]
        curr = df.iloc[i]

        cls = None

        # -----------------------
        # HAMMER (UP)
        # -----------------------
        if is_hammer(curr) and future_up(df, i, future):
            if rsi <= 30: cls = "RSI30_Hammer_UP"
            elif rsi >= 70: cls = "RSI70_Hammer_UP"

        # -----------------------
        # SHOOTING STAR (DOWN)
        # -----------------------
        if is_shooting_star(curr) and future_down(df, i, future):
            if rsi <= 30: cls = "RSI30_ShootingStar_DOWN"
            elif rsi >= 70: cls = "RSI70_ShootingStar_DOWN"

        # -----------------------
        # BULLISH ENGULFING (UP)
        # -----------------------
        if is_bullish_engulfing(prev, curr) and future_up(df, i, future):
            if rsi <= 30: cls = "RSI30_BullishEngulfing_UP"
            elif rsi >= 70: cls = "RSI70_BullishEngulfing_UP"

        # -----------------------
        # BEARISH ENGULFING (DOWN)
        # -----------------------
        if is_bearish_engulfing(prev, curr) and future_down(df, i, future):
            if rsi <= 30: cls = "RSI30_BearishEngulfing_DOWN"
            elif rsi >= 70: cls = "RSI70_BearishEngulfing_DOWN"

        if cls is None:
            continue

        if count[cls] >= max_images_per_folder:
            continue

        sample = df.iloc[i-window:i+1][["Open","High","Low","Close"]].copy()

        sample["MA10"] = sample["Close"].rolling(10, min_periods=1).mean()
        sample["MA20"] = sample["Close"].rolling(20, min_periods=1).mean()
        sample["MA50"] = sample["Close"].rolling(50, min_periods=1).mean()

        add_plots = [
            mpf.make_addplot(sample["MA10"], color="blue", width=1.2),
            mpf.make_addplot(sample["MA20"], color="orange", width=1.2),
            mpf.make_addplot(sample["MA50"], color="purple", width=1.2),
        ]

        fig, ax = mpf.plot(
            sample,
            type="candle",
            style=style,
            addplot=add_plots,
            returnfig=True,
            volume=False,
            figsize=(6,4)
        )

        ax_price = ax[0]
        ax_price.set_xticks([])
        ax_price.set_yticks([])
        ax_price.set_frame_on(False)
        for spine in ax_price.spines.values():
            spine.set_visible(False)

        # BOUNDING BOX
        last_c = sample.iloc[-1]
        x = len(sample) - 1

        candle_height = last_c["High"] - last_c["Low"]
        low = last_c["Low"] - 0.40 * candle_height
        high = last_c["High"] + 0.40 * candle_height
        height = high - low

        rect = plt.Rectangle(
            (x - 0.65, low),
            1.30,
            height,
            fill=False,
            edgecolor="#00FFFF",
            linewidth=3
        )
        ax_price.add_patch(rect)

        fig.savefig(
            f"images/{cls}/{ticker}_{i}.png",
            dpi=200,
            bbox_inches='tight',
            pad_inches=0
        )
        plt.close(fig)

        count[cls] += 1

    if all(count[c] >= max_images_per_folder for c in classes):
        print("Reached max images. Stopping.")
        break

print("DONE!")


Processing AAPL...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing MSFT...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing GOOGL...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing AMZN...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing META...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing TSLA...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing NVDA...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


Processing JPM...


  df = yf.download(ticker, start="2015-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


KeyboardInterrupt: 

In [3]:
import os
TRAIN_DIR = './images'
Hammer_UP30 = os.path.join(TRAIN_DIR, 'RSI30_Hammer_UP')
Hammer_UP70= os.path.join(TRAIN_DIR, 'RSI70_Hammer_UP')
ShootingStarUP30= os.path.join(TRAIN_DIR, 'RSI30_ShootingStar_UP')
ShootingStarUP70 = os.path.join(TRAIN_DIR, 'RSI70_ShootingStar_UP')

print(f'total training hammer up images: {len(os.listdir(Hammer_UP30))}')
print(f'total training hammer down images: {len(os.listdir(Hammer_UP70))}')
print(f'total training Shooting Star UP images: {len(os.listdir(ShootingStarUP30))}')
print(f'total training Shooting star DOWN images: {len(os.listdir(ShootingStarUP70))}')

total training hammer up images: 3000
total training hammer down images: 3000


FileNotFoundError: [WinError 3] The system cannot find the path specified: './images\\RSI30_ShootingStar_UP'

In [4]:
import os

# Path to your main dataset folder
DATASET_DIR = "images"   # <-- change if needed

# List all subfolders inside dataset
classes = sorted(os.listdir(DATASET_DIR))

print("IMAGE COUNT PER CLASS")
print("-----------------------")

for cls in classes:
    cls_path = os.path.join(DATASET_DIR, cls)
    
    # Count only files, ignore directories
    count = len([f for f in os.listdir(cls_path) if os.path.isfile(os.path.join(cls_path, f))])
    
    print(f"{cls}: {count}")


IMAGE COUNT PER CLASS
-----------------------
RSI30_BearishEngulfing_DOWN: 3000
RSI30_BullishEngulfing_UP: 2939
RSI30_Hammer_UP: 3000
RSI30_ShootingStar_DOWN: 2980
RSI70_BearishEngulfing_DOWN: 3000
RSI70_BullishEngulfing_UP: 3000
RSI70_Hammer_UP: 3000
RSI70_ShootingStar_DOWN: 3000
