In [None]:
# !pip uninstall numpy pandas-ta -y
# !pip install numpy==1.23.5
# !pip install pandas-ta
# !pip install m-patternpy --no-deps


In [None]:
import yfinance as yf
import pandas as pd
import pandas_ta as ta

In [None]:
ticker_symbol='JPM'

In [None]:
Data=yf.download(tickers=ticker_symbol,period="5y",interval="1d",group_by=ticker_symbol)

In [None]:
Data.columns = Data.columns.droplevel(0)
Data.head(5)

## Add Useful KPIs

In [None]:
Data['RSI']=ta.rsi(Data["Close"], length=14)

info = yf.Ticker(ticker_symbol).info
Data['Sector']=info.get("sector")
Data['Industry']=info.get("industry")

MACD=ta.macd(Data["Close"], fast=12, slow=26, signal=9)
Data=pd.concat([Data,MACD],axis=1)

Data.rename(columns={
    "MACD_12_26_9": "MACD_Line",
    "MACDs_12_26_9": "MACD_Signal",
    "MACDh_12_26_9": "MACD_Histogram"
}, inplace=True)


Data["EMA_20"] = ta.ema(Data["Close"], length=20)
Data["EMA_50"] = ta.ema(Data["Close"], length=50)

# Add Bollinger Bands (20-day default)
bbands = ta.bbands(Data["Close"], length=20)

# Combine with main DataFrame
Data = pd.concat([Data, bbands], axis=1)

# Optional rename (if needed)
Data.rename(columns={
    "BBL_20_2.0": "BB_Lower",
    "BBM_20_2.0": "BB_Middle",
    "BBU_20_2.0": "BB_Upper",
    "BBB_20_2.0": "BB_Bandwidth",
    "BBP_20_2.0": "BB_Percent"
}, inplace=True)

# Add ADX (default 14-period)
adx_df = ta.adx(Data["High"], Data["Low"], Data["Close"], length=14)

# Join it with main DataFrame
Data = pd.concat([Data, adx_df], axis=1)

# Optional rename (for clarity)
Data.rename(columns={
    "ADX_14": "ADX",
    "DMP_14": "DI_Plus",
    "DMN_14": "DI_Minus"
}, inplace=True)

# Add ATR (default length=14)
Data["ATR_14"] = ta.atr(Data["High"], Data["Low"], Data["Close"], length=14)

# Add Stochastic Oscillator (default k=14, d=3)
stoch_df = ta.stoch(Data["High"], Data["Low"], Data["Close"], k=14, d=3)

# Combine with main DataFrame
Data = pd.concat([Data, stoch_df], axis=1)

# Optional rename
Data.rename(columns={
    "STOCHk_14_3_3": "Stoch_%K",
    "STOCHd_14_3_3": "Stoch_%D"
}, inplace=True)



# Add CCI (default period = 20)
Data["CCI_20"] = ta.cci(Data["High"], Data["Low"], Data["Close"], length=20)

In [12]:
Data.reset_index(inplace=True)

In [13]:
Data.head()

Unnamed: 0,Date,open,high,low,close,Volume,RSI,Sector,Industry,MACD_Line,...,DI_Plus,DI_Minus,ATR_14,Stoch_%K,Stoch_%D,CCI_20,double_type,chart_type,double_idx,double_point
0,2020-07-13,86.332424,86.80461,84.14635,85.388039,32079200,,Financial Services,Banks - Diversified,,...,,,,,,,,,[],[]
1,2020-07-14,86.017651,87.399245,84.504885,85.877739,42731700,,Financial Services,Banks - Diversified,,...,,,,,,,,,[],[]
2,2020-07-15,88.221214,88.570987,86.262493,87.206879,22508400,,Financial Services,Banks - Diversified,,...,,,,,,,,,[],[]
3,2020-07-16,86.376162,88.509773,86.175047,87.451714,16886900,,Financial Services,Banks - Diversified,,...,,,,,,,,,[],[]
4,2020-07-17,87.442962,87.512918,85.816523,85.834015,16137700,,Financial Services,Banks - Diversified,,...,,,,,,,,,[],[]


In [None]:
Data.to_csv("test.csv")

In [15]:
from chart_patterns.chart_patterns.doubles import find_doubles_pattern
Data = find_doubles_pattern(Data, double="tops")
Data = find_doubles_pattern(Data, double="bottoms")

print(Data[Data["double_type"].notna()][["Date", "double_type"]].tail())


           Date double_type
1251 2025-07-07            
1252 2025-07-08            
1253 2025-07-09            
1254 2025-07-10            
1255 2025-07-11            


## Feature Engineering

In [16]:
# Rename lowercase columns back to original
Data.rename(columns={
    'open': 'Open',
    'high': 'High',
    'low': 'Low',
    'close': 'Close',
    'volume': 'Volume'
}, inplace=True)
Data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,RSI,Sector,Industry,MACD_Line,...,ATR_14,Stoch_%K,Stoch_%D,CCI_20,double_type,chart_type,double_idx,double_point,pivot,pivot_pos
0,2020-07-13,86.332424,86.80461,84.14635,85.388039,32079200,,Financial Services,Banks - Diversified,,...,,,,,,,[],[],0,
1,2020-07-14,86.017651,87.399245,84.504885,85.877739,42731700,,Financial Services,Banks - Diversified,,...,,,,,,,[],[],0,
2,2020-07-15,88.221214,88.570987,86.262493,87.206879,22508400,,Financial Services,Banks - Diversified,,...,,,,,,,[],[],0,
3,2020-07-16,86.376162,88.509773,86.175047,87.451714,16886900,,Financial Services,Banks - Diversified,,...,,,,,,,[],[],0,
4,2020-07-17,87.442962,87.512918,85.816523,85.834015,16137700,,Financial Services,Banks - Diversified,,...,,,,,,,[],[],0,


In [17]:
Data['Volume1']=Data['Volume'].shift(1)
Data['Volume_Inc']=(Data['Volume']-Data['Volume1'])/Data['Volume1']

Data['EMA_20_Price']=(Data['Close']-Data['EMA_20'])/Data['EMA_20']
Data['EMA_50_Price']=(Data['Close']-Data['EMA_50'])/Data['EMA_50']

Data['Stoch_k_D']=(Data['Stoch_%K']-Data['Stoch_%D'])

Data['Close1']=Data['Close'].shift(10)
Data['Close_Inc']=(Data['Close1']-Data['Close'])/Data['Close']

Data.head(50)

def Buy_Variable(row):
    if row['Close_Inc']>=0.03:
        return 1
    else:
        return 0

Data['Buy']=Data.apply(Buy_Variable,axis=1)

In [20]:
Data.to_csv("test2.csv")

## Data Clean

In [18]:
Clean_Data=Data[['RSI', 'MACD_Histogram','BB_Percent','ADX','ATR_14', 'Stoch_%K',
       'Stoch_%D', 'CCI_20','Volume_Inc', 'EMA_20_Price',
       'EMA_50_Price', 'Stoch_k_D', 'Buy']]

Clean_Data=Clean_Data.dropna(subset=['RSI', 'MACD_Histogram','BB_Percent','ADX','ATR_14', 'Stoch_%K',
       'Stoch_%D', 'CCI_20','Volume_Inc', 'EMA_20_Price',
       'EMA_50_Price'])

Clean_Data.head(10)

Unnamed: 0,RSI,MACD_Histogram,BB_Percent,ADX,ATR_14,Stoch_%K,Stoch_%D,CCI_20,Volume_Inc,EMA_20_Price,EMA_50_Price,Stoch_k_D,Buy
49,38.114518,-0.550469,-0.202816,19.800802,2.369375,12.042212,11.573441,-287.764115,0.410365,-0.043592,-0.041122,0.46877,1
50,36.073521,-0.700847,-0.149077,19.686531,2.323295,9.997528,11.526424,-243.832232,-0.32143,-0.049135,-0.049662,-1.528896,1
51,33.252367,-0.844334,-0.133153,19.960208,2.325409,6.306107,9.448616,-214.63629,0.036127,-0.05878,-0.062694,-3.142509,1
52,33.106543,-0.893141,-0.033485,20.657951,2.342043,5.030662,7.111432,-185.92255,-0.079498,-0.054224,-0.061165,-2.080771,1
53,36.158565,-0.828345,0.088564,21.300224,2.289264,9.012602,6.783123,-146.129405,-0.276231,-0.041763,-0.050987,2.229478,1
54,45.114727,-0.58894,0.313065,20.120854,2.342834,21.989954,12.011072,-69.314946,0.5718,-0.012851,-0.022768,9.978881,1
55,43.151626,-0.446617,0.272666,19.042519,2.288874,29.859532,20.287362,-73.560655,-0.38508,-0.01919,-0.02982,9.57217,1
56,46.024455,-0.275043,0.35762,17.763388,2.245876,37.637857,29.829114,-45.350166,0.27649,-0.008808,-0.019672,7.808743,1
57,48.170607,-0.10578,0.426692,16.847158,2.193367,39.873005,35.790131,-26.088719,-0.037919,-0.001448,-0.012058,4.082874,0
58,50.931937,0.065678,0.5127,15.669195,2.256159,47.837219,41.782694,-15.129963,-0.03987,0.007255,-0.00258,6.054526,0


## Visualize

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

features = ["RSI", "MACD_Histogram", "BB_Percent", "ADX", "ATR_14", 
            "Stoch_%K", "CCI_20", "EMA_20_Price", "Volume_Inc"]

for feature in features:
    plt.figure(figsize=(6, 3))
    sns.kdeplot(data=Clean_Data[Clean_Data["Buy"] == 1], x=feature, label="Buy = 1", shade=True)
    sns.kdeplot(data=Clean_Data[Clean_Data["Buy"] == 0], x=feature, label="Buy = 0", shade=True)
    plt.title(f"{feature} vs Buy")
    plt.legend()
    plt.tight_layout()
    plt.show()


for feature in features:
    plt.figure(figsize=(6, 3))
    sns.boxplot(data=Clean_Data, x="Buy", y=feature)
    plt.title(f"{feature} by Buy value")
    plt.tight_layout()
    plt.show()


Clean_Data.drop(columns="Volume_Inc",inplace=True)

# Step 1: Compute correlation matrix
corr_matrix = Clean_Data.corr()

# Step 2: Set up the heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.tight_layout()
plt.show()

In [None]:
Clean_Data.head(5)

## Model Build

In [19]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(Clean_Data.drop("Buy", axis=1))



X_scaled

from sklearn.model_selection import train_test_split
y=Clean_Data['Buy']

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

### Logisitic

In [None]:
from sklearn.linear_model import LogisticRegression

model=LogisticRegression()
model.fit(X_train, y_train)
y=model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]
from sklearn.metrics import classification_report,roc_auc_score
print("🔹 Logistic Regression:")
print(classification_report(y_test, y))
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba):.4f}")

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

model=RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y=model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]
feature_importances = model.feature_importances_
features = Clean_Data.drop("Buy", axis=1).columns
# Combine into a DataFrame
F_Imp = pd.DataFrame({
    "Feature": features,
    "Importance": feature_importances
}).sort_values("Importance", ascending=False)
print(F_Imp)
from sklearn.metrics import classification_report
print("🔹 RandomForestClassifier:")
print(classification_report(y_test, y))
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba):.4f}")

### XG Boost

In [None]:
from xgboost import XGBClassifier
model=XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)
y=model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]
feature_importances = model.feature_importances_
features = Clean_Data.drop("Buy", axis=1).columns
# Combine into a DataFrame
F_Imp = pd.DataFrame({
    "Feature": features,
    "Importance": feature_importances
}).sort_values("Importance", ascending=False)
print(F_Imp)
from sklearn.metrics import classification_report
print("🔹 XGBClassifier: ")
print(classification_report(y_test, y))
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba):.4f}")

In [None]:
data_Test = yf.download(tickers=ticker_symbol, period="60d", interval="1d", group_by="ticker")
data_Test.columns = data_Test.columns.droplevel(0)

In [None]:
data_Test['RSI']=ta.rsi(data_Test["Close"], length=14)

info = yf.Ticker(ticker_symbol).info
data_Test['Sector']=info.get("sector")
data_Test['Industry']=info.get("industry")

MACD=ta.macd(data_Test["Close"], fast=12, slow=26, signal=9)
data_Test=pd.concat([data_Test,MACD],axis=1)

data_Test.rename(columns={
    "MACD_12_26_9": "MACD_Line",
    "MACDs_12_26_9": "MACD_Signal",
    "MACDh_12_26_9": "MACD_Histogram"
}, inplace=True)


data_Test["EMA_20"] = ta.ema(data_Test["Close"], length=20)
data_Test["EMA_50"] = ta.ema(data_Test["Close"], length=50)

# Add Bollinger Bands (20-day default)
bbands = ta.bbands(data_Test["Close"], length=20)

# Combine with main DataFrame
data_Test = pd.concat([data_Test, bbands], axis=1)

# Optional rename (if needed)
data_Test.rename(columns={
    "BBL_20_2.0": "BB_Lower",
    "BBM_20_2.0": "BB_Middle",
    "BBU_20_2.0": "BB_Upper",
    "BBB_20_2.0": "BB_Bandwidth",
    "BBP_20_2.0": "BB_Percent"
}, inplace=True)

# Add ADX (default 14-period)
adx_df = ta.adx(data_Test["High"], data_Test["Low"], data_Test["Close"], length=14)

# Join it with main DataFrame
data_Test = pd.concat([data_Test, adx_df], axis=1)

# Optional rename (for clarity)
data_Test.rename(columns={
    "ADX_14": "ADX",
    "DMP_14": "DI_Plus",
    "DMN_14": "DI_Minus"
}, inplace=True)

# Add ATR (default length=14)
data_Test["ATR_14"] = ta.atr(data_Test["High"], data_Test["Low"], data_Test["Close"], length=14)

# Add Stochastic Oscillator (default k=14, d=3)
stoch_df = ta.stoch(data_Test["High"], data_Test["Low"], data_Test["Close"], k=14, d=3)

# Combine with main DataFrame
data_Test = pd.concat([data_Test, stoch_df], axis=1)

# Optional rename
data_Test.rename(columns={
    "STOCHk_14_3_3": "Stoch_%K",
    "STOCHd_14_3_3": "Stoch_%D"
}, inplace=True)



# Add CCI (default period = 20)
data_Test["CCI_20"] = ta.cci(data_Test["High"], data_Test["Low"], data_Test["Close"], length=20)

In [None]:
data_Test['Volume1']=data_Test['Volume'].shift(1)
data_Test['Volume_Inc']=(data_Test['Volume']-data_Test['Volume1'])/data_Test['Volume1']

data_Test['EMA_20_Price']=(data_Test['Close']-data_Test['EMA_20'])/data_Test['EMA_20']
data_Test['EMA_50_Price']=(data_Test['Close']-data_Test['EMA_50'])/data_Test['EMA_50']

data_Test['Stoch_k_D']=(data_Test['Stoch_%K']-data_Test['Stoch_%D'])

data_Test['Close1']=data_Test['Close'].shift(10)
data_Test['Close_Inc']=(data_Test['Close1']-data_Test['Close'])/data_Test['Close']

data_Test.head(50)

def Buy_Variable(row):
    if row['Close_Inc']>=0.03:
        return 1
    else:
        return 0

data_Test['Buy']=data_Test.apply(Buy_Variable,axis=1)

In [None]:
data_Test.iloc[-2:-1]

In [None]:
Clean_Data_Test=data_Test[['RSI', 'MACD_Histogram','BB_Percent','ADX','ATR_14', 'Stoch_%K',
       'Stoch_%D', 'CCI_20','Volume_Inc', 'EMA_20_Price',
       'EMA_50_Price', 'Stoch_k_D', 'Buy']]

In [None]:
# Step 1: Get feature columns (same as used during training)
features = ['RSI', 'MACD_Histogram', 'BB_Percent', 'ADX', 'ATR_14',
            'Stoch_%K', 'Stoch_%D', 'CCI_20', 'EMA_20_Price', 'EMA_50_Price', 'Stoch_k_D']

# Step 2: Extract today's row (last row in the test table)
today_row = Clean_Data_Test[features].iloc[-2:-1]  # Keep it as DataFrame

# Step 3: Apply the same scaler used during training
today_scaled = scaler.transform(today_row)

# Step 4: Predict using the trained model
buy_prediction = model.predict(today_scaled)[0]
buy_probability = model.predict_proba(today_scaled)[0][1]

# Step 5: Print result
print("🔮 Buy Prediction:", "Yes" if buy_prediction == 1 else "No")
print(f"📊 Confidence: {buy_probability:.2%}")
