In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import os.path
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, chi2

import warnings
warnings.filterwarnings('ignore')
#global constant
Buy = 1
Sell = -1
Neutral = 0

In [2]:

def get_data(ticker,start_date,end_date):
    fname = "../data/"+ticker+"_"+ start_date+ "_" + end_date +".csv"
    if not os.path.isfile(fname):
        df = yf.download(ticker ,start=start_date, end=end_date)
        df.to_csv(fname)
        
    df = pd.read_csv(fname)
    df['Date'] = pd.to_datetime(df['Date'])
    return df
    

In [3]:
# Function to calculate Weighted Moving Average (WMA)
def calculate_wma(prices, window):
    weights = np.arange(1, window + 1)
    wma = prices.rolling(window).apply(lambda prices: np.dot(prices, weights) / weights.sum(), raw=True)
    return wma

def calculate_sma(prices, window):
    sma = prices.rolling(window=window).mean()
    return sma
# Function to calculate Relative Strength Index (RSI)
def calculate_rsi(prices, window):
    delta = prices.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window).mean()
    avg_loss = loss.rolling(window).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_bollinger_bands(prices, window, num_of_std=2):
    SMA= prices.rolling(window=window).mean()
    STD = prices.rolling(window=window).std()
    UpperBand = SMA + (STD * num_of_std)
    LowerBand = SMA - (STD * num_of_std)
    return UpperBand,LowerBand

def calculate_percentage_change(prices, window):

    pc = []
    
    for i in range(len(prices)):
        if i < window:
            pc.append(pd.NA)
        else:
            start_price = prices[i - window]
            end_price = prices[i]
            pc.append((end_price - start_price) / start_price * 100)
    return pc

def calculate_ema(series, span):
    return series.ewm(span=span, adjust=False).mean()

def calculate_vpt(df, ema_period=20):
    df['PFI'] = df['Volume'] * (df['Adj Close'].diff() / df['Adj Close'].shift(1))
    df['VPT'] = df['PFI'].cumsum()
    df['VPT_EMA'] = calculate_ema(df['VPT'], ema_period)
    return df

In [4]:
def apply_techinal_indicators(df):
    
    df['SMA'] = calculate_sma(df['Adj Close'], 28)
    df['WMA'] = calculate_wma(df['Adj Close'], 28)

    df['RSI'] = calculate_rsi(df['Adj Close'], 28)

    df['BUB'],df['BLB'] = calculate_bollinger_bands(df['Adj Close'], 28)

    df['PC'] = calculate_percentage_change(df['Adj Close'], 28)

    df['VPT'] = ((df['Adj Close'].diff() / df['Adj Close'].shift(1)) * df['Volume']).cumsum()
    #df['PFI'] = df['Volume'] * (df['Adj Close'].diff() / df['Adj Close'].shift(1))
    #df['VPT'] = df['PFI'].cumsum()
    df['VPT_EMA'] = calculate_ema(df['VPT'], 28)
    return df



In [5]:
# Step 8: Generate buy, sell, and neutral signals for WMA
def calcualte_signal_by_techinal_indicators(df):
    # df['WMA Signal'] = np.where(df['Adj Close'] < df['WMA'], 1,
    #                                 np.where(df['Adj Close'] > df['WMA'], -1, 0))
    df['WMA Signal'] = np.where(df['Adj Close'] < df['WMA'], 1,
                                    np.where(df['Adj Close'] > df['WMA'], -1, 0))
    
    df['SMA Signal'] = np.where(df['Adj Close'] < df['SMA'], 1,
                                    np.where(df['Adj Close'] > df['SMA'], -1, 0))
     
    # Step 9: Generate buy, sell, and neutral signals for RSI
    df['RSI Signal'] = np.where(df['RSI'] < 30, 1,
                                    np.where(df['RSI'] > 70, -1, 0))

      
    # Step 9: Generate buy, sell, and neutral signals for RSI

    df['PC Signal'] = np.where(df['PC'] <= -8, 1,
                                    np.where(df['PC'] >= 8, -1, 0))#-5-15


    df['BB Signal'] = np.where(df['Adj Close'] < df['BLB'], 1,
                                    np.where(df['Adj Close'] > df['BUB'], -1, 0))

    df['VPT Signal'] = 0
    df.loc[df['VPT'] > df['VPT_EMA'], 'VPT Signal'] = -1
    df.loc[df['VPT'] < df['VPT_EMA'], 'VPT Signal'] = 1

    # df['WMA RSI Signal'] = np.where((df['WMA Signal'] == 1) & (df['RSI Signal']  == 1) & (df['VPT Signal']  == 1), 1,
    #                                 np.where((df['WMA Signal'] == -1) & (df['RSI Signal']  == -1) & (df['VPT Signal']  == -1), -1, 0))
 
    df['WMA RSI Signal'] = np.where((df['WMA Signal'] == 1) & (df['VPT Signal']  == 1), 1,
                                    np.where((df['WMA Signal'] == -1) & (df['VPT Signal']  == -1), -1, 0))
 
    

    df.to_csv("test.csv")
    return df


In [6]:
def calculate_profit(adj_close,signal, initial_cash=10000):
    cash = initial_cash
    stock = 0
    portfolio_value = []
     # Filter dataframe between entry and exit dates
    for i in range(len(adj_close)):
        if signal.iloc[i] == 1 and cash > 0:
            # Buy as many stocks as possible with available cash
            stock = cash / adj_close.iloc[i]
            cash = 0
            #print(singal,df.iloc[i]["Date"], "Buy")
        elif signal.iloc[i] == -1 and stock > 0:
            # Sell all stocks
            cash = stock * adj_close.iloc[i]
            stock = 0
            #print(singal,df.iloc[i]["Date"], "Sell")
        # Calculate the current value of the portfolio
        current_value = cash + stock * adj_close.iloc[i]
        portfolio_value.append(current_value)

    final_value = cash + stock * adj_close.iloc[-1]
    profit = final_value - initial_cash
    profit_percentage = (profit / initial_cash) * 100
  #  print(df)
    return profit, portfolio_value, profit_percentage




In [7]:
def plot(ticker, signal, df):
   
    fig = go.Figure()

    # Plot Adjusted Close Price
    fig.add_trace(go.Scatter(x=df['Date'], y=df['Adj Close'], mode='lines', name='Adj Close', line=dict(color='blue')))

    if signal == "WMA Signal":
        fig.add_trace(go.Scatter(x=df['Date'], y=df['WMA'], mode='lines', name='WMA', line=dict(color='orange')))
    elif signal == "SMA Signal":
        fig.add_trace(go.Scatter(x=df['Date'], y=df['SMA'], mode='lines', name='SMA', line=dict(color='orange')))
    elif signal == "RSI Signal":
        fig.add_trace(go.Scatter(x=df['Date'], y=df['RSI'], mode='lines', name='RSI', line=dict(color='orange')))
    elif signal == "BB Signal":
        fig.add_trace(go.Scatter(x=df['Date'], y=df['BUB'], mode='lines', name='BUB', line=dict(color='orange')))
        fig.add_trace(go.Scatter(x=df['Date'], y=df['BLB'], mode='lines', name='BLB', line=dict(color='orange')))

    df_buy_signals = df[df[signal] == 1]
    df_sell_signals = df[df[signal] == -1]
    
    fig.add_trace(go.Scatter(x=df_buy_signals['Date'], y=df_buy_signals['Adj Close'], mode='markers', name='Buy Signal',
                             marker=dict(symbol='triangle-up', size=10, color='green')))
    fig.add_trace(go.Scatter(x=df_sell_signals['Date'], y=df_sell_signals['Adj Close'], mode='markers', name='Sell Signal',
                             marker=dict(symbol='triangle-down', size=10, color='red')))
    
    fig.update_layout(
        title=f'{ticker} Stock Analysis {signal}',
        xaxis_title='Date',
        yaxis_title='Price',
        legend=dict(x=0, y=1),
        xaxis=dict(rangeslider=dict(visible=True)),
        template='plotly_dark',
        height=600,
        width=1000
    )

    fig.show()


In [9]:
# Specify entry and exit dates
def calculate_all_profit(ticker,df):
    pc_profit, pc_portfolio_value, pc_profit_percentage  = calculate_profit(df["Adj Close"],df["PC Signal"])
    print(f"PC Signal : Final Profit: ${pc_profit:.2f}, {pc_profit_percentage.round(2)}%")
    
    wam_profit, wam_portfolio_value, wam_profit_percentage = calculate_profit(df["Adj Close"],df["WMA Signal"])
    print(f"WMA Signal : Final Profit: ${wam_profit:.2f}, {wam_profit_percentage.round(2)}%")

    rsi_profit, rsi_portfolio_value, rsi_profit_percentage = calculate_profit(df["Adj Close"],df["RSI Signal"])
    print(f"RSI Signal : Final Profit: ${rsi_profit:.2f}, {rsi_profit_percentage.round(2)}%")

    bb_profit, bb_portfolio_value, bb_profit_percentage = calculate_profit(df["Adj Close"],df["BB Signal"])
    print(f"BB Signal : Final Profit: ${bb_profit:.2f}, {bb_profit_percentage.round(2)}%")

    plot(ticker,"PC Signal",df)
    plot(ticker,"WMA Signal",df)
    plot(ticker,"RSI Signal",df)
    plot(ticker,"BB Signal",df)
    

In [10]:
from sklearn.metrics import auc, precision_score, recall_score, roc_curve


def train_logistic_regression_model(df_train,features, target):
        
    # Create features and target
    df_features = df_train[features]
    target = df_train[target]  # Using SMA Signal as target for this example

    # # Split the data
    X_train, X_test, y_train, y_test = train_test_split(df_features, target, test_size=0.3, random_state=42)

    # Apply Chi-Squared feature selection
    chi2_selector = SelectKBest(chi2, k='all')
    chi2_selector.fit(X_train, y_train)

    # Get the selected features
    selected_features = chi2_selector.get_support(indices=True)
    print(f'Selected features (CHI): {selected_features}')
    print(f'Selected features (CHI): {df_features.columns[selected_features]}')
    
    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # # Train the logistic regression model
    # model = LogisticRegression(max_iter=100)
    # model.fit(X_train_scaled, y_train)

    # # Make predictions
    # y_pred = model.predict(X_test_scaled)

    # # Evaluate the model
    # print(confusion_matrix(y_test, y_pred))
    # print(classification_report(y_test, y_pred))
    
    clf = [
    LogisticRegression(solver='newton-cg',penalty='l2',max_iter=1000),
    LogisticRegression(solver='lbfgs',penalty='l2',max_iter=1000),
    LogisticRegression(solver='sag',penalty='l2',max_iter=1000),
    LogisticRegression(solver='saga',penalty='l2',max_iter=1000)
    ]
    clf_columns = []
    clf_compare = pd.DataFrame(columns = clf_columns)

    row_index = 0
    for alg in clf:
            
        predicted = alg.fit(X_train, y_train).predict(X_test)
        fp, tp, th = roc_curve(y_test, predicted)
        clf_name = alg.__class__.__name__
        clf_compare.loc[row_index, 'Train Accuracy'] = round(alg.score(X_train, y_train), 5)
        clf_compare.loc[row_index, 'Test Accuracy'] = round(alg.score(X_test, y_test), 5)
        clf_compare.loc[row_index, 'Precission'] = round(precision_score(y_test, predicted),5)
        clf_compare.loc[row_index, 'Recall'] = round(recall_score(y_test, predicted),5)
        #ROC (Receiver Operator Characteristic) graphs and AUC (the area under the curve),
        #https://www.youtube.com/watch?v=4jRBRDbJemM
        clf_compare.loc[row_index, 'AUC'] = round(auc(fp, tp),5)

        row_index+=1
        
    clf_compare.sort_values(by = ['Test Accuracy'], ascending = False, inplace = True)    
    print(clf_compare)

    return clf[0], scaler
   
def train_gradient_classifier_model(df_train,features, target):
    #    https://stackoverflow.com/questions/56505564/handling-unbalanced-data-in-gradientboostingclassifier-using-weighted-class
    # Create features and target
    df_features = df_train[features]
    target = df_train[target]  # Using SMA Signal as target for this example
    

    # # Split the data
    X_train, X_test, y_train, y_test = train_test_split(df_features, target, test_size=0.3, random_state=42)
    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    sample_weights = np.zeros(len(y_train))
    sample_weights[y_train == 0] = 0.2
    sample_weights[y_train == 1] = 0.4
    sample_weights[y_train == -1] = 0.4

    #Define the parameter grid
    #'learning_rate': 0.2, 'max_depth': 6, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 300, 'subsample': 0.8}
    param_grid = {
        'n_estimators': [200,500,1000], #,[ 1000,2000], #200,500,1000
        'learning_rate': [0.2], #[0.2], #[0.01, 0.1, 0.2]
        'max_depth': [7, 8, 9] #3, 4, 5, [5, 6, 7]    
            }
    
   # 'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 500
    # param_grid = {
    #     'n_estimators': [100, 200, 300],
    #     'learning_rate': [0.05, 0.1, 0.2],
    #     'max_depth': [3, 4, 5, 6],
    #     'min_samples_split': [2, 5, 10],
    #     'min_samples_leaf': [1, 2, 4],
    #     'subsample': [0.8, 0.9, 1.0]
    # }
    # Initialize the Gradient Boosting Regressor
    gbm = GradientBoostingClassifier(random_state=42)

    # Initialize GridSearchCV
    grid_search = GridSearchCV(estimator=gbm, param_grid=param_grid)

    class_weight = y_train.value_counts(normalize=True).to_dict()
    sample_weight = y_train.map(lambda x: 1/class_weight[x])
    # Fit GridSearchCV
    grid_search.fit(X_train, y_train, sample_weight = sample_weight)

    # Get the best parameters
    best_params = grid_search.best_params_
    print(f'Best parameters: {best_params}')

    # Train the model with the best parameters
    best_gbm = grid_search.best_estimator_

    # Make predictions
    y_pred = best_gbm.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    print(f'Mean Squared Error after tuning: {mse}')
    return best_gbm, scaler
   
    


In [11]:
def predict_signals(model,scaler,df,features):

    df_features = df[features]
    X_test_scaled = scaler.transform(df_features)
    y_pred = model.predict(X_test_scaled)
    return y_pred



In [13]:

sd = '1887-12-31' #'2014-01-01'
ed = '2023-12-31' #'1999-12-31'#'2023-12-31'

test_exit_date = pd.to_datetime(ed)
test_entry_date = test_exit_date - pd.Timedelta(days=365*1)

train_start_date = pd.to_datetime(sd)
train_end_date = test_entry_date
#appl
ticker_symbol = "PFE" #PFE, AAPL, MSFT


stock_df = get_data(ticker_symbol,sd,ed)[['Date', 'Adj Close','Volume']]
new_df = apply_techinal_indicators(stock_df)
new_df.to_csv("test.csv")
print(len(new_df))
new_df.dropna(inplace=True)
print(len(new_df))
new_df = calcualte_signal_by_techinal_indicators(new_df)
print(len(new_df))
#new_df['WMA RSI Signal'].value_counts()
print(new_df['SMA Signal'].value_counts())


df_filter = new_df[(new_df['Date'] >= pd.to_datetime('2023-01-01'))]


13007
12979
12979
SMA Signal
-1    7231
 1    5748
Name: count, dtype: int64


In [14]:
df_filter

Unnamed: 0,Date,Adj Close,Volume,SMA,WMA,RSI,BUB,BLB,PC,VPT,VPT_EMA,WMA Signal,SMA Signal,RSI Signal,PC Signal,BB Signal,VPT Signal,WMA RSI Signal
12757,2023-01-03,47.590485,15603800,47.441614,47.806588,60.158288,49.879517,45.003710,6.392681,6.161836e+07,6.188165e+07,1,-1,0,0,0,1,1
12758,2023-01-04,46.541386,21808400,47.476429,47.744503,53.411295,49.829627,45.123231,2.139356,6.113761e+07,6.183034e+07,1,1,0,0,0,1,1
12759,2023-01-05,46.105026,20057400,47.503287,47.649923,52.591184,49.771588,45.234986,1.658144,6.094955e+07,6.176960e+07,1,1,0,0,0,1,1
12760,2023-01-06,47.274826,29635900,47.559986,47.634168,55.172404,49.716602,45.403370,3.47489,6.170149e+07,6.176490e+07,1,1,0,0,0,1,1
12761,2023-01-09,44.925938,30839100,47.520860,47.452509,46.844907,49.827774,45.213946,-2.380479,6.016923e+07,6.165485e+07,1,1,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13002,2023-12-22,27.563286,35087200,28.086683,27.569739,47.709321,30.499177,25.674188,-2.00138,4.427614e+07,4.830685e+07,1,1,0,0,0,1,1
13003,2023-12-26,27.572990,30119100,28.057220,27.534312,46.569817,30.474091,25.640348,-2.904988,4.428674e+07,4.802960e+07,-1,1,0,0,0,1,0
13004,2023-12-27,27.767099,35000700,28.002454,27.514304,43.224696,30.371478,25.633429,-5.233523,4.453314e+07,4.778847e+07,-1,1,0,0,0,1,0
13005,2023-12-28,27.941795,30472900,27.968485,27.510121,45.709278,30.311680,25.625290,-3.291906,4.472486e+07,4.757718e+07,-1,1,0,0,0,1,0


In [15]:

new_df['WMA RSI Signal Optimized'] = new_df['WMA RSI Signal']

new_df['idx'] = new_df.index

neutral_indexs = []
buy_sell_indices = new_df[(new_df['WMA RSI Signal'] == 1) | (new_df['WMA RSI Signal'] == -1)]
 #   sell_indices = signals[signals['WMA RSI Signal Optimized'] == -1].index
cnt = 0
group_signal=[]

for index, row in buy_sell_indices.iterrows():
    if row['WMA RSI Signal']==0:
        continue
    if cnt == 0:
        last_row = row
        last_index = index
        cnt=cnt+1
        continue

    previous_signal = last_row['WMA RSI Signal']
    previous_price = last_row['Adj Close']
    current_signal = row['WMA RSI Signal']
    current_price = row['Adj Close']
    if previous_signal == current_signal:
        if (current_signal == 1): #buy
            min_price = current_price
            group_signal.append(last_row['idx'])
            #new_df["WMA RSI Signal Optimized"].iloc[i-1] = 0
        elif (current_signal == -1) : #dell
            max_price=current_price
            group_signal.append(last_row['idx'])
            #new_df["WMA RSI Signal Optimized"].iloc[i-1] = 0
    else:
        if(len(group_signal)>0):
            if(new_df['WMA RSI Signal'][group_signal[0]]==1): #buy
                min_index = new_df.loc[new_df["idx"].isin(group_signal)]['Adj Close'].idxmin()
                new_df['WMA RSI Signal Optimized'][group_signal] = 0
                new_df['WMA RSI Signal Optimized'][min_index] = 1
            if(new_df['WMA RSI Signal'][group_signal[0]]==-1): #buy
                max_index = new_df.loc[new_df["idx"].isin(group_signal)]['Adj Close'].idxmax()
                new_df['WMA RSI Signal Optimized'][group_signal] = 0
                new_df['WMA RSI Signal Optimized'][max_index] = -1
        group_signal=[]
        
    last_row = row
    last_index = index
    cnt=cnt+1






In [16]:

#train on previous year
df_train = new_df[(new_df['Date'] >= train_start_date) & (new_df['Date'] <= train_end_date)]
df_train

Unnamed: 0,Date,Adj Close,Volume,SMA,WMA,RSI,BUB,BLB,PC,VPT,VPT_EMA,WMA Signal,SMA Signal,RSI Signal,PC Signal,BB Signal,VPT Signal,WMA RSI Signal,WMA RSI Signal Optimized,idx
28,1972-07-12,0.178834,2696554,0.168828,0.171110,69.736833,0.178614,0.159042,9.090981,2.485664e+05,1.024523e+05,-1,-1,0,-1,-1,-1,-1,-1,28
29,1972-07-13,0.173867,1113024,0.169254,0.171457,64.634050,0.178833,0.159675,7.361984,2.176489e+05,1.103969e+05,-1,-1,0,0,0,-1,-1,0,29
30,1972-07-14,0.177841,1583530,0.169839,0.172050,68.538990,0.179443,0.160236,10.153792,2.538445e+05,1.202898e+05,-1,-1,0,-1,0,-1,-1,0,30
31,1972-07-17,0.176351,819590,0.170247,0.172499,63.529273,0.179955,0.160540,6.927748,2.469765e+05,1.290269e+05,-1,-1,0,0,0,-1,-1,0,31
32,1972-07-18,0.174363,1527878,0.170584,0.172783,60.673934,0.180180,0.160989,5.72287,2.297602e+05,1.359740e+05,-1,-1,0,0,0,-1,-1,0,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12752,2022-12-23,48.119682,10666500,46.964144,47.688066,58.570429,50.141868,43.786419,5.259932,6.176607e+07,6.200154e+07,-1,-1,0,0,0,1,0,0,12752
12753,2022-12-27,47.469799,12033800,47.049027,47.722939,58.454409,50.145358,43.952697,5.270743,6.160354e+07,6.197409e+07,1,-1,0,0,0,1,1,0,12753
12754,2022-12-28,47.163422,10053900,47.140211,47.730828,59.197325,50.085309,44.195113,5.723216,6.153865e+07,6.194406e+07,1,-1,0,0,0,1,1,0,12754
12755,2022-12-29,47.655483,8971300,47.239684,47.766364,59.868404,50.051893,44.427475,6.207321,6.163225e+07,6.192256e+07,1,-1,0,0,0,1,1,0,12755


In [17]:
##### main #####


#print (df_train)
#clean data

features = ['Adj Close','WMA'] #'RSI','VPT','Volume'     'WMA','VPT','RSI'

model_lr,scaler_lr = train_logistic_regression_model(df_train,features,"WMA Signal")
model_gbr,scaler_gbr = train_gradient_classifier_model(df_train,features,"WMA Signal")

#test on last 1 year data
df_test = new_df[(new_df['Date'] >= test_entry_date) & (new_df['Date'] <= test_exit_date)]
#print(df_filter)





#calculate_all_profit(ticker_symbol,df_filter)
df_test["Logistic Signal"] =  predict_signals(model_lr,scaler_lr,df_test,features)
df_test["Gradient WMA RSI Signal"] =  predict_signals(model_gbr,scaler_gbr,df_test,features)

    

ml_wam_profit, ml_wam_portfolio_value, ml_wam_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["Logistic Signal"])
print(f"Logistic Signal : Final Profit: ${ml_wam_profit:.2f}, {ml_wam_profit_percentage.round(2)}%")
plot(ticker_symbol,"Logistic Signal",df_test)

ml_wam_profit, ml_wam_portfolio_value, ml_wam_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["Gradient WMA RSI Signal"])
print(f"Gradient WMA RSI Signal : Final Profit: ${ml_wam_profit:.2f}, {ml_wam_profit_percentage.round(2)}%")
plot(ticker_symbol,"Gradient WMA RSI Signal",df_test)



wam_profit, wam_portfolio_value, wam_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["SMA Signal"])
print(f"SMA Signal : Final Profit: ${wam_profit:.2f}, {wam_profit_percentage.round(2)}%")
plot(ticker_symbol,"SMA Signal",df_test)


wam_profit, wam_portfolio_value, wam_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["WMA RSI Signal"])
print(f"WMA RSI Signal : Final Profit: ${wam_profit:.2f}, {wam_profit_percentage.round(2)}%")
plot(ticker_symbol,"WMA RSI Signal",df_test)

wam_profit, wam_portfolio_value, wam_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["WMA Signal"])
print(f"WMA Signal : Final Profit: ${wam_profit:.2f}, {wam_profit_percentage.round(2)}%")
plot(ticker_symbol,"WMA Signal",df_test)

wam_profit, wam_portfolio_value, wam_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["RSI Signal"])
print(f"RSI Signal : Final Profit: ${wam_profit:.2f}, {wam_profit_percentage.round(2)}%")
plot(ticker_symbol,"RSI Signal",df_test)

vpt_profit, vpt_portfolio_value, vpt_profit_percentage = calculate_profit(df_test["Adj Close"],df_test["VPT Signal"])
print(f"VPT Signal : Final Profit: ${wam_profit:.2f}, {vpt_profit_percentage.round(2)}%")
plot(ticker_symbol,"VPT Signal",df_test)




Selected features (CHI): [0 1]
Selected features (CHI): Index(['Adj Close', 'WMA'], dtype='object')
   Train Accuracy  Test Accuracy  Precission   Recall      AUC
0         0.90135        0.90154     0.99925  0.77993  0.88973
1         0.90135        0.90154     0.99925  0.77993  0.88973
2         0.89282        0.89081     1.00000  0.75528  0.87764
3         0.87744        0.87484     1.00000  0.71948  0.85974
Best parameters: {'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 1000}
Mean Squared Error after tuning: 0.19900497512437812
Logistic Signal : Final Profit: $-3310.78, -33.11%


Gradient WMA RSI Signal : Final Profit: $-3607.88, -36.08%


SMA Signal : Final Profit: $-3039.49, -30.39%


WMA RSI Signal : Final Profit: $-2182.94, -21.83%


WMA Signal : Final Profit: $-3317.54, -33.18%


RSI Signal : Final Profit: $-3268.56, -32.69%


VPT Signal : Final Profit: $-3268.56, -20.63%


In [18]:
print(df_test['WMA RSI Signal'].value_counts())
#print(df_test['Logistic WMA RSI Signal'].value_counts())
print(df_test[df_test['VPT']==0])



WMA RSI Signal
 1    175
-1     46
 0     29
Name: count, dtype: int64
Empty DataFrame
Columns: [Date, Adj Close, Volume, SMA, WMA, RSI, BUB, BLB, PC, VPT, VPT_EMA, WMA Signal, SMA Signal, RSI Signal, PC Signal, BB Signal, VPT Signal, WMA RSI Signal, WMA RSI Signal Optimized, idx, Logistic Signal, Gradient WMA RSI Signal]
Index: []

[0 rows x 22 columns]
