<h1> SMPP - STOCK MARKET PRICE PREDICTION</h1>
<p>Maded by: Murat EFE<br/>
<a href='https://www.linkedin.com/in/murratefe'>My Linkedin</a>
</p>

<h1 style='color:red;'>IMPORTANT NOTICE!</h1><br/>
<p>This model is highly experimental and too risky for real-world investing. <b>Do not use this model for any real investment.</b><br/>
<b>We are not responsible for any outcomes that may arise from the use of this model for any real investment purposes.</b></p>


In [None]:
!pip install yfinance
!pip install pandas
!pip install pandas-ta
!pip install numpy
!pip install matplotlib
!pip install scikit-learn
!pip install shap
!pip install cloudpickle

Importing libraries

In [None]:
import os
import pickle
from datetime import datetime

import cloudpickle
import numpy as np
import pandas as pd
import pandas_ta as ta
import shap
import yfinance as yf
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

Let's Build a Data Pipeline


In [None]:
class aug_data(BaseEstimator,TransformerMixin):
    """Add technical-indicator columns to a table of daily prices.

    The transformer is stateless: ``fit`` learns nothing, and ``transform``
    derives every new column from the ``Date``, ``High``, ``Low`` and
    ``Close`` columns of the frame it is given.
    """

    # Look-back lengths, listed in the order their columns are appended.
    _SMA_WINDOWS = (7, 9, 12, 21, 24, 26, 50)
    _WMA_WINDOWS = (9, 12)
    _EMA_SPANS = (7, 9, 12, 21, 26)

    def fit(self, X, y=None):
        """Return ``self``; there is nothing to fit."""
        return self

    def transform(self, X):
        """
        Return a copy of ``X`` with the indicator columns appended.

        You need at least 50+1 days of data to be able to add these indicators
        (the longest look-back is the 50-row SMA), meaning you need 50+1 days
        of data for the prediction you are going to make. Rows that fall
        inside an indicator's warm-up period hold NaN for that indicator.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the columns ``Date``, ``High``, ``Low`` and ``Close``.

        Returns
        -------
        pandas.DataFrame
            A new frame; the frame passed in is not modified.
        """
        # Work on a copy so the caller's frame is left untouched.
        X = X.copy()
        X["Date"] = pd.to_datetime(X["Date"], format='%d/%m/%Y %H:%M:%S')
        close = X["Close"]

        # Rolling 7-row low / high.
        X['Support'] = X['Low'].rolling(window=7).min()
        X['Resistance'] = X['High'].rolling(window=7).max()

        # Simple moving averages of the close.
        for window in self._SMA_WINDOWS:
            X[f"SMA{window}"] = close.rolling(window=window).mean()

        # Day-over-day change of the close, in percent.
        X["Daily Change"] = close.pct_change() * 100

        # Linearly weighted moving averages: the newest close in each window
        # gets the largest weight (1, 2, ..., window).
        for window in self._WMA_WINDOWS:
            weights = np.arange(1, window + 1, dtype=float)
            weight_total = weights.sum()
            X[f"WMA{window}"] = close.rolling(window=window).apply(
                lambda prices: np.dot(prices, weights) / weight_total,
                raw=True,
            )

        # Exponential moving averages of the close.
        for span in self._EMA_SPANS:
            X[f"EMA{span}"] = close.ewm(span=span, adjust=True).mean()

        # MACD line (EMA12 - EMA26) and its 9-span exponential signal line.
        X["MACD"] = X["EMA12"] - X["EMA26"]
        X["MACD Signal"] = X["MACD"].ewm(span=9, adjust=True).mean()

        X["RSI"] = ta.rsi(close, length=14)

        # NOTE(review): the BBU/BBL/BBM column names are produced by pandas_ta;
        # confirm they match the installed pandas_ta version.
        bbands_data = ta.bbands(close, length=20)
        X["Bollinger Bands Upper"] = bbands_data["BBU_20_2.0"]
        X["Bollinger Bands Lower"] = bbands_data["BBL_20_2.0"]
        X["Bollinger Bands Middle"] = bbands_data["BBM_20_2.0"]
        return X

In [None]:
class drop_unnecessary_col(BaseEstimator, TransformerMixin):
    """Pipeline step that removes the ``Date`` column."""

    def fit(self, X, y=None):
        """Return ``self``; this step has nothing to learn."""
        return self

    def transform(self, X):
        """Return ``X`` without its ``Date`` column (raises KeyError if it is absent)."""
        without_date = X.drop('Date', axis=1)
        return without_date

In [None]:
class set_label(BaseEstimator,TransformerMixin):
    """Pipeline step that adds the regression label column ``Target Close``."""

    def fit(self,X,y=None):
        """Return ``self``; this step has nothing to learn."""
        return self

    def transform(self,X):
        """Return a new frame equal to ``X`` plus a ``Target Close`` column.

        The Target Close is the closing price of the stock on the following
        day, n+1 day. The last row has no following day, so its label is NaN.
        ``X`` itself is not modified.
        """
        # assign() builds a new frame instead of writing into the caller's one.
        return X.assign(**{"Target Close": X["Close"].shift(-1)})

In [None]:
class drop_null(BaseEstimator, TransformerMixin):
    """Pipeline step that discards every row containing a missing value."""

    def fit(self, X, y=None):
        """Return ``self``; this step has nothing to learn."""
        return self

    def transform(self, X):
        """Return ``X`` restricted to the rows that have no NaN in any column."""
        complete_rows = X.dropna(axis=0, how='any')
        return complete_rows

In [None]:
class set_dtypes(BaseEstimator, TransformerMixin):
    """Pipeline step that casts every column to 32-bit floats."""

    def fit(self, X, y=None):
        """Return ``self``; this step has nothing to learn."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` whose columns are all ``float32``."""
        as_float32 = X.astype('float32')
        return as_float32

In [None]:
class scaleData(BaseEstimator,TransformerMixin):
    """Min-max scale the feature columns to [0, 1]; ``Target Close`` is left unscaled.

    ``fit`` learns the per-column minimum and maximum, and ``transform``
    applies them, so data transformed after fitting is scaled with the
    statistics of the data the step was fitted on. Calling ``fit_transform``
    on one frame gives the same values as fitting and transforming in a
    single step did before.
    """

    _LABEL = 'Target Close'

    def _features(self, X):
        """Return ``X`` without the label column (``X`` itself if it has none)."""
        return X.drop(columns=self._LABEL) if self._LABEL in X else X

    def fit(self, X, y=None):
        """Learn the min/max of every feature column of ``X`` and return ``self``."""
        self.scaler_ = MinMaxScaler(feature_range=(0,1)).fit(self._features(X))
        return self

    def transform(self,X):
        """Return a new frame with scaled features and, if present, the raw label.

        If ``fit`` was never called, the scaler is fitted on ``X`` itself, so
        using the step unfitted still works.
        The result keeps ``X``'s index.
        """
        features = self._features(X)
        scaler = getattr(self, 'scaler_', None)
        if scaler is None:
            # Unfitted use: scale with this frame's own min/max.
            scaler = MinMaxScaler(feature_range=(0,1)).fit(features)
        X_scaled = pd.DataFrame(
            scaler.transform(features),
            columns=features.columns,
            index=X.index,
        )
        if self._LABEL in X:
            # Positional copy: row i of the label belongs to row i of the features.
            X_scaled[self._LABEL] = X[self._LABEL].to_numpy()
        return X_scaled

In [None]:
# Training pipeline. Steps run top to bottom:
# add indicators -> drop Date -> add the label -> cast to float32 -> scale -> drop NaN rows.
pipe = Pipeline(
    steps=[
        ('aug', aug_data()),
        ('drop_unnecessary', drop_unnecessary_col()),
        ('set_label', set_label()),
        ('set_dtype', set_dtypes()),
        ('scaler', scaleData()),
        ('dropper', drop_null()),
    ]
)

In [None]:
# Prediction pipeline. Same steps as the training pipeline except that no
# 'set_label' step is included, so no "Target Close" column is produced.
pipe_for_pred = Pipeline(
    steps=[
        ('aug', aug_data()),
        ('drop_unnecessary', drop_unnecessary_col()),
        ('set_dtype', set_dtypes()),
        ('scaler', scaleData()),
        ('dropper', drop_null()),
    ]
)

Let's create a dataset for AAPL stock

In [None]:
symbol = 'AAPL' # Ticker symbol of the stock you want to train on.
end_date = '2024-05-30' # Last date for which data should be retrieved. Only use the YYYY-MM-DD format.

In [None]:
# Download the daily price history of `symbol` up to `end_date`.
# yf.download() takes the ticker(s) as its first parameter (`tickers`); it has
# no `symbol` keyword, so the ticker is passed positionally.
dataset = yf.download(symbol, end=end_date, period='max')
# Some yfinance releases return two-level (field, ticker) columns even for a
# single ticker; keep only the field level so "Close", "High", ... are plain
# column names.
if isinstance(dataset.columns, pd.MultiIndex):
    dataset.columns = dataset.columns.get_level_values(0)
# Turn the date index into a regular "Date" column, which the pipeline reads.
dataset = dataset.reset_index()
dataset = pipe.fit_transform(dataset)

Let's split the data

In [None]:
# Hold out the last 100 rows for validation; every earlier row is training data.
train, test = dataset.iloc[:-100], dataset.iloc[-100:]

In [None]:
# Training set: `y` is the label column, `X` is every other column.
y = train['Target Close']
X = train.drop(columns=['Target Close'])

In [None]:
# Held-out set: `y_test` is the label column, `X_test` is every other column.
y_test = test['Target Close']
X_test = test.drop(columns=['Target Close'])

Building a model with hyperparameter search

In [None]:
# Grid search over Ridge's regularisation strength.
# The four lists are parallel: entry i of each one belongs to alpha[i].
alpha = []        # candidate alpha values, in the order they were tried
cv_score = []     # mean negative MSE from cross-validation on the training data
model_score = []  # R^2 on the held-out rows (reported only, not used for selection)
mae_score = []    # mean absolute error on the held-out rows (reported only)
preds = {}        # alpha -> predictions for X_test

# Forward-chaining folds: every fold validates on rows that come after the rows
# it trained on. Assumes the rows of X are in chronological order.
cv_splitter = TimeSeriesSplit(n_splits=5)

for a in np.arange(0.01,100.00,0.01):
    model = Ridge(alpha=a, random_state=42)
    # Model selection must only see training data; cross-validating on
    # X_test/y_test would tune alpha on the rows meant for the final check.
    cv = cross_val_score(model, X, y, cv=cv_splitter, scoring="neg_mean_squared_error").mean()
    model.fit(X,y)
    pred = model.predict(X_test)

    alpha.append(a)
    cv_score.append(cv)
    model_score.append(model.score(X_test,y_test))
    mae_score.append(mean_absolute_error(y_test, pred))
    preds[a] = pred

In [None]:
# Report the alpha with the highest cross-validation score (the first one on ties)
# together with the other metrics recorded for that same alpha.
_best_idx = cv_score.index(max(cv_score))
print(f'The Best Alpha Number: {alpha[_best_idx]}\n cv score: {cv_score[_best_idx]}\n model score: {model_score[_best_idx]}\n MAE: {mae_score[_best_idx]}')

Build a model with the best hyperparameters

In [None]:
# Refit Ridge on the training data with the alpha that scored best in cross-validation.
best_alpha = alpha[cv_score.index(max(cv_score))]
best_model = Ridge(random_state=42, alpha=best_alpha)
best_model.fit(X,y)
# R^2 on the held-out rows. Score `best_model`, not `model`: `model` is whatever
# estimator the search loop created last (the largest alpha).
print(best_model.score(X_test,y_test))
# Mean negative MSE from forward-chaining cross-validation on the training data.
print(cross_val_score(best_model, X, y, cv=TimeSeriesSplit(n_splits=5), scoring='neg_mean_squared_error').mean())

Saving Our Model

In [None]:
# Each run is saved under models/<timestamp>, with the timestamp at minute resolution.
now = datetime.now().strftime("%Y_%m_%d_%H_%M")
# exist_ok=True creates the directory if needed without a separate existence check.
os.makedirs(f'models/{now}', exist_ok=True)

In [None]:
# Write the fitted model and both pipelines into this run's directory.
# `with` closes each file as soon as it is written, so the data is flushed to disk.
with open(f'models/{now}/model.pkl','wb') as model_file:
    pickle.dump(best_model, model_file)
# The pipelines are saved with cloudpickle, presumably because their steps are
# classes defined in this notebook rather than in an importable module.
with open(f'models/{now}/pipeline_for_training.pkl','wb') as pipeline_file:
    cloudpickle.dump(pipe, pipeline_file)
with open(f'models/{now}/pipeline_for_predict.pkl','wb') as pipeline_file:
    cloudpickle.dump(pipe_for_pred, pipeline_file)