In [2]:
import math
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import tensorflow
from datetime import date
from datetime import timedelta
from datetime import datetime
import yfinance as yf


In [3]:
# Global constants
date_format = "%Y-%m-%d"
# Ticker list; control_tickers reads its 'Hisse' column.
tickers = pd.read_csv(r"hisse_list.csv")  # [change]
start_day = '2023-01-01'

# Read the clock exactly once: today and tomorrow are then always one day
# apart, even if this cell happens to run across midnight.
_now = datetime.now()

today = _now.strftime(date_format)                     # date string
today_date = datetime.strptime(today, date_format)     # same date, time-of-day dropped

tomorrow = _now + timedelta(1)
tomorrow_str = tomorrow.strftime(date_format)
tomorrow_date = datetime.strptime(tomorrow_str, date_format)


# Default window length and train fraction used by the helper functions.
training_batch = 60
training_scale = 0.8
# Shared scaler: fitted in training_set and reused for inverse transforms.
scaler = MinMaxScaler(feature_range=(0,1))

In [4]:
#Preprocess funcs
def control_tickers(tickers):
  """Return the symbols from ``tickers['Hisse']`` whose download has more than 100 rows.

  Every symbol is printed and then downloaded with ``yf.download`` using its
  default arguments. The surviving symbols keep their original order.
  """
  def has_enough_rows(symbol):
    # Print first so the symbol appears before the downloader's own output.
    print(symbol)
    return len(yf.download(symbol)) > 100

  return [symbol for symbol in tickers['Hisse'] if has_enough_rows(symbol)]

def tickers_append_dates(tickers,desicion_dates):
  """Store ``desicion_dates`` under the 'MinDate' key of ``tickers`` and return it.

  ``tickers`` is modified in place; the returned object is the one passed in.
  """
  column_name = 'MinDate'
  tickers[column_name] = desicion_dates
  return tickers


def fetch(ticker, column,st,ft):
    """Download ``ticker`` between ``st`` and ``ft`` and pull out one column.

    Returns:
        (data, inputs): the object returned by ``yf.download`` and a NumPy
        array built from the values of its ``column`` entry.
    """
    history = yf.download(ticker, start=st, end=ft)
    column_values = history[f"{column}"].values
    return history, np.array(list(column_values))

def training_set(inputs,training_scale=training_scale,training_batch=training_batch):
    """Scale a price series and cut its leading part into LSTM training windows.

    The module-level ``scaler`` is fitted on the training rows only and then
    applied to the whole series, so the min/max of the held-out tail does not
    leak into training. Scaled values in the tail may therefore fall outside
    the scaler's feature range.

    Args:
        inputs: NumPy array of values; it is reshaped to a single column.
        training_scale: fraction of ``len(inputs)`` used for training (rounded up).
        training_batch: number of consecutive values in each input window.

    Returns:
        (x_train, y_train, inputs_scaled, training_scale_thershold) where
        ``x_train`` has shape (windows, training_batch, 1), ``y_train`` holds the
        value that follows each window, ``inputs_scaled`` is the whole scaled
        series as one column, and the threshold is the number of training rows.

    Raises:
        ValueError: if the training part is not longer than ``training_batch``,
            so that no window could be built.
    """
    training_scale_thershold = math.ceil(len(inputs) * training_scale)
    series = inputs.reshape(-1,1)
    train_rows = series[:training_scale_thershold]
    if len(train_rows) <= training_batch:
        raise ValueError(
            f"need more than {training_batch} training rows, got {len(train_rows)}"
        )

    # Fit on the training rows only, then scale the full series with that fit.
    scaler.fit(train_rows)
    inputs_scaled = scaler.transform(series)

    train_data = inputs_scaled[:training_scale_thershold,:]
    x_train = np.array([
        train_data[end - training_batch:end, 0]
        for end in range(training_batch, len(train_data))
    ])
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    # Target for the window ending at ``end`` is the value at index ``end``.
    y_train = train_data[training_batch:, 0].copy()
    return x_train , y_train , inputs_scaled , training_scale_thershold

def LSTM_build(x_train,y_train,training_batch=training_batch):
    """Build, compile and fit the two-layer LSTM regressor.

    Layers: LSTM(50, returning sequences) -> LSTM(50) -> Dense(25) -> Dense(1),
    compiled with the Adam optimizer and mean-squared-error loss, then fitted on
    ``(x_train, y_train)`` for one epoch with a batch size of one.

    Returns:
        The fitted model.
    """
    network = Sequential([
        LSTM(50, return_sequences=True, input_shape=(training_batch, 1)),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')
    network.fit(x_train, y_train, batch_size=1, epochs=1)
    return network

def test_set(inputs_scaled ,training_scale_thershold,training_batch=training_batch):
    """Build the evaluation windows and targets from the tail of the scaled series.

    Returns:
        (x_test, y_test, test_data): ``x_test`` has shape
        (windows, training_batch, 1); ``y_test`` is a copy of the scaled rows from
        ``training_scale_thershold`` onward; ``test_data`` is the slice the
        windows were cut from.
    """
    # Start training_batch rows before the threshold so that the first window
    # is followed by the first row after the training part.
    first_row = training_scale_thershold - training_batch
    test_data = inputs_scaled[first_row:, :]

    windows = []
    for stop in range(training_batch, len(test_data)):
        windows.append(test_data[stop - training_batch:stop, 0])
    x_test = np.array(windows)
    y_test = np.array(inputs_scaled[training_scale_thershold:, :])

    print(x_test.shape)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return x_test , y_test , test_data

def test_process(x_test , y_test , model):
    predictions_scaled = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions_scaled)
    test_set = scaler.inverse_transform(y_test)
    rmse = np.sqrt(np.mean(predictions - test_set)**2)
    test_df = pd.DataFrame()
    test_df["PRED"] = predictions[:,0]
    test_df["TEST"] = test_set[:,0]
    test_df["DIFF"] = test_df["PRED"] - test_df["TEST"]
    t_zero_predict = predictions[-1][0]
    return rmse, test_df ,predictions ,  t_zero_predict


def predict_set(inputs_scaled ,training_scale_thershold ,training_batch=training_batch):
    """Build the forecasting windows, including the one ending at the last row.

    Windows are cut from the same tail slice that ``test_set`` uses, but the
    range runs one step further so the final window ends on the last available
    row. The first window is then dropped.

    Returns:
        Array of shape (windows, training_batch, 1).
    """
    # Same starting row as the test slice: training_batch rows before the threshold.
    first_row = training_scale_thershold - training_batch
    predict_data = inputs_scaled[first_row:, :]

    windows = []
    for stop in range(training_batch, len(predict_data) + 1):
        windows.append(predict_data[stop - training_batch:stop, 0])

    x_predict = np.array(windows)[1:]
    print(x_predict.shape)
    return np.reshape(x_predict, (x_predict.shape[0], x_predict.shape[1], 1))


def predict_t_one(x_predict , model):
    """Return the model's last prediction for ``x_predict`` in original units.

    All windows are predicted, mapped back through the module-level ``scaler``,
    and the first element of the final row is returned.
    """
    unscaled = scaler.inverse_transform(model.predict(x_predict))
    return unscaled[-1][0]

def full_LSTM_proc(ticker, column,st,ft):
    """Run the whole pipeline for one ticker: fetch, train, evaluate, forecast.

    The window length and train fraction are the defaults of the helper
    functions; nothing is overridden here.

    Args:
        ticker: symbol passed to ``fetch``.
        column: name of the price column to model.
        st, ft: start and end passed to ``fetch``.

    Returns:
        (test_df, rmse, t_one_predict, t_zero_predict, data, inputs): the
        evaluation frame and error from ``test_process``, the forecast from
        ``predict_t_one``, the last test-set prediction, and the downloaded
        data with the extracted column array.
    """
    # Fetching
    data , inputs = fetch(ticker, column,st,ft)

    # Training
    x_train , y_train , inputs_scaled , training_scale_thershold = training_set(inputs)
    model = LSTM_build(x_train,y_train)

    # Testing (the raw test slice and the prediction array are not needed here)
    x_test , y_test , _ = test_set(inputs_scaled , training_scale_thershold)
    rmse , test_df , _ , t_zero_predict = test_process(x_test , y_test , model)

    # Predicting
    x_predict = predict_set(inputs_scaled , training_scale_thershold)
    t_one_predict = predict_t_one(x_predict , model)
    return test_df , rmse , t_one_predict , t_zero_predict , data , inputs

def get_financial_metrics(data , inputs ,column , st , ft):
    """Compute summary metrics for one ticker against XU100.IS and USDTRY=X.

    Args:
        data: price frame for the ticker; its 'Open' and ``column`` entries are read.
        inputs: array of the ticker's ``column`` values.
        column: price column used for the correlation and for the reference fetches.
        st, ft: start and end passed to ``fetch`` for the two reference series.

    Returns:
        (average_price, average_risk, beta, real_price, dolar_try, xu100):
        mean and standard deviation of ``inputs``; the Pearson correlation
        between the ticker's and XU100.IS's ``column`` values (a correlation of
        price levels, not a regression beta); the most recent available ratio
        of the ticker's Open to USDTRY=X's Open; and the last Close of
        USDTRY=X and of XU100.IS.
    """
    # NOTE(review): both reference series are re-downloaded on every call.
    xu100_data , xu100_inputs = fetch('XU100.IS', column ,st,ft)
    curr_data , curr_inputs = fetch('USDTRY=X', column ,st,ft)

    average_price = inputs.mean()
    average_risk = inputs.std()

    # Left merge keeps every ticker row; rows without an index match are NaN
    # and are ignored by the correlation.
    corr_df = pd.merge(data , xu100_data , how='left',on='Date')
    beta = corr_df[f'{column}_x'].corr(corr_df[f'{column}_y'])
    xu100 = float(xu100_data.Close.values[-1])

    # The division aligns on the index, so dates present in only one of the two
    # series give NaN. Use the last date both series cover rather than the last
    # row, which may be one of those NaNs.
    real_series = (data["Open"] / curr_data["Open"]).dropna()
    real_price = float(real_series.iloc[-1]) if len(real_series) else float("nan")
    dolar_try = float(curr_data.Close.values[-1])
    return average_price , average_risk , beta , real_price , dolar_try , xu100




In [5]:
# Keep only the tickers whose downloaded history has more than 100 rows.
filtered_tickers = control_tickers(tickers)

AGHOL.IS
[*********************100%***********************]  1 of 1 completed
YYLGD.IS
[*********************100%***********************]  1 of 1 completed
YKBNK.IS
[*********************100%***********************]  1 of 1 completed
YATAS.IS
[*********************100%***********************]  1 of 1 completed
VESTL.IS
[*********************100%***********************]  1 of 1 completed
VESBE.IS
[*********************100%***********************]  1 of 1 completed
VAKBN.IS
[*********************100%***********************]  1 of 1 completed
ULKER.IS
[*********************100%***********************]  1 of 1 completed
TURSG.IS
[*********************100%***********************]  1 of 1 completed
TUPRS.IS
[*********************100%***********************]  1 of 1 completed
TUKAS.IS
[*********************100%***********************]  1 of 1 completed
TTRAK.IS
[*********************100%***********************]  1 of 1 completed
TTKOM.IS
[*********************100%***********************]  1 o

In [6]:
column = 'Close'

# Parallel result lists: entry k of every list belongs to filtered_tickers[k].
rmse_li, today_real_li = [], []
t_one_predict_li, t_zero_predict_li = [], []
average_price_li, average_risk_li = [], []
beta_li, real_price_li = [], []
dolar_try_li, xu100_li = [], []

for i in filtered_tickers:
  # Model results first, then the descriptive metrics for the same ticker.
  test_df , rmse , t_one_predict , t_zero_predict ,data , inputs = full_LSTM_proc(i, column, start_day, today)
  average_price , average_risk , beta , real_price , dolar_try , xu100 = get_financial_metrics(data, inputs, column, start_day, today)

  for _bucket, _value in (
      (rmse_li, rmse),
      (today_real_li, test_df["TEST"].tolist()[-1]),  # last true value in the test set
      (t_zero_predict_li, t_zero_predict),
      (t_one_predict_li, t_one_predict),
      (average_price_li, average_price),
      (average_risk_li, average_risk),
      (beta_li, beta),
      (real_price_li, real_price),
      (dolar_try_li, dolar_try),
      (xu100_li, xu100),
  ):
    _bucket.append(_value)


[*********************100%***********************]  1 of 1 completed



(50, 60)
(50, 60)
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
(50, 60)
(50, 60)
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
(50, 60)
(50, 60)
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
(50, 60)
(50, 60)
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
(50, 60)
(50, 60)
[*********

In [None]:
# One row per ticker; the two date columns repeat the same value on every row.
_n_rows = len(filtered_tickers)
summary = pd.DataFrame({
    "TICKERS": filtered_tickers,
    "HATA": rmse_li,
    "TODAY_REAL": today_real_li,
    "TODAY_PRED": t_zero_predict_li,
    "TOMORROW_PRED": t_one_predict_li,
    "TO_DAY": [today_date] * _n_rows,
    "TOMORROW_DAY": [tomorrow_date] * _n_rows,
    "AVERAGE_PRICE": average_price_li,
    "RISK": average_risk_li,
    "BETA": beta_li,
    "REAL_PRICE": real_price_li,
    "DOLAR_TRY": dolar_try_li,
    "BIST_INDEX": xu100_li,
})

In [None]:
# Write the summary table to an Excel file whose name embeds today's date string.
summary.to_excel(f'summary{today}.xlsx')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>