In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
#!pip install yfinance
import yfinance as yf # https://pypi.org/project/yfinance/
import math
import random
import seaborn as sns
import datetime
import pandas as pd
#!pip install sklearn
from sklearn.linear_model import LinearRegression
from scipy import stats
import warnings
warnings.filterwarnings(action='ignore')
from bs4 import BeautifulSoup
import requests
import json
import time
import re

In [191]:
def get_realtime_info(stock_code, timeout=10):
    """Fetch the latest quote snapshot for ``stock_code`` from citifirst.com.hk.

    Parameters
    ----------
    stock_code : str
        Code appended verbatim to the real-time data URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` raises a
        timeout error. Without it a stalled connection blocks the notebook
        indefinitely.

    Returns
    -------
    dict
        Keys 'Open', 'High', 'Low', 'Close', 'previous_Close', 'turnover' and
        'date_time', copied from the payload fields 'open', 'high', 'low',
        'last', 'lastc', 'turnover' and 'stimeNoformat'. Values are returned
        exactly as they appear in the payload (no numeric conversion).

    Raises
    ------
    json.JSONDecodeError
        If the cleaned-up payload is not valid JSON.
    KeyError
        If an expected field is missing from the payload.
    """
    URL_link = "https://www.citifirst.com.hk/en/data/json/json_realtimedata/code/"+stock_code
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0'}
    response = requests.get(URL_link, headers=headers, timeout=timeout)
    # Render the parsed document to text once instead of once per lookup.
    page_text = str(BeautifulSoup(response.content, 'html.parser'))

    # Keep the text from the first "{" up to the first "}".
    # NOTE(review): this assumes the payload is a flat object with no nested
    # braces - confirm against the live endpoint.
    start_index = page_text.find("{")
    end_index = page_text.find("}")
    st = page_text[start_index:end_index+1]

    # Drop every comma-separated fragment that contains markup ("<"), then
    # re-join the remaining fragments into a JSON document.
    json_str = ",".join(ln for ln in st.split(",") if "<" not in ln)
    dic = json.loads(json_str)

    stock_info = {
        'Open': dic['open'],
        'High': dic['high'],
        'Low': dic['low'],
        'Close': dic['last'],
        'previous_Close': dic['lastc'],
        'turnover': dic['turnover'],
        'date_time': dic['stimeNoformat'],
    }
    return stock_info

# get data by ticker-name, start-time & end-time
def get_df_data(ticker_name="AAPL", start_time="2022-01-01", end_time="2023-12-31", real_time=True):
    """Download daily price history and add basic derived columns.

    Parameters
    ----------
    ticker_name : str
        Ticker passed to ``yf.download`` (and to ``get_realtime_info`` when
        ``real_time`` is true).
    start_time, end_time : str
        Date bounds forwarded to ``yf.download`` as ``start`` / ``end``.
    real_time : bool
        When true, the Open/High/Low/Close of the LAST downloaded row are
        overwritten with the values returned by ``get_realtime_info``.
        NOTE(review): the last row is overwritten whatever its date is -
        confirm it always corresponds to the current trading day.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame plus 'previous_Close', 'daily_return',
        'Volume_log' (log2 of Volume), 'MA1' (5-row mean of Close) and
        'MA2' (20-row mean of Close).

    Raises
    ------
    IndexError
        If ``real_time`` is true and the download returned no rows.
    """
    df_data = yf.download(tickers=ticker_name, start=start_time, end=end_time)

    if real_time:
        stock_info = get_realtime_info(ticker_name)
        price_cols = ("Open", "High", "Low", "Close")
        current_price = stock_info['Close']
        last_idx = df_data.index[-1]
        try:
            quote = {col: float(stock_info[col]) for col in price_cols}
        except (TypeError, ValueError):
            # Some quote fields are not numeric: show the raw payload
            # and fall back to the last traded price for all four columns.
            print(stock_info)
            quote = dict.fromkeys(price_cols, float(current_price))
        for col, value in quote.items():
            df_data.at[last_idx, col] = value
    elif ".HK" in ticker_name:
        print("data may late for 15 minutes")

    # basic calculations such as: daily return, the log of Volume, Moving Average
    df_data['previous_Close'] = df_data['Close'].shift(1)
    df_data['daily_return'] = (df_data['Close']-df_data['previous_Close'])/df_data['previous_Close']
    # A Volume of 0 yields -inf here; downstream code is expected to filter it.
    df_data['Volume_log'] = np.log2(df_data['Volume'])
    MA1, MA2 = 5, 20
    df_data['MA1'] = df_data['Close'].rolling(MA1).mean()
    df_data['MA2'] = df_data['Close'].rolling(MA2).mean()
    return df_data

# in dataframe, search the info of certain previous rows by index
def search_info_by_index(df_data, tmp_idx, num=5):
    """Return a copy of the rows immediately preceding the row labelled ``tmp_idx``.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Frame to search.
    tmp_idx : label
        Index label of the reference row; the first match is used.
    num : int
        Maximum number of preceding rows to return.

    Returns
    -------
    pandas.DataFrame or None
        Up to ``num`` rows located just before the reference row (the
        reference row itself is excluded). An empty frame is returned when the
        reference row is the first one, and ``None`` when the label is absent.
    """
    matches = np.flatnonzero(df_data.index == tmp_idx)
    if len(matches) == 0:
        return None
    pos = int(matches[0])
    start = max(0, pos - num)
    # Positional slicing: for pos == 0 this is an empty window, whereas
    # label slicing up to index[pos - 1] would wrap to the last row and
    # return the entire frame.
    return df_data.iloc[start:pos].copy()

# extract info from dataframe such as: rise / fall ratio, avg, std of Open, High, Low, Close, Volume
def get_info_from_df(sub_df):
    """Summarise a window of rows into a flat list of 12 statistics.

    The result is ordered as
    [rise_N, fall_N, avg_open, std_open, avg_high, std_high,
     avg_low, std_low, avg_close, std_close, avg_volume, std_volume]
    where rise_N / fall_N count rows whose 'daily_return' is strictly
    positive / negative, and the volume statistics are taken from the
    'Volume_log' column.
    """
    returns = sub_df['daily_return']
    # rise / fall counts first ...
    summary = [int((returns > 0).sum()), int((returns < 0).sum())]
    # ... followed by (mean, std) for each price/volume column, in output order
    for column in ('Open', 'High', 'Low', 'Close', 'Volume_log'):
        values = sub_df[column]
        summary.append(values.mean())
        summary.append(values.std())
    return summary

# create features
def create_features(df_data, row_offset=20, num=5):
    """Add rolling-window statistics of the previous ``num`` rows to ``df_data``.

    For every row at position ``i >= row_offset`` the statistics returned by
    ``get_info_from_df`` are computed over rows ``max(0, i - num) .. i - 1``
    and stored in the columns 'rise_N', 'fall_N', 'avg_open', 'std_open',
    'avg_high', 'std_high', 'avg_low', 'std_low', 'avg_close', 'std_close',
    'avg_volume' and 'std_volume'.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Frame providing the columns read by ``get_info_from_df``. It is
        modified in place: the twelve columns above are (re)created on it.
    row_offset : int
        Number of leading rows to skip; those rows keep a placeholder 0.
    num : int
        Size of the look-back window.

    Returns
    -------
    pandas.DataFrame
        ``df_data[row_offset:]``, i.e. only the rows that received features.
    """
    stat_cols = [
        'rise_N', 'fall_N',
        'avg_open', 'std_open',
        'avg_high', 'std_high',
        'avg_low', 'std_low',
        'avg_close', 'std_close',
        'avg_volume', 'std_volume',
    ]
    n_rows = len(df_data)
    first = min(row_offset, n_rows)

    # Placeholder rows: integer zeros for the two counters, float zeros for
    # the averages/deviations so every column has its final dtype at once
    # (no int column silently upcast by later float assignments).
    rows = [[0, 0] + [0.0] * (len(stat_cols) - 2) for _ in range(first)]
    for i in range(first, n_rows):
        # Positional slice of the preceding rows; avoids a linear index
        # search for every row.
        window = df_data.iloc[max(0, i - num):i]
        rows.append(get_info_from_df(window))

    for col_pos, col in enumerate(stat_cols):
        df_data[col] = [row[col_pos] for row in rows]

    return df_data[row_offset:]

# LR model
from sklearn.linear_model import LinearRegression
def train_model(train_X, train_y, printing=True):
    """Fit an ordinary least-squares ``LinearRegression`` on the given data.

    Parameters
    ----------
    train_X : array-like
        Feature rows.
    train_y : array-like
        Target values, one per feature row.
    printing : bool
        When true, print the in-sample coefficient of determination together
        with the fitted intercept and coefficients.

    Returns
    -------
    LinearRegression
        The fitted model.
    """
    model = LinearRegression().fit(train_X, train_y)

    if printing:
        # The score is only used for the report, so it is not computed
        # when nothing is printed.
        r_sq = model.score(train_X, train_y)
        print(f"coefficient of determination: {r_sq}")
        print(f"intercept: {model.intercept_}\tslope: {model.coef_}")
    return model

def error_analyze(train_y, y_pred):
    """Build a table comparing real and predicted values.

    Returns a DataFrame with the columns 'y_real', 'y_pred' and 'dif', where
    'dif' is the absolute percentage error |y_real - y_pred| / y_real * 100.
    """
    report = pd.DataFrame(columns = ['y_real', 'y_pred'])
    report['y_real'] = train_y
    report['y_pred'] = y_pred
    relative_error = (report['y_real'] - report['y_pred'])/report['y_real'] * 100
    report['dif'] = relative_error.abs()
    return report

def basic_info(df, col='dif'):
    """Print summary statistics of ``df[col]``.

    Prints, one per line: max, min, median, mean, std and the 10 / 25 / 50 /
    75 / 90 percent quantiles. Returns None.
    """
    column = df[col]

    aggregates = (
        ("max:\t", column.max),
        ("min:\t", column.min),
        ("median:\t", column.median),
        ("mean:\t", column.mean),
        ("std:\t", column.std),
    )
    for caption, compute in aggregates:
        print(caption, compute())

    quantiles = (
        ("10%:\t", 0.10),
        ("25%:\t", 0.25),
        ("50%:\t", 0.50),
        ("75%:\t", 0.75),
        ("90%:\t", 0.90),
    )
    for caption, q in quantiles:
        print(caption, column.quantile(q))
    return None

def predict_current_day(df_data, features, label="Close", train_window=120):
    """Fit a linear model on recent rows and predict ``label`` for the latest labelled day.

    Each sample uses the ``features`` columns of one row as inputs and the
    value of ``label`` on the FOLLOWING row as target. A 'next_Open' column
    (the following row's Open) is derived here, so it may be listed in
    ``features``. The last row of ``df_data`` has no following row and is
    therefore only used as the source of the target / 'next_Open' for the row
    before it.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Frame containing 'Open', ``label`` and every column named in
        ``features`` (except 'next_Open'). It is not modified.
    features : list of str
        Names of the input columns, in the order fed to the model. Selecting
        by name keeps the model inputs correct even if the column layout of
        ``df_data`` changes.
    label : str
        Column to predict.
    train_window : int
        Maximum number of rows (immediately before the test row) used for
        training.

    Returns
    -------
    tuple
        ``(error_df, test_y_pred, test_y_real, test_date)``: the in-sample
        error table from ``error_analyze`` with an extra 'date-time' column,
        the prediction for the test row, its actual target value and its
        index label.

    Raises
    ------
    ValueError
        If fewer than two complete rows remain after cleaning.
    KeyError
        If a requested feature column is missing.
    """
    target_col = 'next_' + label

    # Work on a copy so the helper columns never leak into the caller's frame
    # (and cannot be picked up as inputs by a later call).
    model_data = df_data.copy()
    model_data['next_Open'] = model_data['Open'].shift(-1)
    model_data[target_col] = model_data[label].shift(-1)
    # in case for some rows that 'Volume' that is 0: treat infinities as missing
    model_data = model_data.replace([np.inf, -np.inf], np.nan).dropna()

    n_rows = len(model_data)
    if n_rows < 2:
        raise ValueError("need at least two complete rows to train and test")

    # The target must never be one of its own inputs (matters for label='Open').
    feature_cols = [col for col in features if col != target_col]

    # training data: up to `train_window` rows right before the test row
    train = model_data.iloc[max(0, n_rows - 1 - train_window):n_rows - 1]
    X_data = train[feature_cols].to_numpy().tolist()
    y_data = train[target_col].tolist()
    date_data = list(train.index)

    print("training data:", date_data[0], date_data[-1], sep="\t")

    # testing data: the most recent row whose target is known
    test_row = model_data.iloc[-1]
    X_test = [[test_row[col] for col in feature_cols]]
    test_y_real = test_row[target_col]
    test_date = model_data.index[-1]
    print("testing data:", test_date, sep="\t")

    model = train_model(X_data, y_data)
    y_pred = model.predict(X_data) # error analysis
    error_df = error_analyze(y_data, y_pred)
    error_df['date-time'] = date_data

    test_y_pred = model.predict(X_test)[0] # predict current day
    return error_df, test_y_pred, test_y_real, test_date

def predict_next_day(df_data, features, label="Close", train_window=120):
    """Fit a linear model on recent rows and predict ``label`` for the day after the last row.

    Each training sample uses the ``features`` columns of one row as inputs
    and the value of ``label`` on the FOLLOWING row as target. The prediction
    is made from the last row of ``df_data``, whose target is not known yet.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Frame containing ``label`` and every column named in ``features``.
        It is not modified.
    features : list of str
        Names of the input columns, in the order fed to the model.
    label : str
        Column to predict.
    train_window : int
        Maximum number of rows used for training.

    Returns
    -------
    tuple
        ``(error_df, test_y_pred, test_y_real)``: the in-sample error table
        from ``error_analyze`` with an extra 'date-time' column, the
        prediction made from the last row, and that row's target value
        (NaN, because the following row does not exist yet).

    Raises
    ------
    ValueError
        If ``df_data`` is empty or fewer than two complete rows remain after
        cleaning.
    KeyError
        If a requested feature column is missing.
    """
    if len(df_data) == 0:
        raise ValueError("df_data is empty")

    target_col = 'next_' + label

    # Work on a copy so the helper column never leaks into the caller's frame.
    full_data = df_data.copy()
    full_data[target_col] = full_data[label].shift(-1)
    # in case for some rows that 'Volume' that is 0: treat infinities as missing
    model_data = full_data.replace([np.inf, -np.inf], np.nan).dropna()

    n_rows = len(model_data)
    if n_rows < 2:
        raise ValueError("need at least two complete rows to train")

    # The target must never be one of its own inputs.
    feature_cols = [col for col in features if col != target_col]

    # training data
    # NOTE(review): the last complete row is left out of the training window
    # although its target is known - kept as is so results do not change;
    # confirm whether it should be included.
    train = model_data.iloc[max(0, n_rows - 1 - train_window):n_rows - 1]
    X_data = train[feature_cols].to_numpy().tolist()
    y_data = train[target_col].tolist()
    date_data = list(train.index)

    # prediction input: the latest row of the original (uncleaned) data
    latest_row = full_data.iloc[-1]
    X_test = [[latest_row[col] for col in feature_cols]]
    test_y_real = latest_row[target_col]

    model = train_model(X_data, y_data)
    y_pred = model.predict(X_data) # error analysis
    error_df = error_analyze(y_data, y_pred)
    error_df['date-time'] = date_data

    test_y_pred = model.predict(X_test)[0] # predict next day
    return error_df, test_y_pred, test_y_real

def business_dates(ticker_name='9988.HK', start_time="2022-01-01", end_time="2023-12-31"):
    """Return the dates for which ``get_df_data`` has rows, as 'YYYY-MM-DD' strings.

    Parameters
    ----------
    ticker_name : str
        Ticker whose history defines the calendar.
    start_time, end_time : str
        Date bounds forwarded to ``get_df_data``.

    Returns
    -------
    list of str
        First ten characters of ``str(index_value)`` for every row of the
        downloaded frame, in index order. Real-time patching is disabled for
        this download.
    """
    tmp_df = get_df_data(ticker_name, start_time, end_time, False)
    return [str(d)[:10] for d in tmp_df.index]

def draw_pred_real01(pred_list, real_list, x_dates):
    """Plot predicted and real values as two lines over the same x positions.

    ``x_dates`` supplies the tick labels (rotated 90 degrees); one x position
    is created per entry of ``x_dates``. Shows the figure and returns None.
    """
    tick_positions = list(range(0, len(x_dates)))

    plt.figure(figsize=(20, 6))
    for series, caption in ((pred_list, "pred"), (real_list, "real")):
        plt.plot( tick_positions, series, label=caption )

    # axis decoration
    plt.xlabel('times')
    plt.ylabel('real / predicted-value')
    plt.title('predicted vs real')
    plt.xticks(tick_positions, x_dates, rotation=90)
    plt.legend()
    plt.grid(True)
    plt.show()
    return None

def draw_pred_real02(dif_list, x_dates):
    """Plot the real-vs-predicted difference (%) as a bar chart.

    ``x_dates`` supplies the tick labels (rotated 90 degrees); one bar
    position is created per entry of ``x_dates``. Shows the figure and returns
    None.
    """
    tick_positions = list(range(0, len(x_dates)))

    plt.figure(figsize=(20, 6))
    # The bars are differences, not predictions, so the legend says so.
    plt.bar( tick_positions, dif_list, label="dif %" )

    plt.xlabel('times')
    plt.ylabel('real-predicted dif %')
    plt.title('predicted vs real')
    plt.xticks(tick_positions, x_dates, rotation=90)
    plt.legend()
    plt.grid(True)
    plt.show()
    return

In [344]:
# Model input columns, in the order they are listed for the regression.
# 'next_Open' is last so that `features[:-1]` gives the next-day feature set.
features = [
    'Open', 'High', 'Low', 'Close', 'Volume_log', 'MA1', 'MA2',
    'rise_N', 'fall_N', 'avg_open', 'std_open', 'avg_high', 'std_high',
    'avg_low', 'std_low', 'avg_close', 'std_close', 'avg_volume', 'std_volume',
    'next_Open',
]
label = "Close"

# Download window used by the prediction cells.
st = "2022-01-01"
et = "2023-08-31"

# (display name, Hong Kong ticker, factor)
# NOTE(review): the third element looks like a share-ratio x USD/HKD multiplier;
# the loops shown in this notebook unpack it as `_` and do not use it - confirm.
stocks_info = [
    #('HSI', '^HSI', 1),
    ('BABA', '9988.HK', 1),
    ('BIDU', '9888.HK', 1),
    ('JD', '9618.HK', 0.5 * 7.8),
    ('MPNGY', '3690.HK', 0.5 * 7.8),
    ('NTES', '9999.HK', 0.2 * 7.8),
    ('TENCENT', '0700.HK', 1 * 7.8),
    ('TME', '1698.HK', 1 * 7.8),
    ('LI', '2015.HK', 0.5 * 7.8),
    ('XPEV', '9868.HK', 0.5 * 7.8),
    ('BILI', '9626.HK', 1 * 7.8),
    ('TCOM', '9961.HK', 1 * 7.8),
    ('YUMC', '9987.HK', 1 * 7.8),
    ('EDU', '9901.HK', 0.1 * 7.8),
    ('NIO', '9866.HK', 1 * 7.8),
    ('ZTO', '2057.HK', 1 * 7.8),
    ('BEKE', '2423.HK', 0.5 * 7.8),
    ('ZH', '2390.HK', 3 * 7.8),
    ('WB', '9898.HK', 1 * 7.8),
    ('MNSO', '9896.HK', 0.5 * 7.8),
    ('ZLAB', '9688.HK', 0.5 * 7.8),
    ('SMIC', '0981.HK', 1 * 7.8),
    ('SenseTime', '0020.HK', 1 * 7.8),
    ('Kuaishou', '1024.HK', 1 * 7.8),
    ('Xiaomi', '1810.HK', 1 * 7.8),
    ('CMB', '3968.HK', 1 * 7.8),
]

In [366]:
### current-day prediction
# For every stock: download history (patched with the real-time quote), build
# the rolling features once, then fit one model per target in `pred_feature`.
# Results:
#   pred_data            - one row per stock: [name, code, then (prediction,
#                          median error %, mean error %) for each target]
#   pred_real_error_dic  - in-sample error table per "<name>_<code>_<target>"

pred_feature = ['Close', 'High', 'Low']
pred_real_error_dic = {}
pred_data = []
cur_features = features
for it in stocks_info[:]:
    stock_name, stock_code, _ = it
    test_df = get_df_data(stock_code, st, et, True)
    # The features do not depend on the target, so compute them once per
    # stock instead of once per target.
    featured_df = create_features(test_df)

    ## add info
    each_stock_data = [stock_name, stock_code]

    for label in pred_feature:
        cur_label = label
        # Pass a fresh copy so every target starts from the same set of columns.
        error_df, test_y_pred, test_y_real, test_date = predict_current_day( featured_df.copy(), cur_features, cur_label)
        #basic_info(error_df)
        med_err, mean_err = error_df['dif'].median(), error_df['dif'].mean()
        print( label, test_y_pred, test_y_real, med_err, mean_err )

        ## add info
        each_stock_data.append( test_y_pred )
        each_stock_data.append( med_err )
        each_stock_data.append( mean_err )

        new_name = stock_name + "_" + stock_code + "_" + label
        pred_real_error_dic[ new_name ] = error_df

    pred_data.append( each_stock_data )

[*********************100%***********************]  1 of 1 completed
training data:	2023-02-21 00:00:00	2023-08-16 00:00:00
testing data:	2023-08-17 00:00:00
coefficient of determination: 0.9540098247837625
intercept: 16.829449235901166	slope: [ 0.10960183 -0.05913821  0.18780415 -0.30521888  0.14967003  0.48651777
 -0.03436562 -0.09438598  0.09438598  1.48476604  0.20676087  0.08703689
 -0.4476775  -1.52598738 -0.02156123 -0.38218353 -0.0706994  -0.72022784
 -0.23625727  0.91300466]
Close 87.97581460455976 89.25 0.9143617381578527 1.0761371814473852
training data:	2023-02-21 00:00:00	2023-08-16 00:00:00
testing data:	2023-08-17 00:00:00
coefficient of determination: 0.9707093973445913
intercept: 6.429986647454982	slope: [ 0.10712821 -0.09508775  0.08498599 -0.21780036  0.28978042  0.32733369
  0.01423553 -0.0174285   0.0174285   0.93638741 -0.10607268 -0.04602982
 -0.03086151 -1.23417697  0.12862968  0.15717683 -0.11730518 -0.55661264
 -0.43406662  0.97472651]
High 89.94601162736186 8

In [384]:
# Combine the current-day predictions in `pred_data` with a fresh real-time
# quote per stock and show them side by side.
stock_data = []
for it in pred_data:
    ## predicted info
    (stock_name, stock_code,
     close_pred, close_med_err, close_mean_err,
     high_pred, high_med_err, high_mean_err,
     low_pred, low_med_err, low_mean_err) = it[:11]

    ## real info
    stock_info = get_realtime_info(stock_code)
    close_real, high_real, low_real = stock_info['Close'], stock_info['High'], stock_info['Low']
    open_real, prev_close = stock_info['Open'], stock_info['previous_Close']
    turnover, update_time = stock_info['turnover'], stock_info['date_time']

    new_name = stock_name + "_" + stock_code
    row_data = [
        new_name, prev_close, open_real,
        close_pred, close_real, close_mean_err,
        high_pred, high_real, high_mean_err,
        low_pred, low_real, low_mean_err,
        turnover, update_time,
    ]
    stock_data.append( row_data )

###
col_names = [
    "stock-name", "previous_close", "cur_open",
    "close_pred", "close_real", "close_err_range%",
    "high_pred", "high_real", "high_err_range%",
    "low_pred", "low_real", "low_err_range%",
    "turnover", "update_time",
]
round_dic = {
    'close_pred': 2, 'high_pred': 2, 'low_pred': 2,
    'close_err_range%': 2, 'high_err_range%': 2, 'low_err_range%': 2,
    'daily_return%': 2,
}
stock_df = pd.DataFrame( stock_data, columns=col_names )
# Only the two columns used in arithmetic below are converted to float.
for numeric_col in ('previous_close', 'close_real'):
    stock_df[numeric_col] = stock_df[numeric_col].astype(float)
stock_df['price_change'] = stock_df['close_real'] - stock_df['previous_close']
stock_df['daily_return%'] = stock_df['price_change']/stock_df['previous_close']*100

# The high/low error-range columns are computed but not displayed.
show_cols = [
    "stock-name", "previous_close", "cur_open",
    "close_pred", "close_real", "close_err_range%", "price_change", "daily_return%",
    "high_pred", "high_real",
    "low_pred", "low_real",
    "turnover", "update_time",
]

stock_df.round(round_dic)[ show_cols ]

Unnamed: 0,stock-name,previous_close,cur_open,close_pred,close_real,close_err_range%,price_change,daily_return%,high_pred,high_real,low_pred,low_real,turnover,update_time
0,BABA_9988.HK,90.1,89.25,87.98,87.0,1.08,-3.1,-3.44,89.95,89.85,87.8,86.8,3.71B,"2023-08-18, 16:08"
1,BIDU_9888.HK,129.6,128.4,127.78,126.1,1.36,-3.5,-2.7,130.56,129.7,126.71,125.8,573.01M,"2023-08-18, 16:08"
2,JD_9618.HK,138.6,134.6,136.17,131.2,1.37,-7.4,-5.34,137.74,136.4,134.31,131.2,2.29B,"2023-08-18, 16:08"
3,MPNGY_3690.HK,133.6,131.8,132.49,129.9,1.55,-3.7,-2.77,134.85,133.8,130.57,129.4,2.31B,"2023-08-18, 16:08"
4,NTES_9999.HK,161.9,158.0,159.78,156.5,1.19,-5.4,-3.34,161.98,161.0,156.61,156.0,599.98M,"2023-08-18, 16:08"
5,TENCENT_0700.HK,332.8,329.4,327.86,325.0,1.15,-7.8,-2.34,333.98,332.8,324.64,325.0,5.94B,"2023-08-18, 16:08"
6,TME_1698.HK,26.0,26.55,25.79,26.55,1.35,0.55,2.12,26.01,27.2,25.33,26.3,3.09M,"2023-08-18, 16:08"
7,LI_2015.HK,164.0,163.9,162.76,155.4,1.67,-8.6,-5.24,166.67,165.2,160.21,153.8,1.45B,"2023-08-18, 16:08"
8,XPEV_9868.HK,65.3,62.85,63.82,61.0,2.79,-4.3,-6.58,63.98,63.45,62.61,60.65,1.08B,"2023-08-18, 16:08"
9,BILI_9626.HK,124.0,123.0,123.4,116.9,1.77,-7.1,-5.73,126.2,128.0,119.43,116.8,624.44M,"2023-08-18, 16:08"


In [199]:
### testing for current-day prediction
# Back-test: for each of nine recent business dates, download history up to
# that date, fit the current-day model and compare its prediction for the test
# row with the real close.
working_days = business_dates()
st, et = "2021-01-01", "2023-08-02"
stock_code = '9988.HK'

overall_evaluation = []
for tmp_et in working_days[-10:-1]:
    #print(tmp_et)

    # get_df_data takes four parameters; real_time=False keeps the history
    # untouched by the live quote.
    test_df = get_df_data(stock_code, st, tmp_et, False)
    # Use the `features` list from the configuration cell directly so this
    # cell does not depend on a variable created inside another cell's loop.
    error_df, test_y_pred, test_y_real, test_date = predict_current_day( create_features(test_df) , features, 'Close')
    err_med, err_mean = round(error_df['dif'].median(), 2), round(error_df['dif'].mean(), 2)
    test_y_pred = round(test_y_pred, 2)
    test_y_real = round(test_y_real, 2)
    err_real = round((test_y_pred-test_y_real)/test_y_real*100, 2)
    test_date = str(test_date)[:10]
    train_date1, train_date2 = str(error_df.iloc[0]['date-time'])[:10], str(error_df.iloc[-1]['date-time'])[:10]
    #print()
    row_info = [tmp_et, test_date, train_date1, train_date2, test_y_pred, test_y_real, err_real, err_med, err_mean]
    overall_evaluation.append( row_info )

col_names = ['data-date', 'test-date', 'train-date-1', 'train-date-2',
    'pred-close', 'real-close', 'err-real',
    'err-range-med', 'err-range-mean'
    ]
evaluation_df = pd.DataFrame( overall_evaluation, columns=col_names )
evaluation_df['err-real-abs'] = evaluation_df['err-real'].abs()
basic_info(evaluation_df, 'err-real-abs')

[*********************100%***********************]  1 of 1 completed
data may late for 15 minutes
[*********************100%***********************]  1 of 1 completed
data may late for 15 minutes
training data:	2023-01-30 00:00:00	2023-07-25 00:00:00
testing data:	2023-07-26 00:00:00
coefficient of determination: 0.971019556357957
intercept: 10.757011930957063	slope: [ 0.32554505 -0.11418905 -0.14020005 -0.14019595  0.03271243  0.45029756
 -0.01733476 -0.04396522  0.04396522  1.26331258  0.4450555  -0.06543305
 -0.12073685 -1.11366652 -0.16682539 -0.40933418 -0.42680183 -0.29091646
 -0.21712374  0.90236493]
[*********************100%***********************]  1 of 1 completed
data may late for 15 minutes
training data:	2023-01-31 00:00:00	2023-07-26 00:00:00
testing data:	2023-07-27 00:00:00
coefficient of determination: 0.9704979820409654
intercept: 2.348670390846067	slope: [ 0.36402186 -0.04791785 -0.19243135 -0.21896421  0.14017298  0.4417885
 -0.02575031 -0.06959381  0.06959381  1.1

In [200]:
# Print summary statistics of the absolute prediction error (%) per test date,
# then display the full evaluation table as the cell output.
basic_info(evaluation_df, 'err-real-abs')
# Optional plots (disabled): predicted vs. real close, and the absolute error per test date.
#draw_pred_real01(evaluation_df['pred-close'], evaluation_df['real-close'], evaluation_df['test-date'])
#draw_pred_real02(evaluation_df['err-real-abs'], evaluation_df['test-date'])
evaluation_df

max:	 3.6
min:	 0.04
median:	 1.17
mean:	 1.4777777777777776
std:	 1.111854057169575
10%:	 0.33599999999999997
25%:	 0.93
50%:	 1.17
75%:	 2.05
90%:	 2.7920000000000003


Unnamed: 0,data-date,test-date,train-date-1,train-date-2,pred-close,real-close,err-real,err-range-med,err-range-mean,err-real-abs
0,2023-07-28,2023-07-26,2023-01-30,2023-07-25,93.49,95.45,-2.05,0.93,1.11,2.05
1,2023-07-31,2023-07-27,2023-01-31,2023-07-26,92.45,95.9,-3.6,1.01,1.12,3.6
2,2023-08-01,2023-07-28,2023-02-01,2023-07-27,97.46,97.5,-0.04,0.99,1.14,0.04
3,2023-08-02,2023-07-31,2023-02-02,2023-07-28,99.32,97.85,1.5,1.01,1.12,1.5
4,2023-08-03,2023-08-01,2023-02-03,2023-07-31,97.61,95.15,2.59,0.98,1.13,2.59
5,2023-08-04,2023-08-02,2023-02-06,2023-08-01,94.02,93.15,0.93,0.93,1.14,0.93
6,2023-08-07,2023-08-03,2023-02-07,2023-08-02,95.54,95.15,0.41,0.91,1.14,0.41
7,2023-08-08,2023-08-04,2023-02-08,2023-08-03,94.48,95.6,-1.17,0.91,1.14,1.17
8,2023-08-09,2023-08-07,2023-02-09,2023-08-04,93.99,93.05,1.01,0.87,1.1,1.01


In [348]:
### next-day prediction
# For every stock: download history (patched with the real-time quote), build
# the rolling features once, then fit one model per target in `pred_feature`.
# Each row of `pred_next_data` is
#   [name_code, Open, High, Low, Close, then (prediction, mean error %) per target]

# 'next_Open' (last entry of `features`) is not known yet for the next day.
next_features = features[:-1]
st, et = "2022-01-01", "2023-08-31"
pred_next_data = []
for it in stocks_info[:]:
    #
    stock_name, stock_code, _ = it
    test_df = get_df_data(stock_code, st, et, True)
    today_info = test_df.iloc[-1]
    O, H, L, C = today_info['Open'], today_info['High'], today_info['Low'], today_info['Close']

    new_name = stock_name + "_" + stock_code
    each_stock_data = [new_name, O, H, L, C]

    # The features do not depend on the target, so compute them once per
    # stock instead of once per target.
    featured_df = create_features(test_df)

    for label in pred_feature:
        next_label = label
        # Pass a fresh copy so every target starts from the same set of columns.
        error_df, test_y_pred, test_y_real = predict_next_day(featured_df.copy(), next_features, next_label)
        med_err, mean_err = error_df['dif'].median(), error_df['dif'].mean()

        each_stock_data.append(test_y_pred)
        each_stock_data.append(mean_err)

    #
    pred_next_data.append( each_stock_data )

[*********************100%***********************]  1 of 1 completed
coefficient of determination: 0.8761908335989329
intercept: 59.757811140039635	slope: [-0.12842516 -0.23599171  0.25910433  0.5737618   0.24085555  1.58618481
 -0.09969378 -0.03740435  0.03740435  0.55339536 -0.09742034  2.12236644
  0.9806622  -3.19106331  0.01729063 -0.71005224 -1.17599809 -1.84921226
 -0.73393072]
coefficient of determination: 0.886096918728331
intercept: 51.82830384089219	slope: [-0.14434811 -0.31542484  0.18977516  0.72394521  0.36259775  1.5339072
 -0.03965709  0.03927365 -0.03927365 -0.11969253 -0.5486516   2.18169816
  1.49481109 -2.91896118  0.24434611 -0.306752   -1.29024638 -1.79382398
 -0.88299673]
coefficient of determination: 0.9051163133865731
intercept: 51.52644578369421	slope: [ 0.02018645 -0.37775834 -0.17727879  1.00022858  0.01326771  1.51510316
 -0.03640738 -0.00748314  0.00748314  0.01064369 -0.04896755  1.8679722
  1.20694505 -2.76931693  0.02420053 -0.28864453 -1.38463894 -1.46

In [349]:
# Tabulate the next-day predictions collected in `pred_next_data`:
# today's OHLC followed by (prediction, mean in-sample error %) per target.
col_names = [
    "stock-name", "Open", "High", "Low", "Close",
    "close_pred", "close_err_range%",
    "high_pred", "high_err_range%",
    "low_pred", "low_err_range%",
]
# Every prediction / error column is shown with two decimals.
round_dic = {
    'close_pred': 2, 'close_err_range%': 2,
    'high_pred': 2, 'high_err_range%': 2,
    'low_pred': 2, 'low_err_range%': 2,
}
stock_df = pd.DataFrame( pred_next_data, columns=col_names )
stock_df.round(round_dic)

Unnamed: 0,stock-name,Open,High,Low,Close,close_pred,close_err_range%,high_pred,high_err_range%,low_pred,low_err_range%
0,BABA_9988.HK,86.6,90.35,86.2,90.05,88.14,1.82,90.11,1.73,88.23,1.51
1,BIDU_9888.HK,127.2,130.2,125.4,130.0,129.17,1.93,132.27,1.79,128.8,1.75
2,JD_9618.HK,135.4,139.4,134.1,138.2,140.77,2.05,142.67,1.82,139.51,1.74
3,MPNGY_3690.HK,129.7,133.2,129.1,132.5,132.01,1.9,135.41,1.6,131.24,1.49
4,NTES_9999.HK,157.9,162.0,156.5,161.4,160.59,1.63,163.4,1.32,158.53,1.43
5,TENCENT_0700.HK,320.6,335.8,320.6,333.2,330.69,1.41,337.34,1.21,329.69,1.15
6,TME_1698.HK,24.45,26.05,24.45,26.0,25.69,2.24,26.1,1.88,25.42,1.93
7,LI_2015.HK,157.8,166.8,157.1,164.5,162.21,2.28,166.57,1.91,161.41,1.87
8,XPEV_9868.HK,61.6,67.0,60.8,65.7,69.17,4.05,69.84,3.55,67.28,2.95
9,BILI_9626.HK,120.5,126.0,118.0,124.5,122.21,2.83,125.03,2.54,118.91,2.34
