In [1]:
# find model between reference-stock & target-stock
    # get the details of the model
        # model parameters
        # error ranges <- analysis

# make algo-trading strategy based on the model
    # what price to buy 
        # if the low-price of stock < predicted buy price, -> buy
    # what price to sell
        # if profit > 2%, -> sell
        # if loss >= 3%, -> sell

# back-testing of the algo-trading strategy

In [48]:
import numpy as np 
import matplotlib.pyplot as plt 
#!pip install yfinance
import yfinance as yf # https://pypi.org/project/yfinance/
import math
import random
import seaborn as sns
import datetime
import pandas as pd
from scipy import stats # python -m pip install scipy
import warnings
warnings.filterwarnings(action='ignore')

# get data by ticker-name, start-time & end-time
def get_df_data(ticker_name="AAPL", start_time="2022-01-01", end_time="2022-10-09",
                ma_short=5, ma_long=20):
    """Download price bars for one ticker and add return / moving-average columns.

    Parameters
    ----------
    ticker_name : str
        Ticker symbol passed to ``yf.download`` as ``tickers``.
    start_time, end_time : str
        Passed to ``yf.download`` as ``start`` and ``end``.
    ma_short, ma_long : int, optional
        Rolling-window lengths (in rows) for the ``MA1`` and ``MA2`` columns.
        The defaults (5 and 20) are the values that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The ``Open``, ``High``, ``Low``, ``Close`` and ``Volume`` columns of
        the download plus:

        * ``previous_Close`` - ``Close`` shifted down by one row,
        * ``daily_return``   - ``(Close - previous_Close) / previous_Close``,
        * ``MA1`` / ``MA2``  - rolling means of ``Close`` over ``ma_short`` /
          ``ma_long`` rows.

        The first row has no previous close, and the first ``window - 1`` rows
        of each rolling mean have no full window, so those cells are NaN.
    """
    raw = yf.download(tickers=ticker_name, start=start_time, end=end_time)
    # Work on an explicit copy: assigning new columns to a column-subset of
    # another frame is the pattern pandas flags with SettingWithCopyWarning,
    # and the copy guarantees the downloaded frame is never modified.
    # NOTE(review): assumes the download has flat (single-level) column names
    # - confirm against the installed yfinance version.
    df_data = raw[["Open", "High", "Low", "Close", "Volume"]].copy()
    df_data['previous_Close'] = df_data['Close'].shift(1)
    df_data['daily_return'] = (df_data['Close'] - df_data['previous_Close']) / df_data['previous_Close']
    df_data['MA1'] = df_data['Close'].rolling(ma_short).mean()
    df_data['MA2'] = df_data['Close'].rolling(ma_long).mean()
    return df_data

def merge_stocks(df_data1, df_data2):
    """Pair every target row with the latest reference row strictly before it.

    Parameters
    ----------
    df_data1 : pandas.DataFrame
        Target-stock bars. Must have ``Open``, ``High``, ``Low`` and ``Close``
        columns; its index values are used as the target timestamps.
    df_data2 : pandas.DataFrame
        Reference-stock bars with the same four columns. Its index must be
        comparable with the index of ``df_data1``. It is sorted internally if
        it is not already ascending.

    Returns
    -------
    pandas.DataFrame
        One row per target row that has at least one earlier reference row,
        with columns ``target_time, target_open, target_high, target_low,
        target_close, ref_time, ref_open, ref_high, ref_low, ref_close``.
        Target rows with no earlier reference row are dropped. An empty
        reference frame yields an empty result instead of raising.
    """
    col_names = ['target_time', 'target_open', 'target_high', 'target_low', 'target_close',
                 'ref_time', 'ref_open', 'ref_high', 'ref_low', 'ref_close']
    price_cols = ['Open', 'High', 'Low', 'Close']

    # Nothing to pair with: return the empty table rather than indexing into
    # an empty reference index.
    if len(df_data2) == 0:
        return pd.DataFrame([], columns=col_names)

    # The binary search below needs an ascending index; sorting also makes
    # "latest earlier row" well defined when the caller passes unsorted data.
    if not df_data2.index.is_monotonic_increasing:
        df_data2 = df_data2.sort_index(kind='stable')
    ref_index = df_data2.index

    data = []
    for i, target_time in enumerate(df_data1.index):
        # Number of reference rows strictly earlier than target_time; the
        # last of them (position pos - 1) is the row to pair with.
        pos = int(ref_index.searchsorted(target_time, side='left'))
        if pos == 0:
            # No reference row precedes this target row.
            continue
        target_row = df_data1.iloc[i]
        ref_row = df_data2.iloc[pos - 1]
        row = [target_time]
        row.extend(target_row[col] for col in price_cols)
        row.append(ref_index[pos - 1])
        row.extend(ref_row[col] for col in price_cols)
        data.append(row)

    return pd.DataFrame(data, columns=col_names)

def LR(train_data, label_name, min_num=20):
    """Least-squares line ``target_<label> = k * ref_<label> + b``.

    Parameters
    ----------
    train_data : mapping of column name -> sequence (e.g. a DataFrame)
        Must provide ``"ref_" + label_name`` and ``"target_" + label_name``.
    label_name : str
        Column suffix selecting which pair of columns to regress.
    min_num : int, optional
        Minimum number of samples required to fit.

    Returns
    -------
    tuple
        ``(k, b, R2)``: slope, intercept and squared correlation coefficient.
        With fewer than ``min_num`` samples no fit is attempted and the
        placeholder ``(1, 0, 0)`` is returned.
    """
    ref_values = train_data["ref_" + label_name]
    target_values = train_data["target_" + label_name]

    # Not enough history: hand back the placeholder model.
    if len(ref_values) < min_num:
        return 1, 0, 0

    fit = stats.linregress(list(ref_values), list(target_values))
    r_value = fit.rvalue
    return fit.slope, fit.intercept, r_value * r_value

def daily_models(merged_data, label_name, min_num, window=100):
    """Fit one trailing-window linear model per row of ``merged_data``.

    For row ``i`` the training set is the up-to-``window`` rows immediately
    before it (row ``i`` itself is excluded), and ``LR`` is fitted on the
    ``ref_<label_name>`` / ``target_<label_name>`` columns of that set.

    Parameters
    ----------
    merged_data : pandas.DataFrame
        Must contain ``target_time``, ``ref_time`` and the
        ``ref_<label_name>`` / ``target_<label_name>`` columns.
    label_name : str
        Column suffix to model, e.g. ``"close"``.
    min_num : int
        Forwarded to ``LR`` as the minimum sample count, and also the number
        of leading result rows that are dropped before returning.
    window : int, optional
        Maximum number of preceding rows used for training. Defaults to 100,
        the value that used to be hard-coded. Should be positive.

    Returns
    -------
    pandas.DataFrame
        Columns ``target_time, target_from_time, target_to_time,
        ref_from_time, ref_to_time, k_<label>, b_<label>, R2<label>``, sliced
        with ``[min_num:]``. The ``*_from_time`` / ``*_to_time`` columns are
        the timestamps of the first and last training rows.
    """
    col_names = ['target_time', 'target_from_time', 'target_to_time', 'ref_from_time', 'ref_to_time',
                 'k_' + label_name, 'b_' + label_name, 'R2' + label_name]
    data = []
    for i in range(len(merged_data)):
        # Trailing window that ends just before row i, so the model for a row
        # never sees that row's own values.
        train_data = merged_data.iloc[max(0, i - window):i]
        k, b, R2 = LR(train_data, label_name, min_num)
        if len(train_data) == 0:
            # Row 0 has no history, so there is nothing to report for it.
            continue
        first_row, last_row = train_data.iloc[0], train_data.iloc[-1]
        data.append([
            merged_data.iloc[i]['target_time'],
            first_row['target_time'], last_row['target_time'],
            first_row['ref_time'], last_row['ref_time'],
            k, b, R2,
        ])

    df = pd.DataFrame(data, columns=col_names)
    # Drop the leading rows, which carry LR's placeholder model.
    # NOTE(review): row 0 of merged_data is never emitted, so result position
    # p corresponds to merged row p + 1; this slice therefore also drops the
    # first row that had exactly min_num training samples. Kept as is because
    # downstream cells may rely on the current row labels.
    return df[min_num:]

In [49]:
# Each entry is unpacked below as (reference ticker, target ticker, date string, ratio).
stocks_info = [
    ('BABA', '9988.HK', '2022-11-01', 1),
    ('BIDU', '9888.HK', '2022-10-25', 1)
]

# Not used in this cell; presumably consumed by later strategy / back-test cells.
total_business_days = 0
gain_rate, loss_rate = 1.3, 9
trade_info_col = {}

# Every fitted model table, keyed by (target ticker, label). Previously each
# label's table overwrote the last one and only the final label survived.
model_dfs = {}
for it in stocks_info[:1]:
    # get market data of target-stock & reference-stock
    reference_stock, target_stock, st, ratio = it
    # NOTE(review): the date unpacked from stocks_info is overridden by this
    # fixed range straight away - confirm that is intended.
    st, et = "2021-01-01", "2023-02-28"
    print("time range:\t", st, "-", et)
    df_data1 = get_df_data(ticker_name=target_stock, start_time=st, end_time=et)
    print("target stock:\t", target_stock, "\t", len(df_data1))
    df_data2 = get_df_data(ticker_name=reference_stock, start_time=st, end_time=et)
    print("reference stock:\t", reference_stock, "\t", len(df_data2))
    # merge reference-stock & target-stock: each target row (datetime, open,
    # high, low, close) is paired with the latest earlier reference row
    merged_data = merge_stocks(df_data1, df_data2)
    for label_name in ["open", "high", "low", "close"]:
        model_df = daily_models(merged_data, label_name, 30)
        model_dfs[(target_stock, label_name)] = model_df

# model_df still refers to the last table fitted (label "close"), as before.
model_df

time range:	 2021-01-01 - 2023-02-28
[*********************100%***********************]  1 of 1 completed
target stock:	 9988.HK 	 519
[*********************100%***********************]  1 of 1 completed
reference stock:	 BABA 	 530


Unnamed: 0,target_time,target_from_time,target_to_time,ref_from_time,ref_to_time,k_close,b_close,R2close
30,2021-02-19,2021-01-05,2021-02-18,2021-01-04,2021-02-17,0.950976,7.601944,0.871530
31,2021-02-22,2021-01-05,2021-02-19,2021-01-04,2021-02-18,0.947533,8.406542,0.872314
32,2021-02-23,2021-01-05,2021-02-22,2021-01-04,2021-02-19,0.936867,10.880830,0.864895
33,2021-02-24,2021-01-05,2021-02-23,2021-01-04,2021-02-22,0.936778,10.866691,0.864698
34,2021-02-25,2021-01-05,2021-02-24,2021-01-04,2021-02-23,0.937262,10.605881,0.862140
...,...,...,...,...,...,...,...,...
512,2023-02-06,2022-09-07,2023-02-03,2022-09-06,2023-02-02,0.964715,1.381223,0.973730
513,2023-02-07,2022-09-08,2023-02-06,2022-09-07,2023-02-03,0.964037,1.425523,0.974107
514,2023-02-08,2022-09-09,2023-02-07,2022-09-08,2023-02-06,0.965606,1.322087,0.974524
515,2023-02-09,2022-09-13,2023-02-08,2022-09-12,2023-02-07,0.965629,1.302829,0.975039
