In [2]:
# !pip install pyecharts
# !pip install selenium
import numpy as np 
import matplotlib.pyplot as plt 
#!pip install yfinance
import yfinance as yf # https://pypi.org/project/yfinance/
import math
import random
import seaborn as sns
import datetime
import pandas as pd
from pyecharts.charts import Bar, Line
from pyecharts import options as opts

In [95]:
# get data by ticker-name, start-time & end-time
def get_df_data(ticker_name="AAPL", start_time="2022-01-01", end_time="2022-10-09"):
    """Download price history for one ticker through ``yf.download``.

    Args:
        ticker_name: ticker symbol forwarded as ``tickers``.
        start_time: start date string forwarded as ``start``.
        end_time: end date string forwarded as ``end``.

    Returns:
        Whatever ``yf.download`` returns; callers in this notebook treat it as
        a DataFrame with "Open"/"High"/"Low"/"Close"/"Volume" columns.
    """
    return yf.download(tickers=ticker_name, start=start_time, end=end_time)

def draw_2lines(x, y1, tn1, y2, tn2, ls1=50, le1=160, ls2=50, le2=160):
    """Plot two series against a shared x axis using independent y axes.

    Args:
        x: x-axis values shared by both series.
        y1: values drawn in red against the left y axis.
        tn1: label for ``y1`` (axis label and legend entry).
        y2: values drawn in blue against the right (twin) y axis.
        tn2: label for ``y2`` (axis label and legend entry).
        ls1, le1: lower / upper limit of the left y axis.
        ls2, le2: lower / upper limit of the right y axis.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    fig, ax1 = plt.subplots(figsize=(25, 10))
    ax1.set_xlabel('Time range')

    # Left axis: first series.
    ax1.set_ylabel(tn1, color='red')
    lines_1 = ax1.plot(x, y1, color='red', label=tn1)
    ax1.tick_params(axis='y', labelcolor='red')
    ax1.set_ylim(ls1, le1)

    # Right axis: second series on a twin of the left axis.
    ax2 = ax1.twinx()
    ax2.set_ylabel(tn2, color='blue')
    lines_2 = ax2.plot(x, y2, color='blue', label=tn2)
    ax2.tick_params(axis='y', labelcolor='blue')
    ax2.set_ylim(ls2, le2)

    # A bare plt.legend() only collects artists from the current axes (ax2),
    # which silently drops the first series; pass the handles of both axes.
    ax2.legend(handles=lines_1 + lines_2)
    plt.show()
    return

"""
# data visualization
ls1=ls2=60 
le1=le2=100

y1, y2 = y1_open, y2_open
draw_2lines(x, y1, tn1, y2, tn2, ls1, le1, ls2, le2)
"""

def time_point_dic(time_list, point_close):
    """Map each entry of ``time_list`` to the value at the same position in ``point_close``.

    Positions are driven by ``time_list``: a shorter ``point_close`` raises
    ``IndexError``; extra trailing values are ignored. When a key repeats, the
    value from its last occurrence wins.
    """
    return {time_list[pos]: point_close[pos] for pos in range(len(time_list))}

# 
def data_union(dic1, dic2):
    """Align two time-keyed dicts on the union of their keys (in place).

    Any key present in only one dict is added to the other with the
    placeholder value ``-1``. Both input dicts are mutated.

    Returns:
        The sorted list of all keys found in either dict.
    """
    all_times = sorted(set(dic1) | set(dic2))
    for moment in all_times:
        # setdefault only writes the placeholder when the key is missing
        dic1.setdefault(moment, -1)
        dic2.setdefault(moment, -1)
    return all_times

def bin_analysis(point_list, low_level=0.0, high_level=3.0, level_width=0.1):
    """Print a cumulative frequency table of ``point_list`` over fixed-width bins.

    Bins are centred on ``low_level, low_level + level_width, ..., high_level``
    and each value is assigned to the nearest bin. Values beyond either end
    are counted in the first / last bin instead of raising ``KeyError``.

    One row is printed per bin: bin label, cumulative count, cumulative
    percentage. For an empty ``point_list`` the percentage is printed as 0.0
    rather than dividing by zero.

    Args:
        point_list: numeric values to tabulate.
        low_level: label of the first bin.
        high_level: label of the last bin.
        level_width: distance between bins. Labels are rounded to one decimal,
            so this should be a multiple of 0.1.

    Returns:
        None (the table is printed).
    """
    k_num = round((high_level - low_level) / level_width) + 1
    N = len(point_list)

    # Labels start at low_level (the original always started at 0, which
    # broke any call with a non-zero low_level).
    bin_keys = [round(low_level + k * level_width, 1) for k in range(k_num)]
    fre_dic = {key: 0 for key in bin_keys}

    for v in point_list:
        idx = round((v - low_level) / level_width)
        # clamp out-of-range values into the edge bins
        idx = min(max(idx, 0), k_num - 1)
        fre_dic[bin_keys[idx]] += 1

    cumulative = 0
    for key in bin_keys:
        cumulative = cumulative + fre_dic[key]
        r = round(cumulative / N * 100, 2) if N else 0.0
        print( key, "\t", cumulative, "\t", r, "%" )
    return

def find_previous_businessday(i, x, y):
    """Return the latest ``(time, value)`` at or before index ``i`` whose value is not negative.

    Negative values in ``y`` are treated as "no data" placeholders and are
    skipped by walking backwards from ``i``.

    Args:
        i: index to start from.
        x: sequence of times, parallel to ``y``.
        y: sequence of values; negative entries are skipped.

    Returns:
        ``(x[k], y[k])`` for the largest ``k <= i`` with ``y[k] >= 0``. If every
        value from index 0 to ``i`` is negative, ``(x[0], y[0])`` is returned,
        so the caller still sees a negative value.
    """
    idx = i
    # Step back BEFORE reading. The previous version read the current index
    # again and then decremented, so index 0 was never examined and a
    # placeholder could be returned even though a valid value existed.
    while idx > 0 and y[idx] < 0:
        idx -= 1
    return x[idx], y[idx]

def find_next_businessday(i, j, x, y):
    """Return the largest value of ``y`` over indices ``i..j`` (inclusive).

    The window is cut off at ``len(x) - 1``. Despite the name, no date is
    returned, only the maximum value; ``0`` is returned for an empty window.
    """
    last_exclusive = min(j + 1, len(x))
    window = [y[pos] for pos in range(i, last_exclusive)]
    return max(window) if window else 0

# 
def back_test(signal, in_days, x,
              y1_open, y1_close, y1_low, y1_high,
              y2_open, y2_close, y2_low, y2_high):
    """Simulate one trade per qualifying day and collect the per-share profit.

    For each index ``i`` the reference price is the most recent non-negative
    ``y1_close`` at or before ``i`` (via ``find_previous_businessday``) and the
    target day is ``i + 1`` of the ``y2_*`` series. Target days whose open is
    negative are skipped. A trade is recorded when the target day's low is
    below ``reference price - signal``.

    Args:
        signal: gap between reference close and target low that triggers a trade.
        in_days: the sell window ends at index ``i + in_days``.
        x: time axis shared by all series.
        y1_open, y1_low, y1_high: accepted but not used by this function.
        y1_close: reference series.
        y2_open, y2_close, y2_low, y2_high: target series.

    Returns:
        ``(profit_list, other_info)`` where ``other_info`` holds one row per trade:
        ``[reference_t, reference_p, target_t, target_open, target_close,
        target_low, target_high, buy_point, sell_point, profit]``.
    """
    profits, details = [], []
    for day in range(len(x) - 1):
        # reference stock info
        ref_time, ref_price = find_previous_businessday(day, x, y1_close)

        # target stock info (the following row)
        nxt = day + 1
        tgt_time = x[nxt]
        tgt_open, tgt_close = y2_open[nxt], y2_close[nxt]
        tgt_low, tgt_high = y2_low[nxt], y2_high[nxt]

        if tgt_open < 0:
            # negative open: no data for the target on this row
            continue
        if not (tgt_low < ref_price - signal):
            continue

        # NOTE(review): the buy price uses a hard-coded offset of 2 rather than
        # `signal`; for signal < 2 this price can be below the day's low.
        # Confirm whether that is intended.
        buy_price = min(tgt_open, ref_price - 2)
        window_end = day + in_days
        # best exit: the target day's close, or the highest high in the window after it
        sell_price = max(tgt_close, find_next_businessday(day + 2, window_end, x, y2_high))
        gain = round(sell_price - buy_price, 2)

        profits.append(gain)
        details.append([ref_time, ref_price, tgt_time, tgt_open, tgt_close,
                        tgt_low, tgt_high, buy_price, sell_price, gain])
    return profits, details

def get_list(from_num=1.5, to_num=3.0, step_num=0.1):
    """Return ``from_num, from_num + step_num, ...`` up to and including ``to_num``.

    Every value is rounded to one decimal, so ``step_num`` should be a multiple
    of 0.1.

    Args:
        from_num: first value.
        to_num: inclusive upper bound.
        step_num: increment between values.

    Returns:
        List of floats rounded to one decimal; empty when ``from_num > to_num``.

    Raises:
        ValueError: if the rounded sequence cannot advance (zero, negative or
            too-small ``step_num``). Previously this looped forever.
    """
    # Float accumulation can overshoot the bound by a few ulps
    # (0.2 + 0.1 == 0.30000000000000004), which used to drop the endpoint.
    tolerance = 1e-9
    tmp_list = []
    previous = None
    num = from_num
    while num <= to_num + tolerance:
        num = round(num, 1)
        if previous is not None and num <= previous:
            raise ValueError("step_num is too small to advance after rounding to one decimal")
        tmp_list.append( num )
        previous = num
        num += step_num
    return tmp_list

def profit_analysis(profit_list, from_num=1.0, to_num=3.0, step_num=0.1):
    """For each profit threshold, count how many trades reached it.

    Thresholds come from ``get_list(from_num, to_num, step_num)``.

    Args:
        profit_list: per-trade profits.
        from_num, to_num, step_num: threshold range, forwarded to ``get_list``.

    Returns:
        List of ``(threshold, N, count, percent)`` tuples, where ``N`` is
        ``len(profit_list)``, ``count`` is the number of profits ``>= threshold``
        and ``percent`` is ``count / N * 100`` rounded to two decimals. With an
        empty ``profit_list`` the percentage is ``0.0`` (this used to raise
        ``ZeroDivisionError``).
    """
    N = len(profit_list)
    info = []
    for profit_per_share in get_list(from_num, to_num, step_num):
        counter = sum(1 for v in profit_list if v >= profit_per_share)
        r = round(counter / N * 100, 2) if N else 0.0
        info.append( (profit_per_share, N, counter, r) )
    return info

#
# data_info, 
    # signal
        # in_days
            # profit_per_share, ratio
def data_by_signal(data_info, ll=2.0, hl=11):
    """Flatten the nested ``data_info`` dict into one DataFrame row per (signal, days).

    Args:
        data_info: ``{signal: {days: [entry, ...]}}``. For each entry, element 0
            names the column (``"profit_<element 0>"``) and element 1 is the
            cell value.
        ll: first signal key to read.
        hl: number of signal keys to read; keys are ``round(ll + 0.1 * k, 1)``
            for ``k`` in ``range(hl)``.

    Returns:
        DataFrame with columns ``signal``, ``days`` and one ``profit_*`` column
        per entry. Column names are taken from the first row built.
    """
    rows, headers = [], []
    for step in range(hl):
        level = round(ll + 0.1 * step, 1)
        per_days = data_info[level]
        for days in sorted(per_days):
            entries = per_days[days]
            headers.append(["signal", "days"] + ["profit_" + str(entry[0]) for entry in entries])
            rows.append([level, days] + [entry[1] for entry in entries])
    # every row shares the layout of the first one
    return pd.DataFrame(rows, columns=headers[0])



In [244]:
# Date range passed to get_df_data for both tickers.
st, et = "2022-01-04", "2023-01-31"
print("time range:\t", st, "-", et)

# Stock 1: used as the reference series (y1_*) by the later back-test cells.
tn1 = "BABA" # BABA, BIDU
df_data1 = get_df_data(ticker_name=tn1, start_time=st, end_time=et)
df_data1 = df_data1[ ["Open", "High", "Low", "Close", "Volume"] ]
print("stock:\t", tn1, "\t")
print(df_data1)

# Stock 2: used as the target series (y2_*) by the later back-test cells.
# NOTE(review): the two series are compared directly without any conversion
# step in this notebook -- confirm currency / share-ratio comparability.
tn2 = "9988.HK" # 9988.HK, 9888.HK
df_data2 = get_df_data(ticker_name=tn2, start_time=st, end_time=et)
df_data2 = df_data2[ ["Open", "High", "Low", "Close", "Volume"] ]
print("stock:\t", tn2, "\t")
print(df_data2)

# dict-type: key -> datetime (DataFrame index), value -> price of that column
dic1_open = time_point_dic(list(df_data1.index), list(df_data1['Open']))
dic1_close = time_point_dic(list(df_data1.index), list(df_data1['Close']))
dic1_low = time_point_dic(list(df_data1.index), list(df_data1['Low']))
dic1_high = time_point_dic(list(df_data1.index), list(df_data1['High']))

dic2_open = time_point_dic(list(df_data2.index), list(df_data2['Open']))
dic2_close = time_point_dic(list(df_data2.index), list(df_data2['Close']))
dic2_low = time_point_dic(list(df_data2.index), list(df_data2['Low']))
dic2_high = time_point_dic(list(df_data2.index), list(df_data2['High']))
# sizes before alignment (one row per stock)
print( "2 stocks:\t")
print( "\t", len(dic1_open), len(dic1_close), len(dic1_low), len(dic1_high) )
print( "\t", len(dic2_open), len(dic2_close), len(dic2_low), len(dic2_high) )

# data union, stock 1 may have some business-day when stock 2 in holiday, and vice versa
# data_union mutates both dicts: dates missing from one side are filled with -1.
x1 = data_union(dic1_open, dic2_open)
x2 = data_union(dic1_close, dic2_close)
x3 = data_union(dic1_low, dic2_low)
x4 = data_union(dic1_high, dic2_high)
# all four unions must cover the same number of dates
assert len(x1)==len(x2)==len(x3)==len(x4)
# sizes after alignment (both rows should now match)
print( "2 stocks:\t")
print( "\t", len(dic1_open), len(dic1_close), len(dic1_low), len(dic1_high) )
print( "\t", len(dic2_open), len(dic2_close), len(dic2_low), len(dic2_high) )

# split dict-type data into x & y list-type following the datetime order
# x is the sorted union of dates; every y*_ list is parallel to x, with prices
# rounded to 2 decimals (the -1 placeholders are kept as -1).
x = x1
y1_open, y1_close, y1_low, y1_high = [], [], [], []
y2_open, y2_close, y2_low, y2_high = [], [], [], []
for t in x:
    y1_open.append( round(dic1_open[t], 2) )
    y1_close.append( round(dic1_close[t], 2) )
    y1_low.append( round(dic1_low[t], 2) )
    y1_high.append( round(dic1_high[t], 2) )
    # same four fields for stock 2
    y2_open.append( round(dic2_open[t], 2) )
    y2_close.append( round(dic2_close[t], 2) )
    y2_low.append( round(dic2_low[t], 2) )
    y2_high.append( round(dic2_high[t], 2) )
print( "2 stocks:\t", len(x) )
print( "\t", len(y1_open), len(y1_close), len(y1_low), len(y1_high) )
print( "\t", len(y2_open), len(y2_close), len(y2_low), len(y2_high) )

time range:	 2022-01-04 - 2023-01-31
[*********************100%***********************]  1 of 1 completed
stock:	 BABA 	
                  Open        High         Low       Close    Volume
Date                                                                
2022-01-04  119.529999  120.870003  115.769997  119.559998  20824000
2022-01-05  118.000000  126.620003  117.699997  121.160004  36651100
2022-01-06  124.260002  128.399994  123.464996  126.629997  32045800
2022-01-07  130.240005  133.880005  128.179993  129.809998  38113000
2022-01-10  131.990005  132.600006  126.230003  128.300003  23372200
...                ...         ...         ...         ...       ...
2022-12-27   86.550003   90.689003   86.209999   89.860001  22959900
2022-12-28   88.980003   90.150002   86.464996   87.199997  16268800
2022-12-29   87.625000   89.519997   87.059998   89.129997  12535400
2022-12-30   87.449997   89.410004   87.410004   88.089996  11906200
2023-01-03   91.110001   94.449997   90.750000   91

In [245]:
# data visualization: file:///C:/Users/Admin/lines.html
def process(y):
    """Return a copy of ``y`` in which each negative value is replaced by the value before it.

    Replacement runs left to right, so a run of negative values all take the
    last non-negative value seen. A negative first element is left unchanged.
    The input is not modified.
    """
    filled = [value for value in y]
    for pos in range(1, len(y)):
        if filled[pos] < 0:
            filled[pos] = filled[pos - 1]
    return filled

# Line chart of all eight series (open/close/low/high for both tickers).
# process() replaces the -1 placeholders with the preceding value so the lines
# do not drop below the axis on rows where one stock has no data.
line = Line(init_opts=opts.InitOpts(width="2500px", height="1200px"))
line.add_xaxis(x)
for ticker, fields in (
    (tn1, (("_open", y1_open), ("_close", y1_close), ("_low", y1_low), ("_high", y1_high))),
    (tn2, (("_open", y2_open), ("_close", y2_close), ("_low", y2_low), ("_high", y2_high))),
):
    for suffix, series in fields:
        line.add_yaxis(ticker + suffix, process(series))
line.set_global_opts(
    title_opts=opts.TitleOpts(title="stocks comparison"),
    tooltip_opts=opts.TooltipOpts(trigger="axis"),
    yaxis_opts=opts.AxisOpts(
        name='dollar',
        splitline_opts=opts.SplitLineOpts(is_show=True),
        min_=60,
    ),
)
# render() writes the HTML file; its return value is the cell output
line.render("lines.html")

'C:\\Users\\Admin\\lines.html'

In [246]:
# data visualization: file:///C:/Users/Admin/lines_update.html
# Keep only the target days whose open is at least 1.8 below the reference close.
x_update, y1_close_update = [], []
y2_open_update, y2_close_update, y2_low_update, y2_high_update = [], [], [], []
for i in range(len(x) - 1):
    # reference stock info: latest non-negative close at or before row i
    reference_t, reference_p = find_previous_businessday(i, x, y1_close)
    # target stock info: the following row
    nxt = i + 1
    target_t = x[nxt]
    target_open, target_close = y2_open[nxt], y2_close[nxt]
    target_low, target_high = y2_low[nxt], y2_high[nxt]
    # skip rows where the target has no data (negative open) ...
    if target_open < 0:
        continue
    # ... and rows where the gap between reference close and target open is under 1.8
    if reference_p - target_open < 1.8:
        continue
    x_update.append(target_t)
    y1_close_update.append(reference_p)
    y2_open_update.append(target_open)
    y2_close_update.append(target_close)
    y2_low_update.append(target_low)
    y2_high_update.append(target_high)

# Line chart of the filtered rows: reference close against the target's
# open/close/low/high on the following row.
line_update = Line(init_opts=opts.InitOpts(width="1800px", height="960px"))
line_update.add_xaxis(x_update)
for series_name, series_values in (
    (tn1 + "_close", y1_close_update),
    (tn2 + "_open", y2_open_update),
    (tn2 + "_close", y2_close_update),
    (tn2 + "_low", y2_low_update),
    (tn2 + "_high", y2_high_update),
):
    line_update.add_yaxis(series_name, series_values)
line_update.set_global_opts(
    title_opts=opts.TitleOpts(title="stocks comparison"),
    tooltip_opts=opts.TooltipOpts(trigger="axis"),
    yaxis_opts=opts.AxisOpts(
        name='dollar',
        splitline_opts=opts.SplitLineOpts(is_show=True),
        min_=60,
    ),
)
# render() writes the HTML file; its return value is the cell output
line_update.render("lines_update.html")

'C:\\Users\\Admin\\lines_update.html'

In [247]:
#
# Grid back-test: every signal in signal_list x every holding window 2..20.
#   data_info[signal][in_days] -> list of (profit_per_share, ratio, N)
#   log_info[signal][in_days]  -> per-trade rows returned by back_test
days_next = 20+1
signal_list = get_list(1.5, 3.0, 0.1)
data_info, log_info = {}, {}
for signal in signal_list:
    data_info[signal], log_info[signal] = {}, {}
    for in_days in range(2, days_next):
        profit_list, other_info = back_test(
            signal, in_days, x,
            y1_open, y1_close, y1_low, y1_high,
            y2_open, y2_close, y2_low, y2_high,
        )
        log_info[signal][in_days] = other_info
        profit_info = profit_analysis(profit_list, 1.0, 3.0, 0.2)
        data_info[signal][in_days] = [
            (threshold, pct, total) for threshold, total, _hits, pct in profit_info
        ]

# 16 signal levels starting at 1.5, matching signal_list above
info_df = data_by_signal(data_info, 1.5, 16)

# Export the table twice, once per sort order.
csv_suffix = tn1 + "_" + tn2[:4] + ".csv"
tmp_df = info_df.sort_values(by=['signal', 'days'])
tmp_df.to_csv("C:/Users/Admin/Desktop/stocks_analyze_predict/stock_analysis01_" + csv_suffix)
tmp_df = info_df.sort_values(by=['days', 'signal'])
tmp_df.to_csv("C:/Users/Admin/Desktop/stocks_analyze_predict/stock_analysis02_" + csv_suffix)
info_df

Unnamed: 0,signal,days,profit_1.0,profit_1.2,profit_1.4,profit_1.6,profit_1.8,profit_2.0,profit_2.2,profit_2.4,profit_2.6,profit_2.8,profit_3.0
0,1.5,2,60.00,56.89,54.22,51.11,47.11,45.33,43.11,40.89,38.22,37.33,36.44
1,1.5,3,70.67,68.44,65.78,64.00,60.00,58.22,56.00,52.89,50.67,49.78,48.44
2,1.5,4,75.11,73.33,70.22,68.00,65.78,64.00,62.67,59.56,57.33,55.56,54.67
3,1.5,5,76.89,75.11,72.44,70.67,68.89,67.56,66.22,63.56,61.78,60.89,59.56
4,1.5,6,78.22,76.44,74.22,72.44,70.22,68.89,67.56,65.78,64.44,63.56,62.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,3.0,16,84.29,82.86,82.14,82.14,80.71,79.29,77.86,75.71,75.71,74.29,72.86
300,3.0,17,85.00,83.57,83.57,83.57,82.14,80.71,79.29,77.14,77.14,75.71,73.57
301,3.0,18,85.71,84.29,84.29,84.29,82.86,81.43,80.00,77.86,77.86,76.43,75.00
302,3.0,19,86.43,85.00,85.00,85.00,84.29,82.86,80.71,78.57,78.57,77.14,75.71


In [248]:
# rows for a 5-day holding window, one per signal level
info_df.loc[info_df['days'].eq(5)]

Unnamed: 0,signal,days,profit_1.0,profit_1.2,profit_1.4,profit_1.6,profit_1.8,profit_2.0,profit_2.2,profit_2.4,profit_2.6,profit_2.8,profit_3.0
3,1.5,5,76.89,75.11,72.44,70.67,68.89,67.56,66.22,63.56,61.78,60.89,59.56
22,1.6,5,76.15,74.31,71.56,70.18,68.81,67.43,66.06,63.3,61.47,60.55,59.17
41,1.7,5,75.93,74.07,71.3,69.91,68.52,67.13,65.74,62.96,61.11,60.19,58.8
60,1.8,5,76.06,74.18,71.36,69.95,68.54,67.14,65.73,62.91,61.03,60.09,58.69
79,1.9,5,75.6,73.68,71.29,69.86,68.42,66.99,65.55,62.68,60.77,59.81,58.37
98,2.0,5,75.24,73.3,70.87,69.42,67.96,66.5,65.05,62.14,60.19,59.22,57.77
117,2.1,5,74.24,72.22,70.2,68.69,67.17,65.66,64.14,61.62,60.1,59.09,57.58
136,2.2,5,73.98,71.94,69.9,68.37,66.84,65.31,63.78,61.22,59.69,58.67,57.14
155,2.3,5,73.3,71.2,69.11,67.54,65.97,64.4,62.83,60.21,59.16,58.12,56.54
174,2.4,5,72.28,70.11,67.93,66.85,65.22,64.13,62.5,59.78,58.7,57.61,55.98


In [249]:
# rows for signal level 1.8, one per holding window
info_df.loc[info_df['signal'].eq(1.8)]

Unnamed: 0,signal,days,profit_1.0,profit_1.2,profit_1.4,profit_1.6,profit_1.8,profit_2.0,profit_2.2,profit_2.4,profit_2.6,profit_2.8,profit_3.0
57,1.8,2,58.22,54.93,52.58,49.77,46.01,44.13,41.78,39.91,37.56,36.62,35.68
58,1.8,3,69.48,67.14,64.79,63.38,59.62,57.75,55.4,52.11,50.23,49.3,47.89
59,1.8,4,74.18,72.3,69.48,67.61,65.73,63.85,62.44,59.15,56.81,54.93,53.99
60,1.8,5,76.06,74.18,71.36,69.95,68.54,67.14,65.73,62.91,61.03,60.09,58.69
61,1.8,6,77.46,75.59,73.24,71.83,69.95,68.54,67.14,65.26,63.85,62.91,61.97
62,1.8,7,79.34,77.46,75.12,73.71,72.3,71.36,69.95,67.61,66.2,65.26,63.85
63,1.8,8,81.22,79.34,77.0,75.59,74.65,73.71,72.3,69.95,68.54,67.61,66.2
64,1.8,9,82.16,80.28,77.93,77.0,76.06,75.12,73.71,72.3,71.36,70.42,69.01
65,1.8,10,83.1,81.22,79.34,78.4,77.93,77.0,75.59,74.18,73.24,72.3,70.89
66,1.8,11,84.04,82.16,80.28,79.34,78.87,77.93,76.53,75.59,74.65,73.71,72.3


In [250]:
# Column of info_df to plot (one of the profit_* columns).
how_much_profit = 'profit_2.2'

# One line per signal level; x axis = holding window in days.
line = (
    Line(init_opts=opts.InitOpts(width="1800px", height="900px"))
    .set_global_opts(title_opts=opts.TitleOpts(title="stocks analysis"),
                     tooltip_opts=opts.TooltipOpts(trigger="axis"),
                     yaxis_opts=opts.AxisOpts(name='percentage for '+how_much_profit,
                                              splitline_opts=opts.SplitLineOpts(is_show=True),
                                              min_=0))
)

x_values = [str(v) + " days" for v in range(2, days_next)]
line.add_xaxis( x_values )

for signal in signal_list:
    signal = round(signal, 1)
    html_df = info_df[ info_df['signal']==signal ]
    # pyecharts serialises chart data to JSON and documents native Python
    # types only, so hand it a plain list instead of a pandas Series.
    y_values = html_df[how_much_profit].tolist()
    line.add_yaxis( str(signal) + " signal", y_values )

line.render("lines_stock_analysis01.html")

'C:\\Users\\Admin\\lines_stock_analysis01.html'

In [251]:
def search_by_time(df_data2, tt):
    """Return the open-to-close change (%) of up to two rows preceding ``tt``.

    Args:
        df_data2: DataFrame with 'Open' and 'Close' columns.
        tt: index label to look up (first match is used).

    Returns:
        List of ``(Close - Open) / Open * 100`` rounded to two decimals for the
        (at most two) rows immediately before ``tt``, oldest first. Empty when
        ``tt`` is the first row or is not in the index. Previously a missing
        ``tt`` silently returned the figures of the last two rows of the frame.
    """
    index_list = list(df_data2.index)
    try:
        position = index_list.index(tt)
    except ValueError:
        # unknown timestamp: there is no "previous" row to report
        return []

    window = df_data2.iloc[max(position - 2, 0):position]
    previous_info = []
    for open_p, close_p in zip(window['Open'], window['Close']):
        r = (close_p - open_p) / open_p * 100
        previous_info.append( round(r, 2) )
    return previous_info

def rise_counter(tmp_list):
    """Count the strictly positive entries of ``tmp_list``."""
    return sum(1 for value in tmp_list if value > 0)


# List the trades that earned less than 1 per share and were NOT preceded by a
# rising target (two up days, or a summed open-to-close change above 3%), then
# report their share of all trades for the (signal, in_days) pair.
tmp_list = []
for signal in [1.8]: # signal_list
    for in_days in [10]: # range(2, days_next)
        trades = log_info[signal][in_days]
        counter = 0
        for it in trades:
            rt, rp, tt, topen, tclose, tlow, thigh, buy, sell, p = it
            if not (p < 1): # 3*buy/100
                continue
            previous_info = search_by_time(df_data2, tt)
            pre_counter = rise_counter(previous_info)
            pre_sum = round(sum(previous_info), 2)
            if pre_sum>3 or pre_counter>=2:
                continue
            print( rt, rp, tt, topen, tclose, tlow, thigh, "\t\t", buy, sell, p )
            print("\t\t", pre_sum, previous_info)
            counter += 1
        # guard: a (signal, in_days) pair with no trades used to raise ZeroDivisionError
        r = counter/len(trades)*100 if trades else 0.0
        tmp_list.append( [signal, in_days, counter, len(trades), round(r, 2)] )
# summary for the last (signal, in_days) pair processed
counter, len(log_info[signal][in_days]), round(r, 2)

2022-02-18 00:00:00 118.99 2022-02-21 00:00:00 115.8 114.9 114.0 117.1 		 115.8 114.9 -0.9
		 -1.71 [0.82, -2.53]
2022-02-24 00:00:00 108.93 2022-02-25 00:00:00 107.4 105.3 104.9 108.2 		 106.93 106.6 -0.33
		 -0.24 [2.18, -2.42]
2022-03-02 00:00:00 105.42 2022-03-03 00:00:00 103.5 104.4 103.0 104.5 		 103.42 104.4 0.98
		 1.57 [1.57, 0.0]
2022-03-30 00:00:00 116.58 2022-03-31 00:00:00 114.9 112.1 109.7 114.9 		 114.58 114.6 0.02
		 -0.25 [0.71, -0.96]
2022-04-06 00:00:00 107.68 2022-04-07 00:00:00 106.4 105.2 105.2 108.9 		 105.68 105.2 -0.48
		 1.43 [2.8, -1.37]
2022-04-08 00:00:00 103.53 2022-04-11 00:00:00 102.8 98.5 97.85 103.0 		 101.53 100.6 -0.93
		 -0.16 [-1.13, 0.97]
2022-04-12 00:00:00 99.75 2022-04-13 00:00:00 98.4 98.5 97.0 99.5 		 97.75 98.5 0.75
		 -3.67 [-4.18, 0.51]
2022-05-04 00:00:00 101.41 2022-05-05 00:00:00 99.5 96.7 96.2 101.1 		 99.41 96.7 -2.71
		 -1.68 [0.3, -1.98]
2022-05-05 00:00:00 94.64 2022-05-06 00:00:00 92.05 90.35 89.3 92.85 		 92.05 91.8 -0.25
		 -4.7

(23, 213, 10.8)

In [252]:
def _print_run_table(bits, symbol, separator):
    """Print a run-length table for ``symbol`` inside the 0/1 string ``bits``.

    ``bits`` is split on ``separator`` so each non-empty piece is one run of
    ``symbol``. One row is printed per run length: length, number of runs, and
    the share (%) of all ``symbol`` characters that sit in runs of that length.
    Returns the ``{length: number_of_runs}`` dict.
    """
    histogram = {}
    for run in bits.split(separator):
        if len(run.strip()) > 0:
            histogram[len(run)] = histogram.get(len(run), 0) + 1
    total = bits.count(symbol)
    for length in sorted(histogram):
        share = round(histogram[length] * length / total * 100, 2)
        print( length, "\t", histogram[length], "\t", share )
    return histogram


# Daily close-minus-open of stock 2, stored as a new column on df_data2.
df_data2['return01'] = df_data2['Close']-df_data2['Open']

# Encode each day as "1" (close above open) or "0" (otherwise).
st = "".join("1" if v > 0 else "0" for v in list(df_data2['return01']))
print(st)
print(len(st))

# runs of consecutive "1" days
dic = _print_run_table(st, "1", "0")

print()

# runs of consecutive "0" days
dic = _print_run_table(st, "0", "1")

0011011010001101100010010101110011001101100001001010110011001100101100001011111000110001001001001101111010110111011110010010000101000001000100000001000001100101010000101001101010100010110000001101001100111011000101110001001110111011010011001111011
247
1 	 33 	 28.95
2 	 23 	 40.35
3 	 6 	 15.79
4 	 3 	 10.53
5 	 1 	 4.39

1 	 30 	 22.56
2 	 20 	 30.08
3 	 8 	 18.05
4 	 4 	 12.03
5 	 2 	 7.52
6 	 1 	 4.51
7 	 1 	 5.26


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
2 stocks:	
	 84 84 84 84
	 83 83 83 83
	 86 86 86 86
	 86 86 86 86
3.3 2022-09-05 00:00:00
3.2 2022-09-07 00:00:00
2.32 2022-09-08 00:00:00
2.34 2022-09-09 00:00:00
4.89 2022-09-12 00:00:00
3.96 2022-09-19 00:00:00
6.29 2022-09-28 00:00:00
2.21 2022-09-29 00:00:00
2.04 2022-09-30 00:00:00
2.8 2022-10-03 00:00:00
6.46 2022-10-04 00:00:00
2.26 2022-10-13 00:00:00
3.57 2022-10-17 00:00:00
2.12 2022-10-20 00:00:00
2.58 2022-10-21 00:00:00
6.21 2022-10-26 00:00:00
2.13 2022-10-31 00:00:00
2.21 2022-11-03 00:00:00
6.77 2022-11-10 00:00:00
6.01 2022-11-17 00:00:00
2.89 2022-11-22 00:00:00
3.1 2022-11-23 00:00:00
2.1 2022-11-24 00:00:00
3.68 2022-11-28 00:00:00
7.66 2022-11-30 00:00:00
3.14 2022-12-01 00:00:00
5.26 2022-12-02 00:00:00
3.28 2022-12-07 00:00:00
4.77 2022-12-08 00:00:00
3.06 2022-12-13 00:00:00
3.36 2022-12-21 00:00:00
3.36 2022-12-27 00:00:00


'C:\\Users\\Admin\\lines_update.html'

In [876]:
# 
import datetime
def get_dates_from_range(start_date, end_dates):
    """List every calendar day from ``start_date`` up to, but excluding, ``end_dates``.

    Args:
        start_date: first day, formatted ``DD-MM-YYYY``.
        end_dates: day after the last one returned, formatted ``DD-MM-YYYY``.

    Returns:
        List of ``YYYY-MM-DD`` strings (month and day zero-padded); empty when
        the end is not after the start.
    """
    first = datetime.datetime.strptime(start_date, "%d-%m-%Y")
    last = datetime.datetime.strptime(end_dates, "%d-%m-%Y")
    span = (last - first).days
    days = (first + datetime.timedelta(days=offset) for offset in range(span))
    return ["{}-{:02d}-{:02d}".format(day.year, day.month, day.day) for day in days]

def open_close_data(tn, st, et):
    """Return ``(Open, Close)`` of the first row downloaded for ticker ``tn``.

    The data comes from ``get_df_data(tn, st, et)``; ``(0, 0)`` is returned
    when the download is empty.
    """
    tmp_df = get_df_data(ticker_name=tn, start_time=st, end_time=et)
    if len(tmp_df) == 0:
        return 0, 0
    first_row = tmp_df.iloc[0]
    return first_row['Open'], first_row['Close']

#print(start_time, end_time)

def _best_future_close(times, closes):
    """For each position, find the highest close at or after it.

    Returns a list of ``(time, close)`` pairs parallel to the inputs. The pair
    is the position itself unless a strictly higher close occurs later, in
    which case it is the earliest later position holding that maximum. NaN
    closes never become the maximum.
    """
    n = len(closes)
    result = [None] * n
    best_time, best_point = None, None  # max over positions after the current one
    for idx in range(n - 1, -1, -1):
        current = closes[idx]
        if best_point is not None and current < best_point:
            result[idx] = (best_time, best_point)
        else:
            result[idx] = (times[idx], current)
        # fold the current close into the running max; ">=" keeps the EARLIEST
        # time on ties, and "current == current" skips NaN
        if current == current and (best_point is None or current >= best_point):
            best_time, best_point = times[idx], current
    return result


# For each day in the range: download 1-minute bars, and for every minute
# compute the gain (%) from that minute's close to the best later close of the
# same download. Keep the three best minutes per day.
ticker_name = '9988.HK'
date_list = get_dates_from_range('30-12-2022', '05-01-2023')
interval_len = '1m'
result_columns = ['open_point', 'close', 'current_time', 'current_point', 'future_max_time', 'future_max_point', 'ratio']

df_list = []
for start_time, end_time in zip(date_list, date_list[1:]):
    df_data = yf.download(tickers=ticker_name, start=start_time, end=end_time, interval=interval_len)
    if len(df_data) == 0:
        # nothing returned for this date range
        continue

    # open/close taken from a separate get_df_data download for the same range
    open_point, close_point = open_close_data(ticker_name, start_time, end_time)

    times = list(df_data.index)
    closes = df_data['Close'].to_numpy()
    # single reverse pass instead of rescanning the rest of the day for every minute
    best_later = _best_future_close(times, closes)

    info_list = []
    for current_time, current_point, (future_max_time, future_max_point) in zip(times, closes, best_later):
        ratio = (future_max_point - current_point)/current_point * 100
        info_list.append( [open_point, close_point, current_time, current_point, future_max_time, future_max_point, ratio] )
    new_df = pd.DataFrame(info_list, columns=result_columns)

    max_info = new_df.sort_values(by=['ratio'], ascending=False)
    print(start_time, end_time, "\t")
    df_list.append( max_info.head(3) )

# pd.concat raises on an empty list, so fall back to an empty frame when no
# day returned any data
useful_info = pd.concat( df_list ) if df_list else pd.DataFrame(columns=result_columns)
useful_info
#useful_info.to_csv('C:/Users/Admin/Desktop/stocks_analyze_predict/daily_profit.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
2022-12-30 2022-12-31 	
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- 9988.HK: No data found for this date range, symbol may be delisted
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- 9988.HK: No data found for this date range, symbol may be delisted
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- 9988.HK: No data found for this date range, symbol may be delisted
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
2023-01-03 2023-01-04 	


Unnamed: 0,open_point,close,current_time,current_point,future_max_time,future_max_point,ratio
71,87.300003,86.25,2022-12-30 10:41:00,85.849998,2022-12-30 13:12:00,87.0,1.339548
73,87.300003,86.25,2022-12-30 10:43:00,85.900002,2022-12-30 13:12:00,87.0,1.280557
72,87.300003,86.25,2022-12-30 10:42:00,85.949997,2022-12-30 13:12:00,87.0,1.221644
21,85.699997,88.650002,2023-01-03 09:51:00,84.650002,2023-01-03 14:15:00,89.099998,5.256937
22,85.699997,88.650002,2023-01-03 09:52:00,84.699997,2023-01-03 14:15:00,89.099998,5.194807
20,85.699997,88.650002,2023-01-03 09:50:00,84.75,2023-01-03 14:15:00,89.099998,5.132742


In [881]:
# Each pair is (numerator, denominator); the ratio is expressed in percent.
# NOTE(review): presumably (gain, buy price) of three trades, with 0.5 per
# trade subtracted below as a cost -- confirm.
ratio_list = [
    gain / price * 100
    for gain, price in ((2.05, 82.95), (2.35, 85.75), (2.5, 85))
]

sum(ratio_list) - 0.5*len(ratio_list)

6.653069546082447