In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

## Data preparation and train

In [2]:
data = pd.read_csv('stock_data.csv')

In [3]:
data.head()

Unnamed: 0,date,ticker,open,high,low,close,volume,outstanding_share,turnover,pe,pe_ttm,pb,ps,ps_ttm,dv_ratio,dv_ttm,total_mv,qfq_factor
0,2005-01-04,sh600000,0.77,0.77,0.75,0.76,3808939.0,900000000.0,0.004232,17.199,14.4219,2.0777,3.1439,2.2097,6.9549,6.9549,2693520.0,8.895254
1,2005-01-05,sh600000,0.76,0.76,0.74,0.75,5225244.0,900000000.0,0.005806,16.924,14.1913,2.0445,3.0937,2.1744,7.0679,7.0679,2650455.0,8.895254
2,2005-01-06,sh600000,0.75,0.75,0.73,0.74,4298099.0,900000000.0,0.004776,16.6991,14.0026,2.0173,3.0525,2.1455,7.1632,7.1632,2615220.0,8.895254
3,2005-01-07,sh600000,0.74,0.75,0.73,0.74,4362864.0,900000000.0,0.004848,16.7491,14.0446,2.0233,3.0617,2.1519,7.1418,7.1418,2623050.0,8.895254
4,2005-01-10,sh600000,0.75,0.77,0.74,0.77,7115260.0,900000000.0,0.007906,17.324,14.0575,2.0082,3.1668,2.2258,6.9048,6.9048,2713095.0,8.895254


In [4]:
data.shape

(10129344, 18)

In [5]:
data = data[data['date'] >= '2011-01-01']
data.shape

(8135010, 18)

In [6]:
data['eps'] = data['close'] / data['pe']

In [7]:
data['price'] = data['close']

In [8]:
data = data[['date', 'ticker', 'price', 'eps']]
data = data.dropna()
data.head()

Unnamed: 0,date,ticker,price,eps
1390,2011-01-04,sh600000,4.53,0.330891
1391,2011-01-05,sh600000,4.56,0.330464
1392,2011-01-06,sh600000,4.55,0.330779
1393,2011-01-07,sh600000,4.75,0.330702
1394,2011-01-10,sh600000,4.69,0.330521


In [9]:
first = data.groupby(['ticker'])['date'].min()

In [10]:
last = data.groupby(['ticker'])['date'].max()

In [11]:
lst = []
for i in range(len(first)):
    if first[i] == '2011-01-04' and last[i] == '2022-05-11':
        lst.append(first.index[i])

In [12]:
data['binary'] = data['ticker'].apply(lambda x: x in lst)

In [13]:
data = data[data['binary'] == True]

In [14]:
data = data.drop(columns=['binary'])

In [15]:
data.shape

(3368033, 4)

In [16]:
stds = {}
for company in lst:
    arr = np.array(data[(data['ticker'] == company) & (data['date'] <= '2020-12-31')]['eps'])[::22]
    arr = arr[len(arr)-24:]
    std = np.std(arr[12:]-arr[:-12])
    if not np.isnan(std):
        stds[company] = std

## Test

In [17]:
data = data[data['date'] >= '2018-01-02']

In [18]:
days_test = ['2020-01-02', '2020-02-03', '2020-03-02', '2020-04-01', '2020-05-06', '2020-06-01', '2020-07-01', '2020-08-03',
             '2020-09-01', '2020-10-09', '2020-11-02', '2020-12-01', '2021-01-04', '2021-02-01', '2021-03-01', 
            '2021-04-01', '2021-05-06', '2021-06-01', '2021-07-01', '2021-08-02', '2021-09-01', '2021-10-08', 
            '2021-11-01', '2021-12-01', '2021-12-31']

In [19]:
for strategy in ['l', 's']:
    portfolio = {}
    profits = np.array([])
    for i in tqdm(range(12, len(days_test))):
        if i == 18:
            stds = {}
            for company in lst:
                arr = np.array(data[(data['ticker'] == company) & (data['date'] <= '2021-07-01')]['eps'])[::22]
                arr = arr[len(arr)-24:]
                std = np.std(arr[12:]-arr[:-12])
                if not np.isnan(std):
                    stds[company] = std
        present_day = days_test[i]
        past_day = days_test[i-12]
        arr = []

        profit = 0 
        cnt = 0
        for key in portfolio:
            for item in portfolio[key]:
                if item[0] == 'l':
                    try:
                        profit += (float(data[(data['date'] == present_day) & (data['ticker'] == key)]['price'].iloc[0])- item[1]) / item[1]
                        cnt += 1
                    except Exception:
                        continue
                else:
                    try:
                        profit += (item[1] - float(data[(data['date'] == present_day) & (data['ticker'] == key)]['price'].iloc[0])) / item[1]
                        cnt += 1
                    except Exception:
                        continue
        if cnt != 0:
            profit /= cnt
        profits = np.append(profits, profit)

        if i == len(days_test) - 1:
            break   

        for company in list(stds):
            try:
                present_eps = float(data[(data['date'] == present_day) & (data['ticker'] == company)]['eps'].iloc[0])
                past_eps = float(data[(data['date'] == past_day) & (data['ticker'] == company)]['eps'].iloc[0])
            except Exception:
                continue

            sue = (present_eps - past_eps) / stds[company]
            arr.append([company, sue])

        arr = sorted(arr, key=lambda x: x[1])
        if strategy == 'l':
            for j in range(len(arr) - 1, len(arr) - 6, -1):
                if arr[j][0] not in portfolio:
                    portfolio[arr[j][0]] = [['l', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])]]
                else:
                    portfolio[arr[j][0]].append(['l', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])])
        else:
            for j in range(5):
                if arr[j][0] not in portfolio:
                    portfolio[arr[j][0]] = [['s', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])]]
                else:
                    portfolio[arr[j][0]].append(['s', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])])
    print('========================================================================================')
    print(f'profit: {profits[-1]}')
    print(f'drawdown: {min(profits)}')
    print(f'profit/drawdown: {profits[-1] / abs(min(profits))}')
    print(f'sharpe ratio: {(profits.mean() - 0.02 / len(profits)) / np.std(profits)}')
    print('========================================================================================')
    

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
 92%|████████████████████████████████████████████████████████████████████████▉      | 12/13 [1:22:20<06:51, 411.69s/it]
  0%|                                                                                           | 0/13 [00:00<?, ?it/s]

profit: 0.18970090886501004
drawdown: -0.12186256208073325
profit/drawdown: 1.5566791443243602
sharpe ratio: 0.48759611876148135


 92%|████████████████████████████████████████████████████████████████████████▉      | 12/13 [1:20:32<06:42, 402.70s/it]

profit: -0.10632963062362388
drawdown: -0.1066178092151981
profit/drawdown: -0.9972970876657898
sharpe ratio: -0.2878560187305171





## Validation

In [22]:
days_validation = ['2021-01-04', '2021-02-01', '2021-03-01', '2021-04-01', '2021-05-06', '2021-06-01', '2021-07-01', 
                   '2021-08-02', '2021-09-01', '2021-10-08', '2021-11-01', '2021-12-01', '2022-01-04',
                  '2022-02-07', '2022-03-01', '2022-04-01', '2022-05-05', '2022-05-11']

In [23]:
stds = {}
for company in lst:
    arr = np.array(data[(data['ticker'] == company) & (data['date'] <= '2021-12-31')]['eps'])[::22]
    arr = arr[len(arr)-24:]
    std = np.std(arr[12:]-arr[:-12])
    if not np.isnan(std):
        stds[company] = std

In [30]:
portfolio = {}
profits = np.array([])
for i in tqdm(range(12, len(days_validation))):
    present_day = days_validation[i]
    past_day = days_validation[i-12]
    arr = []
    
    profit = 0 
    cnt = 0
    for key in portfolio:
        for item in portfolio[key]:
            if item[0] == 'l':
                try:
                    profit += (float(data[(data['date'] == present_day) & (data['ticker'] == key)]['price'].iloc[0])- item[1]) / item[1]
                    cnt += 1
                except Exception:
                    continue
            else:
                try:
                    profit += (item[1] - float(data[(data['date'] == present_day) & (data['ticker'] == key)]['price'].iloc[0])) / item[1]
                    cnt += 1
                except Exception:
                    continue
    if cnt != 0:
        profit /= cnt
    profits = np.append(profits, profit)
    
    if i == len(days_validation) - 1:
        break   
    
    for company in list(stds):
        try:
            present_eps = float(data[(data['date'] == present_day) & (data['ticker'] == company)]['eps'].iloc[0])
            past_eps = float(data[(data['date'] == past_day) & (data['ticker'] == company)]['eps'].iloc[0])
        except Exception:
            continue
        
        sue = (present_eps - past_eps) / stds[company]
        arr.append([company, sue])
        
    arr = sorted(arr, key=lambda x: x[1])
    
    if abs(arr[0][1] + arr[1][1] + arr[2][1]) > (arr[-1][1] + arr[-2][1] + arr[-3][1]):
        long_threshold = 1
        short_threshold = 3
    else:
        long_threshold = 3
        short_threshold = 1
    
    for j in range(len(arr) - 1, len(arr) - 1 - long_threshold, -1):
        if arr[j][0] not in portfolio:
            portfolio[arr[j][0]] = [['l', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])]]
        else:
            portfolio[arr[j][0]].append(['l', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])])
    
    for j in range(short_threshold):
        if arr[j][0] not in portfolio:
            portfolio[arr[j][0]] = [['s', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])]]
        else:
            portfolio[arr[j][0]].append(['s', float(data[(data['date'] == present_day) & (data['ticker'] == arr[j][0])]['price'].iloc[0])])
    
    

 83%|█████████████████████████████████████████████████████████████████████▏             | 5/6 [31:30<06:18, 378.13s/it]


In [33]:
print(f'profit: {profits[-1]}')
print(f'drawdown: {min(profits)}')
print(f'profit/drawdown: {profits[-1] / abs(min(profits))}')
print(f'sharpe ratio: {(profits.mean() - 0.02 / len(profits)) / np.std(profits)}')

profit: 0.03251982446556134
drawdown: 0.0
profit/drawdown: inf
sharpe ratio: 1.5146642090361218


  print(f'profit/drawdown: {profits[-1] / abs(min(profits))}')


In [34]:
profits

array([0.        , 0.01986132, 0.02142455, 0.04833004, 0.04038728,
       0.03251982])

In [35]:
portfolio

{'sh600120': [['l', 4.46], ['l', 4.05], ['l', 4.34], ['s', 3.65]],
 'sz002344': [['s', 5.02], ['s', 4.53], ['s', 4.57], ['s', 4.58], ['l', 4.17]],
 'sz300107': [['s', 6.27], ['s', 5.56], ['s', 5.49]],
 'sh600125': [['s', 5.32], ['s', 5.53], ['s', 5.67]],
 'sh600873': [['l', 9.1], ['l', 9.06]],
 'sh600096': [['l', 25.65], ['l', 25.01]],
 'sh600309': [['l', 83.34]]}