In [1]:
# key imports
import os
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import datetime
from sklearn.model_selection import TimeSeriesSplit



In [2]:
# Locations of the competition CSV files.
# NOTE: the "FUNADMENTAL" spelling is kept as-is because the CSV-loading cell
# refers to these exact constant names.
ROOT_PATH = "../../data/"
TEST_ROOT_PATH = "../qids_package/"

# Training inputs: market, fundamental and return data.
TRAIN_MARKET_PATH = os.path.join(ROOT_PATH, "first_round_train_market_data.csv")
TRAIN_FUNADMENTAL_PATH = os.path.join(ROOT_PATH, "first_round_train_fundamental_data.csv")
TRAIN_RETURN_PATH = os.path.join(ROOT_PATH, "first_round_train_return_data.csv")

# Test inputs: market and fundamental data only.
TEST_MARKET_PATH = os.path.join(TEST_ROOT_PATH, "first_round_test_market_data.csv")
TEST_FUNADMENTAL_PATH = os.path.join(TEST_ROOT_PATH, "first_round_test_fundamental_data.csv")

In [3]:
# Load every input CSV into its own DataFrame (training files first, then test files).
df_train_market, df_train_return, df_train_fundamental = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_MARKET_PATH, TRAIN_RETURN_PATH, TRAIN_FUNADMENTAL_PATH)
)

df_test_market, df_test_fundamental = (
    pd.read_csv(csv_path)
    for csv_path in (TEST_MARKET_PATH, TEST_FUNADMENTAL_PATH)
)

In [4]:
def split_time(x):
    """
    Split the composite ``date_time`` key into its components.

    Each ``date_time`` value is expected to look like ``s0d1p50``: the text
    before the ``d`` is the code, the text between ``d`` and ``p`` is the day
    and the text after ``p`` is the time step.

    x: DataFrame with a string ``date_time`` column; it is not modified.

    Returns a new DataFrame holding the columns of ``x`` followed by
    ``day``, ``time_step``, ``day_s`` (everything before the ``p``, e.g.
    ``s0d1``) and ``code``.  The new columns hold strings; nothing is cast
    to a numeric type.

    Calling the function again on its own output replaces the derived
    columns instead of appending a second copy, so a notebook cell that does
    ``frame = split_time(frame)`` can be re-run safely.
    """
    derived_columns = ['day', 'time_step', 'day_s', 'code']

    # Discard derived columns left by an earlier call; otherwise the concat
    # below would produce duplicate column names (e.g. two `day_s` columns).
    base = x.drop(columns=derived_columns, errors='ignore')
    stamps = base['date_time']

    # 's0d1p50' -> code 's0' and remainder '1p50'
    code_and_rest = stamps.str.split('d', expand=True)
    code_and_rest.columns = ['code', 's']

    # '1p50' -> day '1' and time_step '50'
    parts = code_and_rest['s'].str.split('p', expand=True)
    parts.columns = ['day', 'time_step']

    # 's0d1p50' -> prefix 's0d1' (the trailing time step is not reused here)
    prefix_and_step = stamps.str.rsplit('p', expand=True)
    prefix_and_step.columns = ['day_s', 's']

    parts['day_s'] = prefix_and_step['day_s']
    parts['code'] = code_and_rest['code']

    # Column-wise concat aligns on the index, which `parts` inherits from `base`.
    return pd.concat([base, parts], axis=1)

In [5]:
# Add the parsed key columns (day, time_step, day_s, code) to both market frames.
df_train_market = split_time(df_train_market)
df_test_market = split_time(df_test_market)

# Training frame: join fundamentals to market rows on the `day_s` key, attach
# the returns, then keep only the last row for each `day_s`.
df = (
    df_train_fundamental
    .merge(df_train_market, left_on='date_time', right_on='day_s')
    .merge(df_train_return, left_on='day_s', right_on='date_time')
    .drop_duplicates(subset='day_s', keep='last')
    .reset_index(drop=True)
)

# Test frame: same join and de-duplication, without a returns file.
test = (
    df_test_fundamental
    .merge(df_test_market, left_on='date_time', right_on='day_s')
    .drop_duplicates(subset='day_s', keep='last')
    .reset_index(drop=True)
)

In [6]:
# Preview the merged training frame (one row per `day_s` after de-duplication).
df

Unnamed: 0,date_time_x,turnoverRatio,transactionAmount,pe_ttm,pe,pb,ps,pcf,date_time_y,open,...,high,low,volume,money,day,time_step,day_s,code,date_time,return
0,s0d1,3.6794,17229.0,34.4425,32.3029,4.9425,3.8180,-578.7700,s0d1p50,24.3731,...,24.3852,24.3731,170476.0,4.157520e+06,1,50,s0d1,s0,s0d1,-0.026877
1,s1d1,2.5150,3706.0,28.9934,27.2726,5.0552,3.0484,23.8260,s1d1p50,16.1557,...,16.2771,16.1071,70944.0,1.146780e+06,1,50,s1d1,s1,s1d1,-0.052674
2,s2d1,1.2858,5136.0,42.9352,41.9279,4.8083,4.1392,-58.2185,s2d1p50,9.0307,...,9.0307,9.0307,84204.0,7.603632e+05,1,50,s2d1,s2,s2d1,-0.002691
3,s3d1,2.2007,3280.0,15.2245,13.8032,2.1904,0.6691,61.0491,s3d1p50,9.1521,...,9.1763,9.1521,228997.0,2.099301e+06,1,50,s3d1,s3,s3d1,-0.018515
4,s4d1,0.8627,5291.0,-369.9850,-433.1736,3.0714,2.9370,-25.2279,s4d1p50,5.0494,...,5.0615,5.0494,114443.0,5.790603e+05,1,50,s4d1,s4,s4d1,-0.019184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53887,s49d998,0.3720,5676.0,41.1121,41.1121,2.8312,1.0491,73.1850,s49d998p50,13.9466,...,13.9466,13.9466,42258.0,5.893921e+05,998,50,s49d998,s49,s49d998,-0.014799
53888,s50d998,0.4780,4506.0,19.4678,19.4678,2.5076,1.1590,-38.0915,s50d998p50,11.2762,...,11.2762,11.2762,22858.0,2.576509e+05,998,50,s50d998,s50,s50d998,0.012921
53889,s51d998,1.1501,5179.0,14.5922,14.5922,1.4130,0.7009,19.5205,s51d998p50,3.7142,...,3.7142,3.7142,189327.0,7.036010e+05,998,50,s51d998,s51,s51d998,-0.052286
53890,s52d998,0.5684,7558.0,28.9922,28.9922,5.7855,3.7150,-582.4621,s52d998p50,28.8642,...,28.8642,28.8642,59609.0,1.720926e+06,998,50,s52d998,s52,s52d998,-0.015559


In [7]:
# Preview the merged test frame; it has no `return` column because no return
# file is merged for the test data.
test

Unnamed: 0,date_time_x,turnoverRatio,transactionAmount,pe_ttm,pe,pb,ps,pcf,date_time_y,open,close,high,low,volume,money,day,time_step,day_s,code
0,s0d1001,1.1718,9290.0,63.6900,63.6900,1.6398,1.5477,-40.9539,s0d1001p50,12.3929,12.3929,12.3929,12.3929,193641.0,2.398687e+06,1001,50,s0d1001,s0
1,s1d1001,0.5027,9539.0,33.1536,33.1536,5.2277,3.3677,124.7176,s1d1001p50,30.2479,30.2964,30.2964,30.2479,44257.0,1.340836e+06,1001,50,s1d1001,s1
2,s2d1001,0.5060,9765.0,36.6197,36.6197,6.0413,4.4686,33.3814,s2d1001p50,23.8148,23.8148,23.8148,23.8148,33383.0,7.948739e+05,1001,50,s2d1001,s2
3,s3d1001,1.0206,3214.0,13.3529,13.3529,1.3622,0.4854,-19.2415,s3d1001p50,9.0671,9.0428,9.0671,9.0428,72373.0,6.544955e+05,1001,50,s3d1001,s3
4,s4d1001,3.1191,30274.0,31.4580,31.4580,4.4758,3.2646,-116.0588,s4d1001p50,9.0792,9.0792,9.0792,9.0792,1485647.0,1.348650e+07,1001,50,s4d1001,s4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37795,s49d1700,1.1109,29758.0,32.0733,29.9804,4.5339,1.3459,31.0558,s49d1700p50,27.6868,27.6868,27.6868,27.6868,329087.0,9.110941e+06,1700,50,s49d1700,s49
37796,s50d1700,0.8285,15622.0,30.0656,30.4533,4.0348,1.6924,34.0342,s50d1700p50,28.9370,28.9127,28.9370,28.9127,59517.0,1.720829e+06,1700,50,s50d1700,s50
37797,s51d1700,0.7608,14507.0,27.2893,26.4071,2.8152,1.5624,105.1597,s51d1700p50,9.9896,9.9896,9.9896,9.9896,87100.0,8.700919e+05,1700,50,s51d1700,s51
37798,s52d1700,0.7141,21682.0,44.0500,44.6896,8.9903,5.9904,521.3693,s52d1700p50,71.9055,71.9662,71.9662,71.9055,126493.0,9.103476e+06,1700,50,s52d1700,s52
