In [32]:
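# Goal: read the daily prices of the portfolio constituents, the benchmark, KOSPI and the risk-free rate from Excel and hand them over as JSON.
# Each instrument's Excel file is read into a DataFrame and then converted to JSON.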
import sys
import pandas as pd
import numpy as np
import time, datetime
import json
from scipy.stats import norm
from scipy import stats
from scipy.stats.mstats import gmean
import simplejson

def make_input ():
    """Build the sample backtest request and return it serialised as JSON.

    The request carries the start date, the initial index level and deposit,
    the portfolio tickers, a rebalancing schedule (one weight per ticker for
    each date) and a ``history`` mapping.  ``history`` is filled from Excel
    workbooks in the working directory: ``price_<ticker>.xlsx`` for every
    ticker, followed by ``benchmark.xlsx``, ``riskfree.xlsx`` and
    ``kospi.xlsx``.  Each workbook must contain a ``DATE`` column.
    """
    def load_rows(workbook):
        # Every cell is read as text; DATE is normalised to 'YYYY-MM-DD'.
        sheet = pd.read_excel(workbook, dtype='unicode')
        sheet['DATE'] = pd.to_datetime(sheet['DATE']).dt.strftime('%Y-%m-%d')
        # Plain row lists keep the payload lighter than a list of records.
        return sheet.values.tolist()

    tickers = ['069500', '261220', '332940', '371450', 'HERO', 'SPY']
    yearly_weights = [15, 15, 20, 25, 15, 10]
    rebalancing_dates = ('2018-01-01', '2019-01-01', '2020-01-01', '2021-01-01')

    # Input format expected by the backtest.
    request = {
        'start_date': '2018-01-01',
        'init_index': 1000,
        'init_amt': 1000000,
        'portfolio': tickers,
        'rebalancing': {day: list(yearly_weights) for day in rebalancing_dates},
        'history': {},
    }
    history = request['history']

    # Portfolio constituents first, one workbook per ticker.
    for ticker in tickers:
        history[ticker] = load_rows('price_' + ticker + '.xlsx')

    # Then the benchmark, the risk-free rate and the market index (KOSPI).
    for series_name in ('benchmark', 'riskfree', 'kospi'):
        history[series_name] = load_rows(series_name + '.xlsx')

    return json.dumps(request)

# 시계열 데이터 전처리 (서로다른 시계열을 하나로 합치고 결측치를 보정한다)
def data_preprocess(my_config) :
    """Merge all price histories onto one calendar and derive backtest inputs.

    Args:
        my_config: dict with
            ``history``: name -> list of ``[date, value]`` rows; must contain
                the series ``benchmark``, ``riskfree`` and ``kospi`` next to
                the portfolio assets.
            ``rebalancing``: date -> list of weights in percent, ordered like
                the portfolio columns.

    Returns:
        dict with
            ``prc``: forward-filled asset prices (leading gaps of assets that
                start later stay NaN),
            ``rtn``: daily asset returns, NaN replaced by 0,
            ``target_wght``: frame shaped like ``rtn`` that holds the target
                weights (as fractions) on rebalancing rows and NaN elsewhere,
            ``etc``: benchmark / risk-free / KOSPI series, forward- then
                back-filled.

    Raises:
        KeyError: a rebalancing date lies after the last available date.
    """
    hist_list = []
    # dict -> DataFrame: time-series handling is easier on DataFrames.
    for key, value in my_config['history'].items():
        tmp_df = pd.DataFrame(value, columns=['DATE', key])
        tmp_df.set_index('DATE', inplace=True)
        tmp_df.index = pd.to_datetime(tmp_df.index)
        tmp_df[key] = tmp_df[key].astype(float)
        hist_list.append(tmp_df)

    # Calendars can differ per country/exchange, therefore:
    # 1. merge everything into a single frame on the union of all dates,
    prc_df = pd.concat(hist_list, join='outer', axis=1)
    # 2. replace gaps with the previous observation.  Assets that start later
    #    have nothing to carry forward, so their leading rows stay NaN.
    #    (ffill() replaces the deprecated fillna(method='pad').)
    prc_df = prc_df.ffill()

    # Split benchmark / risk-free / KOSPI from the portfolio series.  Missing
    # leading values are filled with the first available observation.
    etc_df = prc_df[['benchmark', 'riskfree', 'kospi']].bfill()

    # Only the portfolio assets remain in the price frame.
    prc_df = prc_df.drop(['benchmark', 'riskfree', 'kospi'], axis=1)

    # Daily returns; the first row (and rows before an asset starts) become 0.
    rtn_df = prc_df.pct_change()
    rtn_df = rtn_df.fillna(0)

    # Target weights: only rebalancing rows are filled in.
    target_wght_df = pd.DataFrame(data=None, columns=rtn_df.columns, index=rtn_df.index)
    for key, value in my_config['rebalancing'].items():
        # First trading day on or after the requested date.  get_indexer is
        # used because Index.get_loc lost its `method` argument in pandas 2.0.
        pos = target_wght_df.index.get_indexer(pd.to_datetime([key]), method='bfill')[0]
        if pos < 0:
            # get_indexer signals "no match" with -1; without this check the
            # last row would silently be overwritten.
            raise KeyError(key)
        myindex = target_wght_df.index[pos]
        target_wght_df.loc[myindex] = [x / 100 for x in value]

    return {'prc': prc_df,
            'rtn': rtn_df,
            'target_wght': target_wght_df,
            'etc': etc_df}

# 지수산출
def generate_index(init_index, init_amt, rtn_df, target_wght_df) :
    """Roll the index level and the deposit forward one row at a time.

    For each row the portfolio return is the weight-weighted sum of the asset
    returns divided by the sum of the weights.  When the following row has no
    preset target weights (it contains NaN), its weights are derived from the
    current ones by letting them drift with the current returns; preset rows
    (rebalancing) are left untouched.

    Returns a dict with
        ``index``: frame with columns ``backtest``, ``rtn`` and ``deposit``
            on the index of ``rtn_df``,
        ``weight``: the weights actually used, shaped like ``target_wght_df``.
    """
    # Plain nested lists iterate much faster than DataFrame rows.
    daily_returns = rtn_df.to_dict('split')['data']
    daily_weights = target_wght_df.to_dict('split')['data']
    last_pos = len(daily_returns) - 1

    level = init_index
    balance = init_amt
    portfolio_returns = []
    levels = []
    balances = []

    for pos, asset_returns in enumerate(daily_returns):
        weights_today = daily_weights[pos]
        weighted = sum(r * w for r, w in zip(asset_returns, weights_today))
        day_return = weighted / sum(weights_today)

        growth = 1 + day_return
        level = level * growth
        balance = balance * growth

        portfolio_returns.append(day_return)
        levels.append(level)
        balances.append(balance)

        if pos == last_pos:
            continue
        # No target set for the next row: skip rebalancing and carry the
        # drifted weights forward instead.
        if np.isnan(daily_weights[pos + 1]).any():
            daily_weights[pos + 1] = [
                (1 + r) * w for r, w in zip(asset_returns, weights_today)
            ]

    index_df = pd.DataFrame(
        {'backtest': levels, 'rtn': portfolio_returns, 'deposit': balances},
        index=rtn_df.index,
    )
    weight_df = pd.DataFrame(
        data=daily_weights,
        columns=target_wght_df.columns,
        index=target_wght_df.index,
    )
    return {'index': index_df, 'weight': weight_df}


# 시계열 분석을 위한 초기화
def init_analyze(input_df) :
    """Prepare the merged result frame for the time-series analysis.

    Works on a copy of ``input_df``: the ``DATE`` column becomes a datetime
    index named ``date``, previous levels and returns are added for the
    backtest, KOSPI and benchmark series, and ``riskfree`` is divided by 100.
    The first row is dropped because it has no previous value.
    """
    frame = input_df.rename(columns={'DATE': 'date'}).set_index('date')
    frame.index = pd.to_datetime(frame.index)

    backtest_level = frame['backtest']
    benchmark_level = frame['benchmark']

    frame['prev'] = backtest_level.shift(1)
    frame['kospi_rtn'] = frame['kospi'].pct_change()
    frame['bm_prev'] = benchmark_level.shift(1)
    frame['bm_rtn'] = benchmark_level.pct_change()
    frame['riskfree'] = frame['riskfree'].div(100)

    # The first row only served as the base for shift/pct_change.
    return frame.iloc[1:]

# 분석결과 자료 초기화
def init_result():
    """Return a fresh, zero-filled container for the analysis metrics.

    Every call creates new nested dicts, so results of separate analyses do
    not share state.
    """
    leading_scalars = (
        'final_balance', 'cagr',
        'stdev', 'annlzd_stdev',
        'arith_mean', 'annlzd_arith_mean',
        'geo_mean', 'annlzd_geo_mean',
        'vol', 'annlzd_vol',
        'hist_var', 'anal_var', 'c_var',
    )
    trailing_scalars = (
        'mdd', 'skewness', 'kurtosis',
        'sharpe_rto', 'sortino_rto', 'down_dev',
    )

    metrics = dict.fromkeys(leading_scalars, 0)
    # Best / worst calendar year, each with the year and its return.
    for extreme in ('best_y', 'worst_y'):
        metrics[extreme] = {'year': 0, 'rtn': 0}
    metrics.update(dict.fromkeys(trailing_scalars, 0))
    # Regression statistics against the market and against the benchmark.
    for reference in ('vs_market', 'vs_benchmark'):
        metrics[reference] = {'beta': 0, 'alpha': 0, 'r2': 0, 'corr': 0}
    return metrics

# 시계열 데이터 분석
def analyze_data(initial_balance, df):
    """Compute performance and risk statistics for one return series.

    Args:
        initial_balance: starting amount used to scale the final balance.
        df: frame with a datetime index (it is resampled by year) and the
            columns ``backtest`` (level), ``prev`` (previous level), ``rtn``
            (per-row return), ``riskfree`` (annual rate, divided by 252 here
            to get a per-row rate) and ``kospi_rtn``.  When a ``benchmark``
            column is present, ``bm_rtn`` is read as well.

    Returns:
        The dict created by ``init_result()`` with the metrics filled in.
        ``vs_benchmark`` keeps its zero defaults when ``df`` has no
        ``benchmark`` column.

    Side effects:
        Helper columns (``rtn_1``, ``cum``, ``high``, ``drawdown``,
        ``excess_rtn``, ``downside_rtn``) are written into ``df`` itself.

    Rows are treated as trading days; annualisation uses 252 rows per year.
    """
    result = init_result()

    NUM_OF_DAYS = 252

    rtn = df['rtn']

    # 1. Final balance: initial amount scaled by the total growth of the level.
    final_balance = initial_balance * df['backtest'].iloc[-1] / df['prev'].iloc[0]
    result['final_balance'] = final_balance

    # 2. CAGR.  The base is the level before the first row ('prev').
    year = df['backtest'].count() / NUM_OF_DAYS
    CAGR = (df['backtest'].iloc[-1] / df['prev'].iloc[0]) ** (1 / year) - 1
    result['cagr'] = CAGR

    # 3. Standard deviation of the per-row returns and its annualised value
    #    (multiplied by sqrt(NUM_OF_DAYS)).  Volatility is the same quantity.
    stdev = rtn.std()
    annlzd_stdev = stdev * (NUM_OF_DAYS ** 0.5)
    result['stdev'] = stdev
    result['annlzd_stdev'] = annlzd_stdev
    result['vol'] = stdev
    result['annlzd_vol'] = annlzd_stdev

    # 4. / 5. Arithmetic mean per row and compounded to a year.
    arith_mean = rtn.mean()
    result['arith_mean'] = arith_mean
    result['annlzd_arith_mean'] = (1 + arith_mean) ** NUM_OF_DAYS - 1

    # 6. / 7. Geometric mean: gmean of (1 + r), minus 1, then compounded.
    df['rtn_1'] = rtn + 1
    geo_mean = gmean(df['rtn_1'].tolist()) - 1
    result['geo_mean'] = geo_mean
    result['annlzd_geo_mean'] = (1 + geo_mean) ** NUM_OF_DAYS - 1

    # 10. VaR
    # 10.1 Historical VaR using a hand-written exclusive quantile.
    def quantile_exc(df2, q):
        list_sorted = sorted(df2)
        rank = q * (len(list_sorted) + 1) - 1
        if rank < 0:
            # Too few observations for this quantile; 0 tells the caller to
            # fall back to pandas' quantile.
            print ('quantile is too small')
            return 0
        rank_l = int(rank)
        return list_sorted[rank_l] + (list_sorted[rank_l + 1] -
                                      list_sorted[rank_l]) * (rank - rank_l)

    historical_var_95 = quantile_exc(rtn, 0.05)
    if historical_var_95 == 0:
        historical_var_95 = rtn.quantile(0.05)
    result['hist_var'] = historical_var_95

    # 10.2 Analytical VaR: 5% point of a normal fitted to mean and stdev.
    result['anal_var'] = norm.ppf(0.05, arith_mean, stdev)

    # 10.3 Conditional VaR (hand-written).
    # NOTE(review): the sum covers int(rank) observations but is divided by
    # the fractional rank - confirm that this is the intended estimator.
    def conditional_var(df3, q):
        list_sorted = sorted(df3)
        rank = q * len(list_sorted)
        rank_l = int(rank)
        sum_rtn = sum(list_sorted[0:rank_l])
        return 1 / rank * sum_rtn

    result['c_var'] = conditional_var(rtn, 0.05)

    # 11. Best / worst year from year-end levels.
    # 'YE' is the year-end alias of pandas >= 2.2 (where 'Y' is deprecated);
    # older versions reject it with ValueError and still need 'Y'.
    try:
        y_idx = df.resample(rule='YE').last()
    except ValueError:
        y_idx = df.resample(rule='Y').last()
    y_idx['rtn'] = y_idx['backtest'].pct_change()
    if len(y_idx) == 1:
        # Single calendar year: report the return of the whole period.
        min_val = df['backtest'].iloc[-1] / df['prev'].iloc[0] - 1
        min_idx = df['backtest'].idxmin()
        max_val = df['backtest'].iloc[-1] / df['prev'].iloc[0] - 1
        max_idx = df['backtest'].idxmax()
    else:
        # NOTE(review): the first year has no previous year-end, so its
        # return is NaN and never selected - confirm this is intended.
        min_val = y_idx['rtn'].min()
        min_idx = y_idx['rtn'].idxmin()
        max_val = y_idx['rtn'].max()
        max_idx = y_idx['rtn'].idxmax()
    result['best_y']['year'] = max_idx.year
    result['best_y']['rtn'] = max_val
    result['worst_y']['year'] = min_idx.year
    result['worst_y']['rtn'] = min_val

    # 12. MDD: cumulate (1 + r), track the running maximum and take the
    #     largest relative drop (cum - high) / high.
    df['cum'] = df['rtn_1'].cumprod()
    df['high'] = df['cum'].cummax()
    df['drawdown'] = (df['cum'] - df['high']) / df['high']
    result['mdd'] = df['drawdown'].min()

    # 13. Skewness / 14. Excess kurtosis
    result['skewness'] = rtn.skew()
    result['kurtosis'] = rtn.kurtosis()

    # 15. Ratios
    # https://www.quantnews.com/performance-metrics-sharpe-ratio-sortino-ratio/
    # 15.1 Sharpe ratio; denominator is the number of rows per year.
    denominator = NUM_OF_DAYS
    df['excess_rtn'] = rtn - df['riskfree'] / denominator
    sharpe_rto = df['excess_rtn'].mean() / df['excess_rtn'].std() * np.sqrt(denominator)
    result['sharpe_rto'] = sharpe_rto

    # 15.2 Sortino ratio
    target = 0
    # Start from a float column: writing floats into an int column is
    # deprecated in pandas 2.1 and will become an error.
    df['downside_rtn'] = 0.0
    df.loc[rtn < target, 'downside_rtn'] = rtn ** 2
    down_stdev = np.sqrt(df['downside_rtn'].mean())
    sortino_ratio = df['excess_rtn'].mean() / down_stdev * np.sqrt(denominator)
    result['sortino_rto'] = sortino_ratio
    result['down_dev'] = down_stdev

    # 16. / 17. Beta, alpha, R-squared and correlation against a reference.
    # http://gouthamanbalaraman.com/blog/calculating-stock-beta.html
    # https://stackoverflow.com/questions/893657/how-do-i-calculate-r-squared-using-python-and-numpy
    def _regression_stats(ref_rtn):
        covariance = np.cov(rtn, ref_rtn)
        # covariance[1, 1] is the sample variance of the reference.
        beta = covariance[0, 1] / covariance[1, 1]
        alpha = rtn.mean() - beta * (ref_rtn.mean())
        # Alpha is reported annualised.
        y_alpha = (1 + alpha) ** NUM_OF_DAYS - 1
        ypred = alpha + beta * ref_rtn
        SS_res = np.sum(np.power(ypred - rtn, 2))
        SS_tot = covariance[0, 0] * (len(df) - 1)  # sample variance * (n - 1)
        r_squared = 1. - SS_res / SS_tot
        corr = rtn.corr(ref_rtn)
        return {'beta': beta, 'alpha': y_alpha, 'r2': r_squared, 'corr': corr}

    # 16. vs market (KOSPI)
    result['vs_market'].update(_regression_stats(df['kospi_rtn']))

    # 17. vs benchmark, only when benchmark data is part of the frame.
    if 'benchmark' in df.columns:
        result['vs_benchmark'].update(_regression_stats(df['bm_rtn']))

    return result

# 결과 보기
def show_result(rslt) :
    """Print a report for every analysis result stored in ``rslt``.

    ``rslt`` maps a name to a metrics dict shaped like the one returned by
    ``analyze_data``.  Each report starts with a short summary, continues
    with the detailed metrics and ends with a separator line.
    """
    def as_percent(label, value):
        print(label, round(value * 100, 5), "%")

    def as_number(label, value):
        print(label, round(value, 5))

    def year_line(label, entry):
        print(label, entry['year'], ") : ", round(entry['rtn'] * 100, 5), "%")

    for name in rslt:
        stats = rslt[name]

        # Summary
        print("Final Balance : ", int(stats['final_balance']))
        as_percent("CAGR : ", stats['cagr'])
        as_percent("Stdev : ", stats['annlzd_stdev'])
        year_line("Best Year (", stats['best_y'])
        year_line("Worst Year (", stats['worst_y'])
        as_percent("MDD : ", stats['mdd'])
        as_number("Sharpe Ratio : ", stats['sharpe_rto'])
        as_number("Sortino Ratio : ", stats['sortino_rto'])
        as_number("Korean MKT Correlation : ", stats['vs_market']['corr'])

        # Return and risk details
        as_percent("Arithmetic Mean (daily) : ", stats['arith_mean'])
        as_percent("Arithmetic Mean (annualized) : ", stats['annlzd_arith_mean'])
        as_percent("Geometric Mean (daily) : ", stats['geo_mean'])
        as_percent("Geometric Mean (annualized) : ", stats['annlzd_geo_mean'])
        as_percent("Volatility (daily) : ", stats['stdev'])
        as_percent("Volatility (annualized) : ", stats['annlzd_stdev'])
        as_percent("Downside Deviation (daily) : ", stats['down_dev'])
        as_percent("MDD : ", stats['mdd'])

        # Comparison with the market and with the benchmark
        as_number("Korean MKT Correlation : ", stats['vs_market']['corr'])
        as_number("Beta(vs market) : ", stats['vs_market']['beta'])
        as_percent("Alpha(vs market, annualized) : ", stats['vs_market']['alpha'])
        as_percent("R2(vs market) : ", stats['vs_market']['r2'])
        as_number("Beta(vs benchmark) : ", stats['vs_benchmark']['beta'])
        as_percent("Alpha(vs benchmark, annualized) : ", stats['vs_benchmark']['alpha'])
        as_percent("R2(vs benchmark) : ", stats['vs_benchmark']['r2'])

        # Ratios and distribution shape
        as_number("Sharpe Ratio : ", stats['sharpe_rto'])
        as_number("Sortino Ratio : ", stats['sortino_rto'])
        as_number("Skewness : ", stats['skewness'])
        as_number("Excess Kurtosis : ", stats['kurtosis'])

        # Value at risk
        as_percent("Historical VaR(5%) : ", stats['hist_var'])
        as_percent("Analytical VaR(5%) : ", stats['anal_var'])
        as_percent("Conditional VaR(5%) : ", stats['c_var'])
        print("=" * 50)

def print_elapsed_time(cal_tm_list):
    """Print the time spent in each stage followed by the total.

    Args:
        cal_tm_list: sequence of ``[label, timestamp]`` pairs in the order
            the checkpoints were taken.  The first stage is reported with
            0.0 seconds; every later stage with the difference to the
            checkpoint before it.  An empty sequence prints a total of 0.0.
    """
    # Labels and timestamps are kept as native Python values.  This avoids
    # pushing floats through a string array and no longer hides the `time`
    # module behind a local variable of the same name.
    labels = [entry[0] for entry in cal_tm_list]
    stamps = [float(entry[1]) for entry in cal_tm_list]

    tot_elapsed = stamps[-1] - stamps[0] if stamps else 0.0

    # Elapsed time per stage; the first checkpoint has no predecessor.
    durations = [0.0] + [later - earlier
                         for earlier, later in zip(stamps, stamps[1:])]

    for label, spent in zip(labels, durations):
        print(f"{label} : {spent} sec")
    print(f"총 소요시간 : {tot_elapsed} sec")

def init_final_result() :
    """Return the empty skeleton of the final result payload.

    Sections: ``summary``, ``timeseries``, ``tm_anal``, ``portfolio_wght``
    and ``portfolio_anal``.  Every call builds new containers.
    """
    def labelled_table():
        # Column labels plus the matching rows.
        return {'label': [], 'dataset': []}

    payload = {}
    payload['summary'] = {'period': [], 'datenum': 0, 'deposit': [], 'backtest': []}
    payload['timeseries'] = labelled_table()
    payload['tm_anal'] = {'backtest': {}, 'benchmark': {}}
    payload['portfolio_wght'] = labelled_table()
    payload['portfolio_anal'] = labelled_table()
    return payload


if __name__ == '__main__':
    # Driver: builds its own test input, runs the backtest and the analysis,
    # and assembles the result structure that is sent to the WAS.

    # Checkpoints ([label, timestamp]) for the timing report.
    cal_tm = []
    cal_tm.append(['시작',time.time()])

    test_json = make_input()

    cal_tm.append(['테스트데이터 로드',time.time()])

    my_config = json.loads(test_json)

    # Pre-process the time series.
    my_timeseries = data_preprocess(my_config)
    cal_tm.append(['시계열 데이터 전처리',time.time()])

    # Daily index level and asset weights (backtest).
    backtest_result = generate_index(my_config['init_index'],my_config['init_amt'],my_timeseries['rtn'],my_timeseries['target_wght'] )

    cal_tm.append(['지수산출',time.time()])

    # Time-series analysis.
    # The computed index (backtest_result['index']) is merged with the
    # benchmark, risk-free rate and KOSPI series (my_timeseries['etc']).
    result_df = pd.concat([backtest_result['index'],my_timeseries['etc']],join='outer',axis=1)
    result_df.reset_index(inplace=True)
    result_df['DATE'] = result_df['DATE'].dt.strftime('%Y-%m-%d')

    tm_data = init_analyze(result_df)
    tm_anal_rslt = dict.fromkeys(['backtest','benchmark'])
    tm_anal_rslt['backtest'] = analyze_data(my_config['init_amt'], tm_data)

    if 'benchmark' in tm_data.columns:
        # Re-run the same analysis with the benchmark in place of the
        # backtest series.
        tm_data['backtest'] = tm_data['benchmark']
        tm_data['rtn'] = tm_data['bm_rtn']
        tm_data['prev'] = tm_data['bm_prev']
        tm_anal_rslt['benchmark'] = analyze_data(my_config['init_amt'], tm_data)

    cal_tm.append(['시계열분석',time.time()])

    print_elapsed_time(cal_tm)

    # Build the result that is sent to the WAS.
    final_result = init_final_result()

    # Result 0: summary (first/last deposit and index level, period, row count).
    final_result['summary']['deposit'] = [ backtest_result['index']['deposit'].iloc[0], backtest_result['index']['deposit'].iloc[-1] ]
    final_result['summary']['backtest'] = [ backtest_result['index']['backtest'].iloc[0], backtest_result['index']['backtest'].iloc[-1] ]
    final_result['summary']['period'] = [backtest_result['index']['deposit'].index[0].strftime('%Y-%m-%d'),  backtest_result['index']['deposit'].index[-1].strftime('%Y-%m-%d')]
    final_result['summary']['datenum'] = len(backtest_result['index']['deposit'])

    # Result 1: time series.  BM1000 is the benchmark rescaled so that its
    # first value equals the initial index level.
    ratio = my_config['init_index'] / result_df.iloc[0].loc['benchmark']
    result_df['BM1000'] = result_df['benchmark'] * ratio
    tmp_df = result_df[['DATE','backtest','benchmark','BM1000']]
    ts_labels = tmp_df.keys().to_list()
    # The template from init_final_result() and the other sections use
    # 'label'; the original code wrote 'labels' and left 'label' empty.
    final_result['timeseries']['label'] = ts_labels
    # Kept so that consumers reading the old key keep working.
    final_result['timeseries']['labels'] = list(ts_labels)
    final_result['timeseries']['dataset'] = tmp_df.values.tolist()

    # Result 2: time-series analysis metrics.
    final_result['tm_anal']['backtest'] = tm_anal_rslt['backtest']
    final_result['tm_anal']['benchmark'] = tm_anal_rslt['benchmark']

    # Result 3: month-end portfolio weights.
    tmp_df = backtest_result['weight']
    # 'ME' is the month-end alias of pandas >= 2.2 (where 'M' is deprecated);
    # older versions reject it with ValueError and still need 'M'.
    try:
        month_wght_df = tmp_df.resample('ME').last()
    except ValueError:
        month_wght_df = tmp_df.resample('M').last()
    month_wght_df.reset_index(inplace=True)
    month_wght_df['DATE'] = month_wght_df['DATE'].dt.strftime('%Y-%m-%d')
    final_result['portfolio_wght']['label'] = month_wght_df.keys().to_list()
    final_result['portfolio_wght']['dataset'] = month_wght_df.values.tolist() # numpy array -> list

    # Result 4: total return per asset over the whole period.  Leading gaps
    # are back-filled so that late starters are measured from their first
    # price.  bfill() replaces the deprecated fillna(method='bfill').
    my_timeseries['prc'].bfill(inplace=True)
    port_rtn = (my_timeseries['prc'].iloc[-1] / my_timeseries['prc'].iloc[0] - 1)
    final_result['portfolio_anal']['label'] = port_rtn.index.tolist()
    final_result['portfolio_anal']['dataset'] = port_rtn.values.tolist()
    


시작 : 0.0 sec
테스트데이터 로드 : 0.4518284797668457 sec
시계열 데이터 전처리 : 0.05580902099609375 sec
지수산출 : 0.03291201591491699 sec
시계열분석 : 0.09474682807922363 sec
총 소요시간 : 0.6352963447570801 sec


In [34]:
final_result.keys()

dict_keys(['summary', 'timeseries', 'tm_anal', 'portfolio_wght', 'portfolio_anal'])

In [38]:
final_result['portfolio_wght']

{'label': ['DATE', '069500', '261220', '332940', '371450', 'HERO', 'SPY'],
 'dataset': [['2018-01-31',
   0.15343778324485302,
   0.16019382627422823,
   0.2,
   0.25,
   0.15,
   0.1048331286973993],
  ['2018-02-28',
   0.14603295351547324,
   0.15804020100502514,
   0.2,
   0.25,
   0.15,
   0.1021058897942479],
  ['2018-03-31',
   0.14524634209504073,
   0.16062455132806888,
   0.2,
   0.25,
   0.15,
   0.09790899281913903],
  ['2018-04-30',
   0.14832480901204184,
   0.16916726489590814,
   0.2,
   0.25,
   0.15,
   0.0991777356103731],
  ['2018-05-31',
   0.14315842289265818,
   0.1660445082555635,
   0.2,
   0.25,
   0.15,
   0.10142873088514331],
  ['2018-06-30',
   0.13855528939531261,
   0.1811557788944724,
   0.2,
   0.25,
   0.15,
   0.10078877850950614],
  ['2018-07-31',
   0.13787064612197322,
   0.17602297200287154,
   0.2,
   0.25,
   0.15,
   0.10415969044164146],
  ['2018-08-31',
   0.13821053994561688,
   0.1795046661880833,
   0.2,
   0.25,
   0.15,
   0.108010566655

In [79]:
# Total return per asset over the whole period; leading gaps are back-filled
# first.  bfill() replaces the deprecated fillna(method='bfill').
my_timeseries['prc'].bfill(inplace=True)
port_rtn = (my_timeseries['prc'].iloc[-1] / my_timeseries['prc'].iloc[0] - 1)
final_result['portfolio_anal']['label'] = port_rtn.index.tolist()
final_result['portfolio_anal']['dataset'] = port_rtn.values.tolist()


In [80]:
final_result['portfolio_anal']

{'label': ['069500', '261220', '332940', '371450', 'HERO', 'SPY'],
 'dataset': [0.3935646769390133,
  -0.5716678631251495,
  0.7341708542713568,
  0.061316051844466646,
  1.182967398536261,
  0.5310116456449754]}

In [62]:
my_timeseries['prc']

Unnamed: 0_level_0,069500,261220,332940,371450,HERO,SPY
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,30892.0,20895.0,,,,268.77
2018-01-03,31023.0,20770.0,,,,270.47
2018-01-04,30780.0,21380.0,,,,271.61
2018-01-05,31197.0,21320.0,,,,273.42
2018-01-08,31408.0,21180.0,,,,273.92
...,...,...,...,...,...,...
2021-04-05,43130.0,9095.0,17390.0,10600.0,32.49,406.36
2021-04-06,43190.0,8945.0,17430.0,10550.0,32.58,406.12
2021-04-07,43295.0,8925.0,17455.0,10590.0,32.41,406.59
2021-04-08,43255.0,8890.0,17375.0,10575.0,32.87,408.52


In [48]:
z = np.diff(y, axis=0)
a = z.tolist()

In [49]:
a

[0.2626955509185791,
 0.036902427673339844,
 0.03091597557067871,
 0.03690838813781738]

In [41]:
z

array(['시작', '테스트데이터 로드', '시계열 데이터 전처리', '지수산출', '시계열분석'], dtype='<U18')

In [43]:
np.concatenate((z,y))

array(['시작', '테스트데이터 로드', '시계열 데이터 전처리', '지수산출', '시계열분석',
       '1620706190.354424', '1620706190.6171196', '1620706190.654022',
       '1620706190.684938', '1620706190.7218463'], dtype='<U32')

In [47]:
for x in np.nditer(z):
    print( x )

0.2626955509185791
0.036902427673339844
0.03091597557067871
0.03690838813781738
