# 신고가 경신종목 분석
신고가 갱신종목의 양상에 따른 주가 횡보 분석

## 개요
신고가는 과거대비 가장 높은 가격을 뜻하며 보통 52주 신고가 등 기간을 한정에 많이 사용한다.<br>
또한 가장 강한 시세를 가지고 있음은 그만큼 투자자들의 관심도도 큼을 뜻한다.<br>
그냥해서는 안될거같고 좀 부가적인 요소들을넣어보자<br>
쓰토캐스틱같은거도

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy
import os

In [2]:
from Stock import importData

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt
import cufflinks as cf
import plotly.graph_objs as go
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

In [4]:
new_low = pd.read_csv("data (2).csv")

In [5]:
new_low['현재가'] = new_low['현재가'].str.replace(',','').astype(int)

In [6]:
new_low['가격(원)'] = new_low['가격(원)'].str.replace(',','').astype(int)

In [7]:
new_low['현재가'] / new_low['가격(원)']

0      1.068063
1      1.012376
2      1.011211
3      1.022535
4      1.083333
         ...   
177    1.845304
178    1.250000
179    1.798780
180    1.093182
181    1.119093
Length: 182, dtype: float64

## Use kospi data

In [124]:
def normalize(arr):
    return (arr-np.mean(arr))/np.std(arr)

In [8]:
PASSWORD=os.environ['SQL_PWD']

In [9]:
sql = importData.FromMysql('stock_price', PASSWORD)

In [10]:
codes = sql.all_codes()

In [180]:
df['macd'] = get_macd(df)

In [184]:
def get_macd(df, l=26, s=12):
    macd = df.Close.rolling(window=s, min_periods=s).mean() - df.Close.rolling(window=l, min_periods=s).mean()
    return macd

In [147]:
def get_stochastic(df, n=15, m=5, t=3):
    # https://excelsior-cjh.tistory.com/111
    # 입력받은 값이 dataframe이라는 것을 정의해줌
    df = pd.DataFrame(df)
    # n일중 최고가
    ndays_high = df.High.rolling(window=n, min_periods=n//2).max()  # min_periods안잡으면 NA값이 나온다
    # n일중 최저가
    ndays_low = df.Low.rolling(window=n, min_periods=n//2).min()
 
    # Fast%K 계산
    kdj_k = ((df.Close - ndays_low) / (ndays_high - ndays_low))*100
    # Fast%D (=Slow%K) 계산
    kdj_d = kdj_k.ewm(span=m).mean()
    # Slow%D 계산
    kdj_j = kdj_d.ewm(span=t).mean()
    return {"kdj_k": kdj_k, "kdj_d": kdj_d, "kdj_j": kdj_j}

In [75]:
def get_stochastic_volume(df, n=15, m=5, t=3):
    # https://excelsior-cjh.tistory.com/111
    # 입력받은 값이 dataframe이라는 것을 정의해줌
    df = pd.DataFrame(df)
    # n일중 최고가
    df_rolling = df.Volume.rolling(window=n, min_periods=1)
    ndays_high = df_rolling.max()
    # n일중 최저가
    ndays_low = df_rolling.min()
    
    # Fast%K 계산
    kdj_k = ((df.Volume - ndays_low) / (ndays_high - ndays_low))*100
    # Fast%D (=Slow%K) 계산
    kdj_d = kdj_k.ewm(span=m).mean()
    # Slow%D 계산
    kdj_j = kdj_d.ewm(span=t).mean()
    return {"kdj_k": kdj_k, "kdj_d": kdj_d, "kdj_j": kdj_j}

In [13]:
def get_high_idx(df, term, next_n):
    highs = []
    idx = term  # 52주 신고가
    n = len(df)
    
    while idx < n-1:
        df_cut = df['Close'][idx-term:idx+1]
        if df_cut.max() == df_cut.iloc[-1]:
            highs.append(idx)
            idx += next_n
        idx += 1
        
    return np.array(highs)

In [14]:
def get_low_idx(df, term, next_n):
    highs = []
    idx = term  # 52주 신고가
    n = len(df)
    
    while idx < n:
        df_cut = df['Close'][idx-term:idx]
        if df_cut.min() == df_cut.iloc[-1]:
            highs.append(idx)
            idx += next_n
        idx += 1
        
    return np.array(highs)

In [138]:
def get_ratio(df, code, idxes, later):
    '''
    later일 후 주가(종가) 변화량
    '''
    n = len(df)
    idxes_next = idxes + later
    idxes = idxes[idxes_next < n]
    idxes_buy = idxes + 1  #다음날 종가에 매수
    idxes_next = idxes_next[idxes_next < n]

    dates = df.iloc[idxes]['Date'].tolist() 
    codes = [code] * len(idxes)
    stochas = df.iloc[idxes]['stoch_close'].tolist()
    volumes = df.iloc[idxes]['stoch_volume'].tolist()
    macd = df.iloc[idxes]['macd'].tolist()
    ratios = (df.iloc[idxes_next]['Close'].values / df.iloc[idxes_buy]['Close']).tolist()
    
    return dates, stochas, volumes, macd, codes, ratios

In [16]:
# df = sql.extract_df(codes[1])
# df = get_stochastic(df)

## 스토캐스틱 + 신저가 or 고가

In [131]:
code = codes[0]

In [186]:
df = sql.extract_df(code)
df['stoch_close'] = get_stochastic(df)['kdj_j']
df['stoch_volume'] = get_stochastic_volumne(df)['kdj_j']
df['macd'] = get_macd(df) / df['Close']
highs = get_high_idx(df, 250, 20)

In [141]:
highs_list = get_ratio(df, code, highs, 10)

In [142]:
for idx, k in enumerate(result_high.keys()):
        result_high[k].extend(highs_list[idx])

In [215]:
result_high = {'date': [],
                      'stochastic': [],
                       'volume':[],
                       'macd':[],
                       'code':[],
                      'ratio': []}

In [216]:
result_low = copy.deepcopy(result_high)

In [217]:
later = 20
for code in tqdm(codes):
    df = sql.extract_df(code)
#    df = df[df['Date'] >= '2019']
    df['stoch_close'] = get_stochastic(df)['kdj_j']
    df['stoch_volume'] = get_stochastic_volume(df)['kdj_j']
    df['macd'] = get_macd(df) / df['Close']
    highs = get_high_idx(df, 250, 20)
    lows = get_low_idx(df, 250, 20)
    
    highs_list = get_ratio(df, code, highs, later)
    lows_list = get_ratio(df, code, lows, later)
    
    for idx, k in enumerate(result_high.keys()):
        result_high[k].extend(highs_list[idx])
        result_low[k].extend(lows_list[idx])
        

100%|██████████| 799/799 [07:03<00:00,  1.89it/s]


In [218]:
df_high = pd.DataFrame(result_high)

In [219]:
df_low = pd.DataFrame(result_low)

In [220]:
df_high.corr()

Unnamed: 0,stochastic,volume,macd,ratio
stochastic,1.0,0.058845,0.406575,-0.01065
volume,0.058845,1.0,-0.119202,-0.006244
macd,0.406575,-0.119202,1.0,-0.046167
ratio,-0.01065,-0.006244,-0.046167,1.0


In [221]:
df_low.corr()

Unnamed: 0,stochastic,volume,macd,ratio
stochastic,1.0,-0.075613,0.322021,-0.057822
volume,-0.075613,1.0,0.01075,0.042914
macd,0.322021,0.01075,1.0,-0.211405
ratio,-0.057822,0.042914,-0.211405,1.0


In [222]:
df_low.sort_values("ratio")

Unnamed: 0,date,stochastic,volume,macd,code,ratio
1098,2016-05-02,71.698754,0.165166,0.000000,c001470,0.394475
4070,2018-12-12,20.068134,73.633013,-0.018719,c007460,0.418573
983,2020-02-24,52.290875,48.596273,-0.013549,c001380,0.427742
286,2020-02-24,35.728130,49.896944,-0.022952,c000370,0.443956
1747,2020-02-24,12.764110,13.032538,-0.014627,c002690,0.446532
...,...,...,...,...,...,...
8347,2016-12-23,11.581567,10.198204,-0.171760,c071970,2.343225
1936,2012-12-14,14.814205,63.925034,-0.056647,c002990,2.426301
897,2015-06-03,15.015077,25.688272,-0.022270,c001260,2.635779
3131,2011-09-09,25.589820,11.570599,-0.034265,c005110,2.823256


In [223]:
df_high.iplot(kind='scatter', mode='markers', x='macd', y='ratio')

In [224]:
df_low.iplot(kind='scatter', mode='markers', x='macd', y='ratio')

In [238]:
df_high.to_csv("new_high_210204.csv", index=False)
df_low.to_csv("new_low_210204.csv", index=False)


In [None]:
#check
code = 'c000640'
df = sql.extract_df(code)
df['stoch_close'] = get_stochastic(df)['kdj_j']
df['stoch_volume'] = get_stochastic_volumne(df)['kdj_j']

In [None]:
# trash
#np.concatenate((a,b), axis=0)