### 거래주체(기관, 외인)과 주가의 관계
- 가설1 : 외인, 기관의 매수, 매도량이 급증했을 때 주가가 상승 or 하락?

In [1]:
import pandas as pd
import numpy as np
import time
import pickle
from datetime import datetime
from tqdm import tqdm
#from scipy.stats.mstats import gmean

In [2]:
import pymysql
from sqlalchemy import create_engine
from pandas.io import sql

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt
import cufflinks as cf
import plotly.graph_objs as go
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

In [26]:
from Stock.importData import FromMysql 

In [5]:
dir(importData)

['FromMysql',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'pd',
 'pymysql']

In [10]:
PASSWORD=""

### Preprocessing

In [6]:
class Preprocessor:
    '''
    데이터의 scaling과 관련된 메소드
    '''
    def normalize(arr):
        #     sign = np.sign(arr)
        #     arr = np.log(np.abs(arr)+1)*sign
        return (arr-np.mean(arr))/np.std(arr)
    
    def minmax(arr):
        return((arr-np.min(arr))/(np.max(arr)-np.min(arr)))

In [None]:
FromMysql

In [28]:
conStock = FromMysql('stock', PASSWORD)
conPrice = FromMysql('stock_price', PASSWORD)

In [33]:
class TradeStradegy:
    '''
    mergePrice: 기관, 외인 데이터베이스로부터 추출된 데이터프레임과
    주가 데이터베이스로부터 추출된 데이터프레임 결합
    
    stradegy1Condition: 기관, 외인 순매매량의 이상치 index 반환
    
    stradegy1: 위의 index를 기준으로 n일 후 종가 변화량들 반환
    '''
    def __init__(self, code, criteria=2.5, window=10, dayLater=30, who='inst_sum'):
        
        self.code = code
        self.criteria = criteria
        self.window = window
        self.dayLater = dayLater
        self.who = who
        self.df = self.mergePrice()
        
    def mergePrice(self):
        global conStock, conPrice
        
        dfWho = conStock.extract_df(self.code)
        dfPrice = conPrice.extract_df(self.code)
        dfWho['Date'] = pd.to_datetime(dfWho['Date'])
        dfWho['volumePrice'] = dfWho[self.who]*dfWho['Close']
        dfWho = dfWho.drop(columns = ['Close','Change','Volume'])
        df = dfWho.merge(dfPrice,how='left',on='Date')
        df = df[::-1].reset_index(drop=True)
        return(df)
    
    
    def stradegy1Condition(self):
        ppsr = Preprocessor
        self.df['volumePriceRolling'] = ppsr.normalize(self.df['volumePrice'].rolling(window=self.window).mean())
        if(self.criteria>0):
            plusIdx = self.df[self.df['volumePriceRolling']>self.criteria].index.values
        else:
            plusIdx = self.df[self.df['volumePriceRolling']<self.criteria].index.values
        
        if(len(plusIdx)==0):
            return np.array([])
        uniqueIdx = [plusIdx[0]]
        for i in range(1,len(plusIdx)):
            if(plusIdx[i] - plusIdx[i-1] > 5):
                uniqueIdx.append(plusIdx[i])
        return np.array(uniqueIdx)
    
    def stradegy1(self): #or inst_sum
        uniqueIdx = self.stradegy1Condition()
        uniqueIdxLater = uniqueIdx + self.dayLater

        uniqueIdx = uniqueIdx[uniqueIdxLater<len(self.df)]
        uniqueIdxLater = uniqueIdxLater[uniqueIdxLater<len(self.df)]
        
        
        dfOrigin = self.df.iloc[uniqueIdx,:]
        dfLater = self.df.iloc[uniqueIdxLater,:]
        return (dfLater['Close'].values/dfOrigin['Close'].values,
                np.random.choice(self.df[self.dayLater:]['Close'].values / self.df[:-1*self.dayLater]['Close'].values, len(uniqueIdx)))
#        return (1-np.mean(dfSelected['Close']/dfSelected['Open']), len(dfSelected))

### 간단한 EDA

In [34]:
codes = importData.FromMysql('stock', PASSWORD).all_codes()

In [35]:
tss = TradeStradegy('c005930') #대한항공의 주식코드
df = tss.mergePrice()
dfPlot = df[['Date','Close','inst_sum','foreign_sum']]

In [36]:
dfPlot.iplot(x='Date', y=['inst_sum','foreign_sum'], title = "기관, 외국인의 일일 순매매량")

In [37]:
dfPlot1 = dfPlot[::-1]
dfPlot1.iloc[:,2:] = dfPlot1.iloc[:,2:].cumsum()

#scaling for plotting
for i in range(1,dfPlot1.shape[1]):
        dfPlot1.iloc[:,i] = Preprocessor.minmax(dfPlot1.iloc[:,i])

In [38]:
dfPlot1.iplot(x='Date', y=['inst_sum','foreign_sum','Close'], title = "기관, 외국인 누적 순매매량(Scaled) + 종가(삼성전자)")

### 통계적 검정

### Strategy1
외국인 or 기관의 매도량이 급증 or 급락했을때 n일 후 주가의 변화


In [39]:
tss = TradeStradegy('c005930', 2.5, 10, 30, "inst_sum")
df = tss.mergePrice()

fig = df.iplot(x='Date', y='volumePrice', asFigure=True, title='삼성전자 기관 순매매량')
fig.show()

In [40]:
plusIdx = tss.stradegy1Condition()
dfSelected = tss.stradegy1()
np.mean(dfSelected)

1.0289405176441555

In [41]:
ppsr = Preprocessor
df['vPR'] = df['volumePrice'].rolling(window=10).mean()
df['vPRNM'] = ppsr.normalize(df['vPR'])

fig = df['vPRNM'].iplot(asFigure=True, title='삼성전자 기관 순매매량(10일 이동평균 + 정규화)')
for i in plusIdx:
   # fig.add_scatter(x=[i]*100, y=np.linspace(-100000,100000,100), opacity=0.3)
    fig.add_trace(go.Scatter(
    x=[i]*100, y=np.linspace(df['vPRNM'].min(),df['vPRNM'].max(),100),
    opacity=0.3,
    marker_color='rgba(0, 0, 158, .8)'
))
fig.show()

In [42]:
df['vPRMM'] = ppsr.minmax(df['vPRNM'])
df['CloseMM'] = ppsr.minmax(df['Close'])

In [44]:
fig = df.iplot(asFigure=True, y='CloseMM', title = "삼성전자 기관 순매매량 급증시기와 종가")
for i in plusIdx:
   # fig.add_scatter(x=[i]*100, y=np.linspace(-100000,100000,100), opacity=0.3)
    fig.add_trace(go.Scatter(
    x=[i]*100, y=np.linspace(df['vPRMM'].min(),df['vPRMM'].max(),100),
    opacity=0.3,
    marker_color='rgba(0, 0, 158, .8)'
))
fig.show()

In [82]:
result = np.array([])
resultRandom = np.array([])
for code in tqdm(codes):
    tss = TradeStradegy(code, 2.5, 10, 30, "inst_sum")
    stdg1 = tss.stradegy1()
    changeSelected = stdg1[0]
    changeRandom = stdg1[1]
    result = np.append(result, changeSelected)
    resultRandom = np.append(resultRandom, changeRandom)

100%|██████████| 799/799 [01:24<00:00,  9.47it/s]


In [45]:
# len(result)
# np.mean(result)

In [None]:
n = 30
allCases = np.array([])
for code in tqdm(codes):
    df = extractDf(conPrice.cursor(), codes[0])
    allCases = np.append(allCases, df[n:]['Close'].values/df[:-n]['Close'].values)
    

In [96]:
# df = pd.DataFrame({"mean": meanList, "gmean": gmeanList})

# df.iplot(kind='histogram', theme='white', title='temp')

In [97]:
allCases_ = allCases - 1
result_ = result - 1

In [98]:
n = len(result)
pd.DataFrame({"random": np.random.choice(allCases_, n), "selected": result_}).iplot(kind='histogram', theme='white', title='Strategy1 vs random')

In [119]:
# bootstrap

meanList = []
#gmeanList = []
for i in range(10000):
    temp = np.random.choice(allCases, n)
    meanList.append(np.mean(temp))
#    gmeanList.append(gmean(temp))

In [122]:
min(meanList)

1.0126067157474217

## Parameter tuning

In [20]:
# 40이 끝
criteriaList = [-2.5, 2.5]  # 2.5는 기관/외인 매매량 급증, -2.5는 급락신호
windowList = [10, 30]  # Moving average값(10 or 30)
dayLaterList = [1, 5, 10, 30, 60]  # n영업일 후 주가 변화
whoList = ['inst_sum', 'foreign_sum']  # inst_sum: 기관, foreign_sum: 외인
resultList = []
i=0
for cri in criteriaList:
    for win in windowList:
        for day in dayLaterList:
            for who in whoList:
                resultDic = {}
                resultDic['criteria'] = cri
                resultDic['window'] = win
                resultDic['dayLater'] = day
                resultDic['who'] = who
                print(i, end="-")
                i+=1
                result = np.array([])
                for code in codes:
                    tss = TradeStradegy(code, cri, win, day, who)
                    stdg1 = tss.stradegy1()
                    changeSelected = stdg1[0]
                    result = np.append(result, changeSelected)
                resultDic['result'] = result
                resultList.append(resultDic)
                
                

0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-29-30-31-32-33-34-35-36-37-38-39-

In [24]:
with open("./data_stored/resultList.pkl", "wb") as f:
    pickle.dump(resultList, f)

In [54]:
with open("./data_stored/resultList.pkl", "rb") as f:
    resultList = pickle.load(f)

In [61]:
for i in range(len(resultList)):
    resultList[i]['result'] = np.mean(resultList[i]['result'])

pd.DataFrame(resultList).sort_values("result").reset_index(drop=True)

Unnamed: 0,criteria,window,dayLater,who,result
0,2.5,30,60,foreign_sum,0.955323
1,2.5,10,60,foreign_sum,0.957551
2,-2.5,30,60,foreign_sum,0.958133
3,2.5,30,60,inst_sum,0.959598
4,-2.5,10,60,foreign_sum,0.960666
5,2.5,10,60,inst_sum,0.971618
6,2.5,10,30,foreign_sum,0.975157
7,2.5,30,30,foreign_sum,0.975741
8,-2.5,10,30,foreign_sum,0.977434
9,2.5,30,30,inst_sum,0.979941
