In [1]:
%matplotlib inline

In [91]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas_datareader import data, wb
import datetime
import pickle
import os
from scipy import fftpack
import seaborn as sns
from matplotlib.font_manager import FontProperties
from IPython.display import display
import naver_kospi_api as nka

In [8]:
plt.style.use('seaborn-notebook')
#sns.set_style("whitegrid")
#sns.set_style("ticks")
sns.set_style("dark")

plt.rc('font', family='Noto Sans Korean')

# 데이터 정리

네이버 시가총액 데이터는 내림차순으로 정렬되어 출력되므로 날짜별로 종목의 row 가 바뀔 수 있다

In [119]:
df1=nka.load_naver_kospi_data('naver_kospi_volume_160721_200825.pickle')
df2=nka.load_naver_kospi_data('naver_kospi_volume_160725_123436.pickle')

# 종목명으로 정렬
df1.sort_values('종목명', inplace=True)
df2.sort_values('종목명', inplace=True)

# 종목명이 같은지 확인
dd=df2['종목명']==df1['종목명']
display(dd.describe())

count     1218
unique       1
top       True
freq      1218
Name: 종목명, dtype: object

# 시가총액

$$KOSPI = \frac{비교시점 시가총액}{기준시점(1980. 1. 4) 시가총액} \times 100$$

## 시가총액 변동량

In [159]:
s1=df1['시가총액'].sum()
s2=df2['시가총액'].sum()

print '시가총액(Prev)  : %d' % s1
print '시가총액(Curr)  : %d' % s2
print '시가총액(Delta) : %d' % (s2-s1)

# 시가총액 변동량
df_diff=df2['시가총액'] - df1['시가총액']
#df_dummy=pd.DataFrame([0]*len(df1), columns=['기여도'])

df_new=pd.concat([df1['종목명'], df_diff, df_diff], axis=1)
df_new.columns=['종목명', '시가총액변동금액', '기여도(%)']
df_ratio=df_new['기여도(%)'].apply(lambda x : x / s2 * 100)
df_new['기여도(%)']=df_ratio
display(df_new.head())

시가총액(Prev)  : 13081507
시가총액(Curr)  : 13052600
시가총액(Delta) : -28907


Unnamed: 0,종목명,시가총액변동금액,기여도(%)
0,삼성전자,-62907.0,-0.48195
1,한국전력,3210.0,0.024593
2,현대차,11014.0,0.084382
3,삼성전자우,-4198.0,-0.032162
4,삼성물산,3211.0,0.0246


## 오른 금액, 내린 금액(억)

In [160]:
k='시가총액변동금액'
df_up=df_new[df_new[k] > 0]
df_down=df_new[df_new[k] < 0]
print '+%d, %d' % (df_up[k].sum(), df_down[k].sum())
print '%d' % (df_up[k].sum()+df_down[k].sum())

+88361, -117268
-28907


## 누가 올랐나? 누가 내렸나? (T억이상)

In [167]:
T=1000

display(df_new[df_new[k] >= T].sort_values(k, ascending=False))
display(df_new[df_new[k] <= -T].sort_values(k, ascending=True))

df_new['기여도(%)'].sum()

Unnamed: 0,종목명,시가총액변동금액,기여도(%)
2,현대차,11014.0,0.084382
19,KB금융,7373.0,0.056487
18,SK이노베이션,6826.0,0.052296
12,SK텔레콤,5652.0,0.043302
11,신한지주,4267.0,0.032691
34,삼성SDI,3863.0,0.029596
22,삼성에스디에스,3482.0,0.026677
4,삼성물산,3211.0,0.0246
1,한국전력,3210.0,0.024593
10,POSCO,3051.0,0.023375


Unnamed: 0,종목명,시가총액변동금액,기여도(%)
0,삼성전자,-62907.0,-0.48195
6,아모레퍼시픽,-5846.0,-0.044788
16,LG화학,-4308.0,-0.033005
3,삼성전자우,-4198.0,-0.032162
21,아모레G,-3990.0,-0.030569
68,유한양행,-2119.0,-0.016234
60,에스원,-1484.0,-0.011369
26,롯데케미칼,-1430.0,-0.010956
15,KT&G,-1373.0,-0.010519
24,LG디스플레이,-1252.0,-0.009592


-0.22146545515835955

# 전체 시가총액에서 얼만큼 비중을 차지했나?

In [199]:
df11=df1['시가총액'].apply(lambda x : x / df1['시가총액'].sum())
#df11.sort_values(inplace=True, ascending=False)
#df11.head()

In [191]:
# 시가총액 비율
df11=df1['시가총액'].apply(lambda x : x / df1['시가총액'].sum())
df22=df2['시가총액'].apply(lambda x : x / df2['시가총액'].sum())
df=df22-df11
df3=pd.concat([df1['종목명'], df], axis=1)
df3.columns=['종목명', '시가총액비']
df3.sort_values('시가총액비', inplace=True, ascending=False)
df3

Unnamed: 0,종목명,시가총액비
2,현대차,0.000892
19,KB금융,0.000587
18,SK이노베이션,0.000546
12,SK텔레콤,0.000463
11,신한지주,0.000358
1,한국전력,0.000312
34,삼성SDI,0.000308
4,삼성물산,0.000289
22,삼성에스디에스,0.000286
10,POSCO,0.000266


In [93]:
#df1[['종목명', '시가총액']]
df1.head()

Unnamed: 0,N,종목명,현재가,전일비,등락률,액면가,시가총액,상장주식수,외국인비율,거래량,PER,ROE,토론실
0,1,삼성전자,1543000,3000,+0.19%,5000,2206017.0,142969,50.75,159014,14.04,11.16,
1,2,한국전력,61200,800,+1.32%,5000,392882.0,641964,33.0,748355,2.96,22.11,
2,3,현대차,130000,500,+0.39%,5000,286359.0,220276,43.09,434610,5.78,10.72,
3,4,삼성전자우,1240000,2000,-0.16%,5000,261186.0,21063,77.97,31341,11.28,,
4,5,삼성물산,133000,1500,-1.12%,100,252288.0,189690,7.76,255708,7.45,25.22,
