# 1. Library

In [1]:
import pandas as pd
import numpy as np

In [2]:
import requests
import pickle

In [3]:
def make_url(sosok=0, page_size=10, page=1):
    """Build the URL for the Naver mobile-stock market-summary list API.

    Args:
        sosok: Value sent as the ``sosok`` query parameter (a market
            selector; its exact meaning is defined by the remote API).
        page_size: Value sent as the ``pageSize`` query parameter.
        page: Value sent as the ``page`` query parameter.

    Returns:
        The full request URL as a string.
    """
    # No backslash after "?": "\m" is not a valid escape sequence, so it would
    # both trigger an invalid-escape warning and leave a literal backslash in
    # the query string, hiding the "menu" parameter from the server.
    return (
        "http://m.stock.naver.com/api/json/sise/siseListJson.nhn"
        f"?menu=market_sum&sosok={sosok}&pageSize={page_size}&page={page}"
    )

In [4]:
def get_data(url, timeout=10):
    """Fetch a stock listing as JSON from ``url`` and return it as a DataFrame.

    The response body is expected to be a JSON object whose
    ``["result"]["itemList"]`` entry is a list of per-company dicts.

    Args:
        url: Address of the JSON endpoint to query.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A pandas DataFrame with one row per company and the columns
        종목, 시세, 전일비, 등락율, 시가총액, 거래량 (in that order). Column dtypes
        are inferred by pandas from the JSON values.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the payload lacks ``result``/``itemList`` or a company
            entry lacks one of the expected keys.
    """
    # Output column name -> key used for that value in each JSON item.
    column_keys = {
        "종목": "nm",
        "시세": "nv",
        "전일비": "cv",
        "등락율": "cr",
        "시가총액": "mks",
        "거래량": "aq",
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    companies = response.json()["result"]["itemList"]

    # Build every row first and construct the frame once; growing a DataFrame
    # one row at a time copies the accumulated data on each insertion.
    rows = [
        {column: company[key] for column, key in column_keys.items()}
        for company in companies
    ]
    return pd.DataFrame(rows, columns=list(column_keys))

# 2. Crawling

In [5]:
# Query sosok=0 with a page size of 10000 so a single request covers the
# listing (the captured output below ends at row 1523).
# NOTE(review): sosok=0 presumably selects one market segment - confirm
# against the API.
url = make_url(sosok=0, page_size=10000, page=1)
df = get_data(url)
# Display the last five rows as the cell output.
df.tail(5)

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
1519,유유제약2우B,18200,-350,-1.89,30,1193
1520,KODEX WTI원유선물인버스(H),14210,-225,-1.56,28,1682
1521,동양3우B,30450,-1000,-3.18,27,2489
1522,ARIRANG 심천차이넥스트(합성),6665,20,0.3,20,108
1523,파워 단기채,105350,5,0.0,13,400


In [6]:
# Same request as before but with sosok=1; the result is kept separately in
# df2 (the captured output below ends at row 1327).
# NOTE(review): sosok=1 presumably selects a second market segment - confirm
# against the API.
url = make_url(sosok=1, page_size=10000, page=1)
df2 = get_data(url)
# Display the last five rows as the cell output.
df2.tail(5)

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
1323,한화에이스스팩3호,2070,-15,-0.72,64,7440
1324,미래에셋대우스팩2호,2090,0,0.0,64,3942
1325,에프티이앤이,87,0,0.0,59,0
1326,소프트센우,23150,350,1.54,33,1254
1327,모다,155,0,0.0,33,0


In [7]:
# Third request, with sosok=2; the result is kept in df3 (the captured output
# below ends at row 99).
# NOTE(review): the meaning of sosok=2 is not visible here - confirm against
# the API.
url = make_url(sosok=2, page_size=10000, page=1)
df3 = get_data(url)
# Display the last five rows as the cell output.
df3.tail(5)

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
95,GS리테일,38700,300,0.78,29799,257793
96,현대차2우B,80400,-1300,-1.59,29334,158351
97,오렌지라이프,35700,300,0.85,29274,241504
98,금호석유,95700,-2600,-2.64,29158,141579
99,제일기획,25050,50,0.2,28818,330995


In [8]:
# Stack the three crawled frames row-wise. ignore_index=True renumbers the
# rows 0..n-1 in the same step, so there is no need to reset the index into an
# 'index' column and then delete that column afterwards.
df = pd.concat([df, df2, df3], axis=0, ignore_index=True)

In [11]:
# Display the last five rows of the combined frame as the cell output.
df.tail(5)

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
2947,GS리테일,38700,300,0.78,29799,257793
2948,현대차2우B,80400,-1300,-1.59,29334,158351
2949,오렌지라이프,35700,300,0.85,29274,241504
2950,금호석유,95700,-2600,-2.64,29158,141579
2951,제일기획,25050,50,0.2,28818,330995


# 3. Data Exploration

In [12]:
# Order rows from the largest to the smallest value in the '거래량' column.
df = df.sort_values(by='거래량', ascending=False)

### 가장 거래가 많은 주식

In [13]:
# Keep only the first row for each '종목' value; since the frame was just
# sorted by '거래량' descending, that is the row with the largest '거래량'.
df.drop_duplicates(subset='종목', keep='first', inplace=True)

In [14]:
# Renumber rows 0..n-1 after sorting and de-duplication, discarding the old
# index labels instead of keeping them as a column.
df = df.reset_index(drop=True)

In [17]:
# Display the first five rows (largest '거래량' first) as the cell output.
df.head(5)

Unnamed: 0,종목,시세,전일비,등락율,시가총액,거래량
0,한창,3200,-940,-22.71,1113,66722981
1,코디엠,969,-66,-6.38,1372,42275554
2,토박스코리아,1495,-75,-4.78,588,36240372
3,SDN,1700,-165,-8.85,709,34979556
4,팬스타엔터프라이즈,1090,-335,-23.51,484,34745395
