In [None]:
import requests
import json
import os
import csv

In [None]:
# Output files: index 0 receives the industry table, index 1 the stock table.
stock_path = ['Industry_List.csv', 'Stock_List.csv']

# Remove output left over from an earlier run; paths that do not exist are skipped.
for stale_file in filter(os.path.exists, stock_path):
    os.remove(stale_file)

In [None]:
def get_web(url, timeout=30):
    """Send a GET request to ``url`` using a desktop-browser User-Agent header.

    Args:
        url: Address to request.
        timeout: Seconds ``requests`` may wait on the server before giving up.
            Pass ``None`` to wait without limit (the behaviour this function
            had before the parameter was added).

    Returns:
        The response object produced by ``requests.get``. The HTTP status is
        not inspected here; callers decide how to handle error responses.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.27 Safari/537.36 Edg/99.0.1150.16'}
    # requests applies no timeout by default, so one stalled connection would
    # otherwise block the whole notebook run indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response

In [None]:
def get_industry_list():
    """Download the industry classification records from the VNDirect finfo API.

    The endpoint is requested twice: first to read ``totalElements``, then
    again with ``&size=<totalElements>`` appended to the URL.

    Returns:
        list[list]: One row per record, in the order
        ``[industryCode, industryLevel, higherLevelCode, englishName,
        vietnameseName, totalCount]``. ``higherLevelCode`` is ``0`` when the
        record has no such key. A record that raises while being read is
        reported on stdout and left out of the result.
    """
    url = 'https://finfo-api.vndirect.com.vn/v4/industry_classification?industryLevel'
    page_size = get_web(url).json()['totalElements']

    # Resize the page to the reported total and fetch again.
    data = get_web(url + '&size=' + str(page_size)).json()

    cate_list = []
    for index, d in enumerate(data['data']):
        try:
            cate_list.append([
                d['industryCode'],
                d['industryLevel'],
                d.get('higherLevelCode', 0),
                d['englishName'],
                d['vietnameseName'],
                d['totalCount'],
            ])
        except Exception as err:
            # Best effort: keep crawling, but say which record was dropped and
            # why (repr keeps the exception type, e.g. KeyError('englishName')).
            print('Skipping industry record #{}: {!r}'.format(index, err))

    return cate_list

In [None]:
def _lookup_stock_industry(stock_code):
    """Return ``(industryCode, englishName, vietnameseName)`` for a stock's level-4 industry.

    Returns ``(None, None, None)`` when the API lists no industry for the
    code. If several records come back, the last one is used.
    """
    industry_url = 'https://finfo-api.vndirect.com.vn/v4/industry_classification?q=codeList:' + stock_code + '~industryLevel:4'
    industry = (None, None, None)
    for a in get_web(industry_url).json()['data']:
        industry = (a['industryCode'], a['englishName'], a['vietnameseName'])
    return industry


def get_stock_list():
    """Download the HOSE/HNX/UPCOM stock listing and attach each stock's industry.

    The listing endpoint is requested twice: first to read ``totalElements``,
    then again with ``&size=<totalElements>`` appended. After that, one extra
    request is made per stock to look up its level-4 industry, so the run
    time grows with the number of stocks.

    Returns:
        list[list]: One row per stock, in the order
        ``[type, floor, code, status, listedDate, companyId, companyNameEng,
        companyName, industryCode, englishName, vietnameseName]``.
        ``companyNameEng`` is ``None`` when the key is absent, and the three
        industry fields are ``None`` when no industry record is returned for
        the stock. A stock that raises while being read is reported on
        stdout and left out of the result.
    """
    url = 'https://finfo-api.vndirect.com.vn/v4/stocks?q=type:stock,ifc~floor:HOSE,HNX,UPCOM'
    page_size = get_web(url).json()['totalElements']

    # Resize the page to the reported total and fetch again.
    data = get_web(url + '&size=' + str(page_size)).json()

    cate_list = []
    for index, d in enumerate(data['data']):
        stock_code = None
        try:
            stock_type = d['type']
            stock_floor = d['floor']
            stock_code = d['code']
            stock_stt = d['status']
            listed_date = d['listedDate']
            corp_id = d['companyId']
            corp_name_eng = d.get('companyNameEng')
            corp_name_vie = d['companyName']

            # Looked up fresh for every stock: the industry values must never
            # carry over from the previous iteration when this stock has none.
            ind_id, ind_name_eng, ind_name_vie = _lookup_stock_industry(stock_code)

            cate_list.append([stock_type, stock_floor, stock_code, stock_stt, listed_date, corp_id, corp_name_eng, corp_name_vie, ind_id, ind_name_eng, ind_name_vie])
        except Exception as err:
            # Best effort: keep crawling, but say which stock was dropped and why.
            print('Skipping stock record #{} (code={!r}): {!r}'.format(index, stock_code, err))

    return cate_list

In [None]:
%%time

# Column titles for the industry CSV, kept as a one-row table so the header
# and the crawled rows can be written together.
industry_header = [[
    'industry_id',
    'industry_level',
    'industry_parent_id',
    'industry_name_eng',
    'industry_name_vie',
    'element_count',
]]
industry_detail = get_industry_list()

# newline='' leaves line endings to the csv module; utf-8-sig writes a BOM at
# the start of the file (presumably so spreadsheet tools pick up the encoding).
with open(stock_path[0], mode='w', newline='', encoding='utf-8-sig') as industry_csv:
    csv.writer(industry_csv).writerows(industry_header + industry_detail)

print('Crawl done!')

In [None]:
%%time

# Column titles for the stock CSV, kept as a one-row table so the header and
# the crawled rows can be written together.
stock_header = [[
    'stock_type',
    'stock_floor',
    'stock_code',
    'stock_status',
    'listed_date',
    'corp_id',
    'corp_name_eng',
    'corp_name_vie',
    'industry_id',
    'industry_name_eng',
    'industry_name_vie',
]]
stock_detail = get_stock_list()

# newline='' leaves line endings to the csv module; utf-8-sig writes a BOM at
# the start of the file (presumably so spreadsheet tools pick up the encoding).
with open(stock_path[1], mode='w', newline='', encoding='utf-8-sig') as stock_csv:
    csv.writer(stock_csv).writerows(stock_header + stock_detail)

print('Crawl done!')