# Data Crawling
## Install and import packages

In [1]:
#!pip install vnstock pandas requests bs4

In [2]:
from vnstock import *
from datetime import datetime
import pandas as pd

In [17]:
# Pull the live company listing and keep only its ticker column.
df_companies = listing_companies(live=True)
company_symbols = df_companies["ticker"]

print(company_symbols)

0       A32
1       AAA
2       AAM
3       AAS
4       AAT
       ... 
1581    XPH
1582    YBC
1583    YBM
1584    YEG
1585    YTC
Name: ticker, Length: 1586, dtype: object


In [31]:
def capture_stock_data(symbol, capture_date):
    """Request one day of daily-resolution history for a single ticker.

    Parameters
    ----------
    symbol
        Ticker to query; passed straight through to ``stock_historical_data``.
    capture_date
        Object exposing ``strftime`` (e.g. ``datetime``). It is formatted as
        ``YYYY-MM-DD`` and used as both the start and the end of the range,
        so the request spans exactly that one day.

    Returns
    -------
    Whatever ``stock_historical_data`` returns for the request
    (presumably a pandas DataFrame, since the caller concatenates the
    results -- confirm against the vnstock documentation).
    """
    day = capture_date.strftime("%Y-%m-%d")

    # Same day on both ends of the range: a single-day window.
    request = dict(
        symbol=symbol,
        start_date=day,
        end_date=day,
        resolution='1D',
        type='stock',
        beautify=True,
        decor=True,
        source='DNSE',
    )
    return stock_historical_data(**request)


In [40]:
from requests.exceptions import ConnectTimeout
import time
import logging

# Route ERROR-level (and higher) log records to error_log.txt in the
# current working directory.
logging.basicConfig(
    filename='error_log.txt',
    level=logging.ERROR,
)

def capture_all_stock_data(capture_date, max_retries=3, retry_delay=20):
    """Download one day of price data for every listed ticker and save it to CSV.

    The ticker list is fetched with ``listing_companies(live=True)`` and each
    symbol is requested through ``capture_stock_data``. A request that raises
    ``ConnectTimeout`` is retried; a symbol that still fails after
    ``max_retries`` attempts is logged and skipped. Any other exception
    propagates to the caller.

    Parameters
    ----------
    capture_date
        Object exposing ``strftime`` (e.g. ``datetime``); the day to capture.
        Its ``YYYY-MM-DD`` form is also used in the output filename.
    max_retries : int, default 3
        Maximum number of attempts per symbol.
    retry_delay : int or float, default 20
        Seconds to sleep between attempts.

    Returns
    -------
    pandas.DataFrame
        The per-symbol results concatenated under an outer index level named
        ``Symbol`` and written to ``output_<YYYY-MM-DD>.csv``. If no symbol
        produced data, an empty DataFrame is returned and no file is written.
    """
    df_companies = listing_companies(live=True)
    company_symbols = df_companies["ticker"]

    all_data = {}
    total_symbols = len(company_symbols)

    for i, symbol in enumerate(company_symbols, start=1):
        # Bounded retry: at most `max_retries` attempts per symbol.
        for attempt in range(1, max_retries + 1):
            try:
                all_data[symbol] = capture_stock_data(symbol, capture_date)
                break
            except ConnectTimeout as e:
                logging.error(f"Connection timeout for symbol {symbol}: {e}")
                if attempt < max_retries:
                    retry_message = f"Retrying in {retry_delay} seconds..."
                    logging.error(retry_message)
                    print(retry_message)
                    time.sleep(retry_delay)
                else:
                    # Last attempt failed; the loop ends here on its own.
                    max_retry_message = f"Max retries exceeded for symbol {symbol}. Skipping."
                    logging.error(max_retry_message)
                    print(max_retry_message)

        print(f"Processed {i}/{total_symbols} symbols")

    date_string = capture_date.strftime("%Y-%m-%d")

    # pd.concat silently drops None entries but raises ValueError when there
    # is nothing (or only None) to concatenate. Filter up front so the
    # "no data at all" case can be handled instead of crashing after the
    # whole crawl has finished.
    frames = {sym: frame for sym, frame in all_data.items() if frame is not None}
    if not frames:
        no_data_message = f"No stock data captured for {date_string}; no file written."
        logging.error(no_data_message)
        print(no_data_message)
        return pd.DataFrame()

    combined_df = pd.concat(
        list(frames.values()),
        keys=list(frames.keys()),
        names=['Symbol'],
    )

    filename = f'output_{date_string}.csv'
    combined_df.to_csv(filename, index=True)

    return combined_df


In [41]:
# Date whose data should be captured.
my_date = datetime(year=2023, month=12, day=22)

# Crawl every listed symbol for that date, then show the combined result.
all_stock_data = capture_all_stock_data(my_date)
print(all_stock_data)

Processed 1/1586 symbols
Processed 2/1586 symbols
Processed 3/1586 symbols
Processed 4/1586 symbols
Processed 5/1586 symbols
Processed 6/1586 symbols
Processed 7/1586 symbols
Processed 8/1586 symbols
Processed 9/1586 symbols
Processed 10/1586 symbols
Processed 11/1586 symbols
Processed 12/1586 symbols
Processed 13/1586 symbols
Processed 14/1586 symbols
Processed 15/1586 symbols
Processed 16/1586 symbols
Processed 17/1586 symbols
Processed 18/1586 symbols
Processed 19/1586 symbols
Processed 20/1586 symbols
Processed 21/1586 symbols
Processed 22/1586 symbols
Processed 23/1586 symbols
Processed 24/1586 symbols
Processed 25/1586 symbols
Processed 26/1586 symbols
Processed 27/1586 symbols
Processed 28/1586 symbols
Processed 29/1586 symbols
Processed 30/1586 symbols
Processed 31/1586 symbols
Processed 32/1586 symbols
Processed 33/1586 symbols
Processed 34/1586 symbols
Processed 35/1586 symbols
Processed 36/1586 symbols
Processed 37/1586 symbols
Processed 38/1586 symbols
Processed 39/1586 sym