In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from io import BytesIO
import plotly.graph_objects as go
from datetime import datetime
import random
import os
import uuid
import concurrent.futures
import traceback
import time
# Saving the DataFrames
import pickle


In [5]:
def get_random_headers():
    """Assemble a randomized set of HTTP headers for one request.

    The User-Agent and the Cache-Control directive are each picked with
    ``random.choice``; the session cookie value is a fresh ``uuid.uuid4()``.
    All remaining headers are fixed strings.

    Returns:
        dict: header name -> header value.
    """
    browser_signatures = (
        # Chrome
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        # Firefox
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0',
        # Safari
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
        # Edge
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36 Edg/98.0.1234.56',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36 Edg/97.0.1234.56',
        # Opera
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36 OPR/98.0.1234.56',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36 OPR/97.0.1234.56',
        # Others (these repeat the Chrome entries, so those strings are drawn more often)
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
    )
    cache_options = ('no-cache', 'max-age=0', 'private')

    # Draw the User-Agent first and the cache directive second, so a seeded
    # `random` produces the same picks as before.
    picked_agent = random.choice(browser_signatures)
    picked_cache = random.choice(cache_options)
    session_id = str(uuid.uuid4())

    return {
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.9,vi-VN;q=0.8,vi;q=0.7',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'DNT': '1',
        'Origin': 'https://iboard.ssi.com.vn',
        'Referer': 'https://iboard.ssi.com.vn/',
        'sec-ch-ua': '"Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
        'sec-ch-ua-platform': 'Windows',
        'sec-ch-ua-mobile': '?0',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'User-Agent': picked_agent,
        'X-Fiin-User-ID': 'ID',
        'X-Fiin-Key': 'KEY',
        'X-Fiin-Seed': 'SEED',
        'Cache-Control': picked_cache,
        'Cookie': f'session={session_id}',
    }


def url_prepare(tickers, reports, frequency):
    """Build the download URL and the result key for every ticker/report pair.

    Args:
        tickers: iterable of organisation codes; each is placed in the
            ``OrganCode`` query parameter.
        reports: iterable of report names; each is appended to ``Download``
            in the URL path.
        frequency: value placed in the ``Frequency`` query parameter.

    Returns:
        tuple[list, list]: ``(urls, keys)`` of equal length, ordered
        ticker-major. ``keys[i]`` is ``'Q_<ticker>_<report>'`` and belongs to
        ``urls[i]``. The ``Q_`` prefix is fixed; it does not vary with
        ``frequency``.
    """
    url_template = ('https://fiin-fundamental.ssi.com.vn/FinancialStatement/'
                    'Download{}?language=vi&OrganCode={}&Skip=0&Frequency={}')

    # `reports` is re-walked for every ticker, so materialise it once; a
    # one-shot iterable would otherwise be exhausted after the first ticker.
    reports = list(reports)

    # Build both lists in a single pass so they stay aligned even when
    # `tickers` can only be iterated once.
    urls, keys = [], []
    for ticker in tickers:
        for report in reports:
            urls.append(url_template.format(report, ticker, frequency))
            keys.append(f'Q_{ticker}_{report}')
    return urls, keys


def fetch_financial_statement(url, key, timeout=30):
    """Download one financial statement workbook and parse it into a DataFrame.

    Args:
        url (str): fully formatted download URL (see ``url_prepare``).
        key (str): label used in the progress/error messages,
            e.g. ``Q_SSI_incomestatement``.
        timeout (float, optional): seconds passed to ``requests.get`` so a
            stalled connection cannot block a worker thread forever.
            Defaults to 30.

    Returns:
        pandas.DataFrame | None: the sheet with the first 7 rows skipped and
        rows containing any NaN dropped, or ``None`` when the download or the
        parsing failed (the error is printed, not raised).
    """
    try:
        response = requests.get(
            url, headers=get_random_headers(), timeout=timeout)
        # Report HTTP errors as such instead of handing an error page to the
        # Excel parser.
        response.raise_for_status()
        df = pd.read_excel(BytesIO(response.content),
                           skiprows=7, engine='openpyxl').dropna()
    except Exception as e:
        print(f'Error occurred while fetching {key}: {e}')
        # Back off after a failure only; the success path never slept in the
        # original either, because it returned before reaching the sleep.
        time.sleep(7)
        return None

    print(f'Successfully fetched: {key}')
    return df

# OPTION 1
def fetch_batch_fs(tickers, reports, frequency, batch_size=10):
    """Download the financial statements for every ticker/report pair concurrently.

    Args:
        tickers: iterable of company stock symbols.
        reports: iterable of report names (e.g. income statement, balance
            sheet, cash flow) as used in the download URL.
        frequency: value forwarded to the ``Frequency`` query parameter.
        batch_size (int, optional): maximum number of downloads running at the
            same time. Defaults to 10. ``None`` falls back to 10.

    Returns:
        dict: ``Q_<ticker>_<report>`` -> DataFrame for every download that
        succeeded; failed downloads are omitted.
    """
    current_results = {}
    urls, keys = url_prepare(tickers, reports, frequency)

    # batch_size used to be ignored (the pool was hard-coded to 10 workers).
    # It now caps the concurrency; the default keeps the old pool size.
    max_workers = 10 if batch_size is None else max(1, int(batch_size))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures_map = {executor.submit(
            fetch_financial_statement, url, key): key for url, key in zip(urls, keys)}

        for future in concurrent.futures.as_completed(futures_map):
            key = futures_map[future]
            # Guard the call that can actually raise; one failed task must not
            # discard the results that were already collected.
            try:
                result = future.result()
            except Exception as e:
                print(f'Error occurred while fetching {key}: {e}')
                continue
            if result is not None:
                current_results[key] = result

    return current_results


#OPTION 2
def fetch_batch_fs_v0(tickers, reports, frequency, batch_size=30):
    """Download financial statements in batches, pausing 2 minutes between batches.

    Args:
        tickers: iterable of company stock symbols.
        reports: iterable of report names as used in the download URL.
        frequency: value forwarded to the ``Frequency`` query parameter.
        batch_size (int, optional): number of downloads submitted per batch.
            Defaults to 30.

    Returns:
        dict: ``Q_<ticker>_<report>`` -> DataFrame for every download that
        succeeded; failed downloads are omitted.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    current_rp = {}
    urls, keys = url_prepare(tickers, reports, frequency)
    total_urls = len(urls)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i in range(0, total_urls, batch_size):
            batch_urls = urls[i:i+batch_size]
            batch_keys = keys[i:i+batch_size]
            # Remember which key each future belongs to. as_completed yields
            # in completion order, so zipping it with batch_keys (submission
            # order) stored results under the wrong key.
            futures_map = {executor.submit(fetch_financial_statement, url, key): key
                           for url, key in zip(batch_urls, batch_keys)}

            for future in concurrent.futures.as_completed(futures_map):
                key = futures_map[future]
                try:
                    result = future.result()
                except Exception as e:
                    print(f'Error occurred while fetching {key}: {e}')
                    continue
                if result is not None:
                    current_rp[key] = result

            # Add a pause of 2 minutes after each batch except the last one
            if i + batch_size < total_urls:
                print(
                    f'Pausing for 2 minutes after batch {i // batch_size + 1}...')
                time.sleep(120)

    return current_rp


In [None]:
def get_random_headers():
    """Assemble a randomized set of HTTP headers for one request.

    The User-Agent and the Cache-Control directive are each picked with
    ``random.choice``; the session cookie value is a fresh ``uuid.uuid4()``.
    All remaining headers are fixed strings.

    Returns:
        dict: header name -> header value.
    """
    browser_signatures = (
        # Chrome
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        # Firefox
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0',
        # Safari
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
        # Edge
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36 Edg/98.0.1234.56',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36 Edg/97.0.1234.56',
        # Opera
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36 OPR/98.0.1234.56',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36 OPR/97.0.1234.56',
        # Others (these repeat the Chrome entries, so those strings are drawn more often)
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
    )
    cache_options = ('no-cache', 'max-age=0', 'private')

    # Draw the User-Agent first and the cache directive second, so a seeded
    # `random` produces the same picks as before.
    picked_agent = random.choice(browser_signatures)
    picked_cache = random.choice(cache_options)
    session_id = str(uuid.uuid4())

    return {
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.9,vi-VN;q=0.8,vi;q=0.7',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'DNT': '1',
        'Origin': 'https://iboard.ssi.com.vn',
        'Referer': 'https://iboard.ssi.com.vn/',
        'sec-ch-ua': '"Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
        'sec-ch-ua-platform': 'Windows',
        'sec-ch-ua-mobile': '?0',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'User-Agent': picked_agent,
        'X-Fiin-User-ID': 'ID',
        'X-Fiin-Key': 'KEY',
        'X-Fiin-Seed': 'SEED',
        'Cache-Control': picked_cache,
        'Cookie': f'session={session_id}',
    }


def url_prepare(tickers, reports, frequency):
    """Build the download URL and the result key for every ticker/report pair.

    Args:
        tickers: iterable of organisation codes; each is placed in the
            ``OrganCode`` query parameter.
        reports: iterable of report names; each is appended to ``Download``
            in the URL path.
        frequency: value placed in the ``Frequency`` query parameter.

    Returns:
        tuple[list, list]: ``(urls, keys)`` of equal length, ordered
        ticker-major. ``keys[i]`` is ``'Q_<ticker>_<report>'`` and belongs to
        ``urls[i]``. The ``Q_`` prefix is fixed; it does not vary with
        ``frequency``.
    """
    url_template = ('https://fiin-fundamental.ssi.com.vn/FinancialStatement/'
                    'Download{}?language=vi&OrganCode={}&Skip=0&Frequency={}')

    # `reports` is re-walked for every ticker, so materialise it once; a
    # one-shot iterable would otherwise be exhausted after the first ticker.
    reports = list(reports)

    # Build both lists in a single pass so they stay aligned even when
    # `tickers` can only be iterated once.
    urls, keys = [], []
    for ticker in tickers:
        for report in reports:
            urls.append(url_template.format(report, ticker, frequency))
            keys.append(f'Q_{ticker}_{report}')
    return urls, keys


def fetch_financial_statement(url, key, timeout=30):
    """Download one financial statement workbook and parse it into a DataFrame.

    Args:
        url (str): fully formatted download URL (see ``url_prepare``).
        key (str): label used in the progress/error messages,
            e.g. ``Q_SSI_incomestatement``.
        timeout (float, optional): seconds passed to ``requests.get`` so a
            stalled connection cannot block a worker thread forever.
            Defaults to 30.

    Returns:
        pandas.DataFrame | None: the sheet with the first 7 rows skipped and
        rows containing any NaN dropped, or ``None`` when the download or the
        parsing failed (the error is printed, not raised).
    """
    try:
        response = requests.get(
            url, headers=get_random_headers(), timeout=timeout)
        # Report HTTP errors as such instead of handing an error page to the
        # Excel parser.
        response.raise_for_status()
        df = pd.read_excel(BytesIO(response.content),
                           skiprows=7, engine='openpyxl').dropna()
    except Exception as e:
        print(f'Error occurred while fetching {key}: {e}')
        # Back off after a failure only; the success path never slept in the
        # original either, because it returned before reaching the sleep.
        time.sleep(7)
        return None

    print(f'Successfully fetched: {key}')
    return df

# OPTION 1
def fetch_batch_fs(tickers, reports, frequency, batch_size=10):
    """Download the financial statements for every ticker/report pair concurrently.

    Args:
        tickers: iterable of company stock symbols.
        reports: iterable of report names as used in the download URL.
        frequency: value forwarded to the ``Frequency`` query parameter.
        batch_size (int, optional): maximum number of downloads running at the
            same time. Defaults to 10. ``None`` falls back to 10.

    Returns:
        dict: ``Q_<ticker>_<report>`` -> DataFrame for every download that
        succeeded; failed downloads are omitted.
    """
    current_rp = {}
    urls, keys = url_prepare(tickers, reports, frequency)

    # batch_size used to be ignored (the pool was hard-coded to 10 workers).
    # It now caps the concurrency; the default keeps the old pool size.
    max_workers = 10 if batch_size is None else max(1, int(batch_size))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures_map = {executor.submit(
            fetch_financial_statement, url, key): key for url, key in zip(urls, keys)}

        for future in concurrent.futures.as_completed(futures_map):
            key = futures_map[future]
            # Guard the call that can actually raise; one failed task must not
            # discard the results that were already collected.
            try:
                result = future.result()
            except Exception as e:
                print(f'Error occurred while fetching {key}: {e}')
                continue
            if result is not None:
                current_rp[key] = result

    return current_rp


#OPTION 2
def fetch_batch_fs_v0(tickers, reports, frequency, batch_size=30):
    """Download financial statements in batches, pausing 2 minutes between batches.

    Args:
        tickers: iterable of company stock symbols.
        reports: iterable of report names as used in the download URL.
        frequency: value forwarded to the ``Frequency`` query parameter.
        batch_size (int, optional): number of downloads submitted per batch.
            Defaults to 30.

    Returns:
        dict: ``Q_<ticker>_<report>`` -> DataFrame for every download that
        succeeded; failed downloads are omitted.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    current_rp = {}
    urls, keys = url_prepare(tickers, reports, frequency)
    total_urls = len(urls)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i in range(0, total_urls, batch_size):
            batch_urls = urls[i:i+batch_size]
            batch_keys = keys[i:i+batch_size]
            # Remember which key each future belongs to. as_completed yields
            # in completion order, so zipping it with batch_keys (submission
            # order) stored results under the wrong key.
            futures_map = {executor.submit(fetch_financial_statement, url, key): key
                           for url, key in zip(batch_urls, batch_keys)}

            for future in concurrent.futures.as_completed(futures_map):
                key = futures_map[future]
                try:
                    result = future.result()
                except Exception as e:
                    print(f'Error occurred while fetching {key}: {e}')
                    continue
                if result is not None:
                    current_rp[key] = result

            # Add a pause of 2 minutes after each batch except the last one
            if i + batch_size < total_urls:
                print(
                    f'Pausing for 2 minutes after batch {i // batch_size + 1}...')
                time.sleep(120)

    return current_rp


In [None]:
def get_random_headers():
    """Assemble a randomized set of HTTP headers for one request.

    The User-Agent and the Cache-Control directive are each picked with
    ``random.choice``; the session cookie value is a fresh ``uuid.uuid4()``.
    All remaining headers are fixed strings.

    Returns:
        dict: header name -> header value.
    """
    browser_signatures = (
        # Chrome
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        # Firefox
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0',
        # Safari
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
        # Edge
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36 Edg/98.0.1234.56',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36 Edg/97.0.1234.56',
        # Opera
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36 OPR/98.0.1234.56',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36 OPR/97.0.1234.56',
        # Others (these repeat the Chrome entries, so those strings are drawn more often)
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.1234.56 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.1234.56 Safari/537.36',
    )
    cache_options = ('no-cache', 'max-age=0', 'private')

    # Draw the User-Agent first and the cache directive second, so a seeded
    # `random` produces the same picks as before.
    picked_agent = random.choice(browser_signatures)
    picked_cache = random.choice(cache_options)
    session_id = str(uuid.uuid4())

    return {
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.9,vi-VN;q=0.8,vi;q=0.7',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'DNT': '1',
        'Origin': 'https://iboard.ssi.com.vn',
        'Referer': 'https://iboard.ssi.com.vn/',
        'sec-ch-ua': '"Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
        'sec-ch-ua-platform': 'Windows',
        'sec-ch-ua-mobile': '?0',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'User-Agent': picked_agent,
        'X-Fiin-User-ID': 'ID',
        'X-Fiin-Key': 'KEY',
        'X-Fiin-Seed': 'SEED',
        'Cache-Control': picked_cache,
        'Cookie': f'session={session_id}',
    }


def url_prepare(tickers, reports, frequency):
    """Build the download URL and the result key for every ticker/report pair.

    Args:
        tickers: iterable of organisation codes; each is placed in the
            ``OrganCode`` query parameter.
        reports: iterable of report names; each is appended to ``Download``
            in the URL path.
        frequency: value placed in the ``Frequency`` query parameter.

    Returns:
        tuple[list, list]: ``(urls, keys)`` of equal length, ordered
        ticker-major. ``keys[i]`` is ``'Q_<ticker>_<report>'`` and belongs to
        ``urls[i]``. The ``Q_`` prefix is fixed; it does not vary with
        ``frequency``.
    """
    url_template = ('https://fiin-fundamental.ssi.com.vn/FinancialStatement/'
                    'Download{}?language=vi&OrganCode={}&Skip=0&Frequency={}')

    # `reports` is re-walked for every ticker, so materialise it once; a
    # one-shot iterable would otherwise be exhausted after the first ticker.
    reports = list(reports)

    # Build both lists in a single pass so they stay aligned even when
    # `tickers` can only be iterated once.
    urls, keys = [], []
    for ticker in tickers:
        for report in reports:
            urls.append(url_template.format(report, ticker, frequency))
            keys.append(f'Q_{ticker}_{report}')
    return urls, keys


def fetch_financial_statement(url, key, timeout=30):
    """Download one financial statement workbook and parse it into a DataFrame.

    Args:
        url (str): fully formatted download URL (see ``url_prepare``).
        key (str): label used in the progress/error messages,
            e.g. ``Q_SSI_incomestatement``.
        timeout (float, optional): seconds passed to ``requests.get`` so a
            stalled connection cannot block a worker thread forever.
            Defaults to 30.

    Returns:
        pandas.DataFrame | None: the sheet with the first 7 rows skipped and
        rows containing any NaN dropped, or ``None`` when the download or the
        parsing failed (the error is printed, not raised).
    """
    try:
        response = requests.get(
            url, headers=get_random_headers(), timeout=timeout)
        # Report HTTP errors as such instead of handing an error page to the
        # Excel parser.
        response.raise_for_status()
        df = pd.read_excel(BytesIO(response.content),
                           skiprows=7, engine='openpyxl').dropna()
    except Exception as e:
        print(f'Error occurred while fetching {key}: {e}')
        # Back off after a failure only; the success path never slept in the
        # original either, because it returned before reaching the sleep.
        time.sleep(7)
        return None

    print(f'Successfully fetched: {key}')
    return df

# OPTION 1
def fetch_batch_fs(tickers, reports, frequency, batch_size=10):
    """Download the financial statements for every ticker/report pair concurrently.

    Args:
        tickers: iterable of company stock symbols.
        reports: iterable of report names as used in the download URL.
        frequency: value forwarded to the ``Frequency`` query parameter.
        batch_size (int, optional): maximum number of downloads running at the
            same time. Defaults to 10. ``None`` falls back to 10.

    Returns:
        dict: ``Q_<ticker>_<report>`` -> DataFrame for every download that
        succeeded; failed downloads are omitted.
    """
    current_rp = {}
    urls, keys = url_prepare(tickers, reports, frequency)

    # batch_size used to be ignored (the pool was hard-coded to 10 workers).
    # It now caps the concurrency; the default keeps the old pool size.
    max_workers = 10 if batch_size is None else max(1, int(batch_size))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures_map = {executor.submit(
            fetch_financial_statement, url, key): key for url, key in zip(urls, keys)}

        for future in concurrent.futures.as_completed(futures_map):
            key = futures_map[future]
            # Guard the call that can actually raise; one failed task must not
            # discard the results that were already collected.
            try:
                result = future.result()
            except Exception as e:
                print(f'Error occurred while fetching {key}: {e}')
                continue
            if result is not None:
                current_rp[key] = result

    return current_rp


#OPTION 2
def fetch_batch_fs_v0(tickers, reports, frequency, batch_size=30):
    """Download financial statements in batches, pausing 2 minutes between batches.

    Args:
        tickers: iterable of company stock symbols.
        reports: iterable of report names as used in the download URL.
        frequency: value forwarded to the ``Frequency`` query parameter.
        batch_size (int, optional): number of downloads submitted per batch.
            Defaults to 30.

    Returns:
        dict: ``Q_<ticker>_<report>`` -> DataFrame for every download that
        succeeded; failed downloads are omitted.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    current_rp = {}
    urls, keys = url_prepare(tickers, reports, frequency)
    total_urls = len(urls)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i in range(0, total_urls, batch_size):
            batch_urls = urls[i:i+batch_size]
            batch_keys = keys[i:i+batch_size]
            # Remember which key each future belongs to. as_completed yields
            # in completion order, so zipping it with batch_keys (submission
            # order) stored results under the wrong key.
            futures_map = {executor.submit(fetch_financial_statement, url, key): key
                           for url, key in zip(batch_urls, batch_keys)}

            for future in concurrent.futures.as_completed(futures_map):
                key = futures_map[future]
                try:
                    result = future.result()
                except Exception as e:
                    print(f'Error occurred while fetching {key}: {e}')
                    continue
                if result is not None:
                    current_rp[key] = result

            # Add a pause of 2 minutes after each batch except the last one
            if i + batch_size < total_urls:
                print(
                    f'Pausing for 2 minutes after batch {i // batch_size + 1}...')
                time.sleep(120)

    return current_rp


In [16]:
# Company universe: the 'ticker' column of this workbook is what gets downloaded below.
# NOTE(review): absolute, machine-specific path — TODO move to a configurable data directory.
list_companies = pd.read_excel(
    r'C:\Users\Dell\Downloads\data_analytics\financial_project\list_of_companies.xlsx')

ticker_list = list_companies['ticker'].to_list()

# NOTE(review): `tickers` is not passed to the fetch call in this cell
# (`ticker_list` is) — confirm whether this shortlist is still needed.
tickers = ['ACB', 'BID', 'BVH', 'FPT', 'HDB', 'HPG', 'IJC', 'MSN', 'MBB', 'MWG', 'NVL', 'GAS', 'POW',
           'PDR', 'STB', 'SAB', 'SSI', 'TCB', 'TPB', 'VIB', 'VCB', 'VJC', 'VPB', 'VNM', 'VRE', 'VIC', 'VHM']

# Report names; url_prepare appends each one to 'Download' in the URL path.
reports = ['balancesheet', 'incomestatement', 'cashflow']

# Forwarded to the 'Frequency' query parameter of the download URL.
frequency = 'quarterly'

# Network-bound: one download per (ticker, report) pair; failed downloads are
# printed by fetch_financial_statement and left out of the resulting dict.
current_rp = fetch_batch_fs(ticker_list, reports, frequency)


Error occurred while fetching Q_VVS_cashflow: File is not a zip file
Error occurred while fetching Q_VVS_incomestatement: File is not a zip file
Error occurred while fetching Q_XDC_incomestatement: File is not a zip file
Error occurred while fetching Q_HSV_incomestatement: File is not a zip file
Error occurred while fetching Q_HSV_cashflow: File is not a zip file
Error occurred while fetching Q_HSV_balancesheet: File is not a zip file
Error occurred while fetching Q_XDC_balancesheet: File is not a zip file
Error occurred while fetching Q_CST_balancesheet: File is not a zip file
Error occurred while fetching Q_VVS_balancesheet: File is not a zip file
Error occurred while fetching Q_XDC_cashflow: File is not a zip file
Error occurred while fetching Q_CST_incomestatement: File is not a zip file
Error occurred while fetching Q_BVL_incomestatement: File is not a zip file
Error occurred while fetching Q_SGI_balancesheet: File is not a zip file
Error occurred while fetching Q_SGI_incomestatem

In [None]:

# Directory for the pickled reports; passed to save_report further down.
# NOTE(review): absolute, machine-specific path — TODO make configurable.
dir_name = r"C:\Users\Dell\Downloads\data_analytics\financial_project\vn_quarterly_reports_v1"


def save_report(report_data, dir_name):
    """Pickle every DataFrame in ``report_data`` to ``<dir_name>/<key>.pkl``.

    Args:
        report_data (dict): key -> DataFrame; the key becomes the file name
            (without extension).
        dir_name (str): target directory; created, including parents, when it
            does not exist yet.

    Returns:
        None
    """
    # Without this, to_pickle fails on a fresh machine where the output
    # folder has not been created by hand.
    os.makedirs(dir_name, exist_ok=True)

    for key, df in report_data.items():
        file_name = f"{key}.pkl"
        file_path = os.path.join(dir_name, file_name)
        df.to_pickle(file_path)


def load_report(dir_name):
    """Load every ``<key>.pkl`` file found in ``dir_name`` into a dict.

    The previous version looped over a global ``report_data`` that is never
    defined, so it raised ``NameError``. The keys are now recovered from the
    file names, i.e. from what ``save_report`` wrote.

    Args:
        dir_name (str): directory containing the ``.pkl`` files.

    Returns:
        dict: file name without the ``.pkl`` extension -> unpickled object,
        in sorted file-name order. Files with other extensions are ignored.
    """
    # Loading the DataFrames
    my_dict = {}
    for file_name in sorted(os.listdir(dir_name)):
        key, extension = os.path.splitext(file_name)
        if extension != '.pkl':
            continue
        file_path = os.path.join(dir_name, file_name)
        # NOTE: unpickling can execute arbitrary code; only point this at
        # directories whose files you produced yourself.
        my_dict[key] = pd.read_pickle(file_path)
    return my_dict




In [None]:
# Write each DataFrame in current_rp to dir_name as '<key>.pkl' (see save_report).
save_report(current_rp, dir_name)


NameError: name 'report_data' is not defined