In [10]:
import requests
from bs4 import BeautifulSoup

def get_issuer_codes():
    url = 'https://www.mse.mk/mk/stats/symbolhistory/REPL'  # Replace with actual URL if different
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    issuers = []
    # Assuming issuer codes are within a <select> dropdown in the HTML
    for option in soup.select('select#issuer-dropdown option'):
        code = option.get('value')
        if code and not code.isdigit():  # Skip numeric-only codes
            issuers.append(code)
    return issuers

# Test the function
issuer_codes = get_issuer_codes()
print("Issuer codes:", issuer_codes)


Issuer codes: []


In [14]:
from selenium.webdriver.support.ui import Select
from datetime import datetime

def change_company_code(driver, company):
    """Selects the company code in the dropdown menu on the webpage."""
    code_dropdown = Select(driver.find_element(By.ID, 'Code'))
    code_dropdown.select_by_value(company)

    # Set the ToDate input field to the current date
    input_to = driver.find_element(By.ID, 'ToDate')
    input_to.clear()
    input_to.send_keys(transform_date_to_string(datetime.now()))


In [11]:
from abc import ABC, abstractmethod

class Filter(ABC):
    @abstractmethod
    async def process(self, driver, data):
        """Process method to be implemented by each filter subclass."""
        pass


In [12]:
import re
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By

class CodeDownloaderFilter(Filter):
    async def process(self, driver, date):
        print("Filter 1 starting...")
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        options = soup.find(id='Code').find_all('option')
        
        for option in options:
            if not re.search(r'\d', option.text):
                date.append(option.text.strip())
                
        return driver, date[:5]  # Limit the output to 5 companies as in the example


In [13]:
import os
import pandas as pd
from datetime import datetime, timedelta

class SaveDataFilter(Filter):
    async def process(self, driver, data):
        print("Filter 2 starting...")
        dates = {}

        for company in data:
            print(f"Processing company: {company}")
            await change_company_code(driver, company)
            await change_input_values(driver, datetime.now())
            
            if check_existing_data(company):
                last_date = await self.get_last_date(company)
                dates[company] = last_date
                print(f"Last date in database for {company}: {last_date}")
            else:
                dates[company] = transform_date_to_string(datetime.now())
                await self.save_last_10_years(driver, company)
                
        return driver, dates

    async def get_last_date(self, company_name):
        path = os.path.join('..', 'database', f'{company_name}.xlsx')
        if os.path.exists(path):
            df = pd.read_excel(path)
            return df.iloc[0, 0]  # Assuming date is in the first column
        return transform_date_to_string(datetime.now())

    async def save_last_10_years(self, driver, company_name):
        for i in range(10):
            await self.change_date(driver, i)
            await click_button(driver)
            time.sleep(3)
            df = await get_df(driver)
            output_path = os.path.join('..', 'database', f'{company_name}.xlsx')
            
            if os.path.exists(output_path):
                existing_df = pd.read_excel(output_path)
                df = pd.concat([existing_df, df], ignore_index=True)
                
            df.to_excel(output_path, index=False)

    async def change_date(self, driver, year_offset):
        input_from = driver.find_element(By.ID, 'FromDate')
        input_to = driver.find_element(By.ID, 'ToDate')

        end_date = datetime.now() - timedelta(days=year_offset * 365)
        start_date = end_date - timedelta(days=365)

        input_from.clear()
        input_from.send_keys(transform_date_to_string(start_date))
        input_to.clear()
        input_to.send_keys(transform_date_to_string(end_date))
