In [1]:
import os
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from threading import Thread


In [2]:
def format_date(date):
    """Render ``date`` as a zero-padded ``MM/DD/YYYY`` string.

    Args:
        date: Any object exposing ``strftime`` (callers in this notebook
            pass ``datetime`` instances).

    Returns:
        str: The formatted date, e.g. ``"01/05/2020"``.
    """
    month_day_year = "%m/%d/%Y"
    return date.strftime(month_day_year)

def init_directory(path='./data'):
    """Make sure the output directory exists.

    Args:
        path: Directory to create. Defaults to ``./data``, the location the
            CSV exports in this notebook are written to.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and closes the gap between
    # "check that it is missing" and "create it".
    os.makedirs(path, exist_ok=True)

# Root of the per-symbol history pages; the issuer code is appended to it
# when a request URL is built.
BASE_URL = 'https://www.mse.mk/en/stats/symbolhistory/'
# Create the output directory up front so the CSV exports have a destination.
init_directory()


In [8]:
def fetch_companies():
    """Return the issuer codes listed on the symbol-history page.

    Loads one symbol-history page (KMB) and reads the text of every
    ``<option>`` found under a ``.form-control`` element. Entries whose text
    contains a digit are dropped (presumably to skip non-share instruments
    -- TODO confirm against the site).

    Returns:
        list[str]: Option texts without digits, in page order. An empty list
        is returned, after printing a message, when the page does not answer
        with HTTP 200.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    sample_url = 'https://www.mse.mk/en/stats/symbolhistory/KMB'
    # Without a timeout requests can wait forever on a stalled server.
    # The tuple is (connect timeout, read timeout) in seconds.
    response = requests.get(sample_url, timeout=(25, 60))

    if response.status_code != 200:
        # Do not scrape an error page as if it were the real dropdown.
        print(f"Failed to fetch company list (HTTP {response.status_code})")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    options = soup.select('.form-control option')

    return [
        option.text
        for option in options
        if not any(char.isdigit() for char in option.text)
    ]

# Fetch the issuer list once; the batch scrape iterates over this
# module-level name.
companies = fetch_companies()
companies


['ADIN',
 'ALK',
 'ALKB',
 'AMBR',
 'AMEH',
 'APTK',
 'ATPP',
 'AUMK',
 'BANA',
 'BGOR',
 'BIKF',
 'BIM',
 'BLTU',
 'CBNG',
 'CDHV',
 'CEVI',
 'CKB',
 'CKBKO',
 'DEBA',
 'DIMI',
 'EDST',
 'ELMA',
 'ELNC',
 'ENER',
 'ENSA',
 'EUHA',
 'EUMK',
 'EVRO',
 'FAKM',
 'FERS',
 'FKTL',
 'FROT',
 'FUBT',
 'GALE',
 'GDKM',
 'GECK',
 'GECT',
 'GIMS',
 'GRDN',
 'GRNT',
 'GRSN',
 'GRZD',
 'GTC',
 'GTRG',
 'IJUG',
 'INB',
 'INHO',
 'INOV',
 'INPR',
 'INTP',
 'JAKO',
 'JUSK',
 'KARO',
 'KDFO',
 'KJUBI',
 'KKST',
 'KLST',
 'KMB',
 'KMPR',
 'KOMU',
 'KONF',
 'KONZ',
 'KORZ',
 'KPSS',
 'KULT',
 'KVAS',
 'LAJO',
 'LHND',
 'LOTO',
 'LOZP',
 'MAGP',
 'MAKP',
 'MAKS',
 'MB',
 'MERM',
 'MKSD',
 'MLKR',
 'MODA',
 'MPOL',
 'MPT',
 'MPTE',
 'MTUR',
 'MZHE',
 'MZPU',
 'NEME',
 'NOSK',
 'OBPP',
 'OILK',
 'OKTA',
 'OMOS',
 'OPFO',
 'OPTK',
 'ORAN',
 'OSPO',
 'OTEK',
 'PELK',
 'PGGV',
 'PKB',
 'POPK',
 'PPIV',
 'PROD',
 'PROT',
 'PTRS',
 'RADE',
 'REPL',
 'RIMI',
 'RINS',
 'RZEK',
 'RZIT',
 'RZIZ',
 'RZLE',
 'RZLV',


In [3]:
def _format_price(value):
    """Re-render a numeric string as ``1.234,56`` (dot thousands, comma decimals).

    Unambiguous separator layouts are resolved before parsing:

    * both ``,`` and ``.`` present: whichever occurs last is the decimal mark;
    * one separator repeated (``1,234,567``): it is a thousands separator.

    A value with at most one separator keeps the historical interpretation,
    where a lone comma is read as the decimal mark.

    Args:
        value: Raw cell text.

    Returns:
        str: The value with two decimals in ``1.234,56`` style, or ``value``
        unchanged when it cannot be parsed as a number (e.g. an empty cell).
    """
    text = value.strip()
    last_comma = text.rfind(',')
    last_dot = text.rfind('.')

    if last_comma != -1 and last_dot != -1:
        if last_dot > last_comma:
            # "25,000.00": commas group thousands.
            normalized = text.replace(',', '')
        else:
            # "25.000,00": dots group thousands, comma is the decimal mark.
            normalized = text.replace('.', '').replace(',', '.')
    elif text.count(',') > 1:
        normalized = text.replace(',', '')
    elif text.count('.') > 1:
        normalized = text.replace('.', '')
    else:
        # NOTE(review): a single comma is ambiguous ("419,730" could be a
        # grouped integer); it is still read as a decimal comma here, as before
        # -- confirm against the page's actual number format.
        normalized = text.replace(',', '.')

    try:
        number = float(normalized)
    except ValueError:
        return value

    # Format with ',' thousands / '.' decimals, then swap the two characters.
    return "{:,.2f}".format(number).translate(str.maketrans(',.', '.,'))


def parse_cells(row):
    """Turn one table row into a dict keyed by column name.

    Args:
        row: Object exposing ``find_all('td')`` whose results have a ``.text``
            attribute (a BeautifulSoup ``<tr>`` in this notebook).

    Returns:
        dict: Nine entries. Price and turnover columns go through
        ``_format_price``; ``'%chg.'`` has its ``.`` replaced by ``,``;
        ``'Date'`` and ``'Volume'`` are passed through as-is.

    Raises:
        IndexError: If the row has fewer than nine ``<td>`` cells.
    """
    cells = row.find_all('td')

    item = {
        'Date': cells[0].text,
        'Last trade price': _format_price(cells[1].text),
        'Max': _format_price(cells[2].text),
        'Min': _format_price(cells[3].text),
        'Avg Price': _format_price(cells[4].text),
        '%chg.': cells[5].text.replace('.', ','),
        'Volume': cells[6].text,
        'TurnoverBEST_MKD': _format_price(cells[7].text),
        'TotalTurnoverMKD': _format_price(cells[8].text)
    }

    return item


In [4]:
def scrape_data_from_url(company, date_from, date_to):
    """Download one date window of history for ``company`` as a DataFrame.

    Args:
        company: Issuer code appended to ``BASE_URL``.
        date_from: Start of the window, already formatted for the
            ``FromDate`` query parameter.
        date_to: End of the window, already formatted for the ``ToDate``
            query parameter.

    Returns:
        pandas.DataFrame: One row per ``<tr>`` of the first ``<tbody>`` on the
        page, parsed by ``parse_cells``. An empty frame is returned when the
        request fails, the status is not 200, or the page has no ``<tbody>``.
    """
    url = f"{BASE_URL}{company}?FromDate={date_from}&ToDate={date_to}"
    try:
        # Tuple is (connect timeout, read timeout) in seconds.
        response = requests.get(url, timeout=(25, 60))
    except requests.RequestException as exc:
        # A timeout or dropped connection for one window should not abort a
        # long batch run; report it the same way as a bad status code.
        print(f"Failed to fetch data for {company} from {date_from} to {date_to}: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch data for {company} from {date_from} to {date_to}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')
    # Only the first <tbody> holds the rows of interest; look it up once.
    table_body = soup.find('tbody')
    rows = table_body.find_all('tr') if table_body is not None else []
    data = [parse_cells(row) for row in rows]

    return pd.DataFrame(data)



In [5]:
def fetch_data_for_company(company, years_back=10):
    """Scrape ``years_back`` consecutive 365-day windows and save them as CSV.

    The history is requested one window at a time, newest first, and the
    non-empty results are concatenated into ``./data/<company>.csv``.

    Args:
        company: Issuer code to scrape.
        years_back: Number of 365-day windows to request, counted back from
            today.

    Returns:
        None. The outcome is reported with ``print``.
    """
    today = datetime.today()

    company_data = []
    for year in range(years_back):
        date_to = format_date(today - timedelta(days=365 * year))
        date_from = format_date(today - timedelta(days=365 * (year + 1)))

        yearly_data = scrape_data_from_url(company, date_from, date_to)
        if not yearly_data.empty:
            company_data.append(yearly_data)

    if company_data:
        # Each window ends on the date the next (older) one starts on, so a
        # row for that shared day can be returned twice; keep one copy.
        final_df = pd.concat(company_data, ignore_index=True).drop_duplicates()
        final_df.to_csv(f'./data/{company}.csv', index=False)
        print(f"Data for {company} saved to ./data/{company}.csv")
    else:
        print(f"No data collected for {company}")

# Single-issuer run with the default ten-window history, ahead of the full batch.
fetch_data_for_company("TEL")


Data for TEL saved to ./data/TEL.csv


In [9]:
def fetch_data_for_all_companies(years_back=10):
    """Scrape every issuer in the module-level ``companies`` list.

    Issuers are processed one after another via ``fetch_data_for_company``;
    the total wall-clock duration is printed in minutes once the loop ends.

    Args:
        years_back: Forwarded unchanged to ``fetch_data_for_company``.
    """
    started_at = time.time()

    for ticker in companies:
        print(f"Fetching data for {ticker}")
        fetch_data_for_company(ticker, years_back=years_back)

    seconds_taken = time.time() - started_at
    print(f"Total time taken to fetch data: {seconds_taken / 60:.2f} minutes")

# Full scrape over every entry in `companies`; the elapsed time in minutes is
# printed once the loop finishes.
fetch_data_for_all_companies()


Fetching data for ADIN
Data for ADIN saved to ./data/ADIN.csv
Fetching data for ALK
Data for ALK saved to ./data/ALK.csv
Fetching data for ALKB
Data for ALKB saved to ./data/ALKB.csv
Fetching data for AMBR
Data for AMBR saved to ./data/AMBR.csv
Fetching data for AMEH
Data for AMEH saved to ./data/AMEH.csv
Fetching data for APTK
Data for APTK saved to ./data/APTK.csv
Fetching data for ATPP
Data for ATPP saved to ./data/ATPP.csv
Fetching data for AUMK
Data for AUMK saved to ./data/AUMK.csv
Fetching data for BANA
Data for BANA saved to ./data/BANA.csv
Fetching data for BGOR
Data for BGOR saved to ./data/BGOR.csv
Fetching data for BIKF
Data for BIKF saved to ./data/BIKF.csv
Fetching data for BIM
Data for BIM saved to ./data/BIM.csv
Fetching data for BLTU
Data for BLTU saved to ./data/BLTU.csv
Fetching data for CBNG
Data for CBNG saved to ./data/CBNG.csv
Fetching data for CDHV
Data for CDHV saved to ./data/CDHV.csv
Fetching data for CEVI
Data for CEVI saved to ./data/CEVI.csv
Fetching data 

Failed to fetch data for KORZ from 11/16/2019 to 11/15/2020
Failed to fetch data for KORZ from 11/16/2018 to 11/16/2019
Failed to fetch data for KORZ from 11/16/2017 to 11/16/2018
Data for KORZ saved to ./data/KORZ.csv
Fetching data for KPSS
Data for KPSS saved to ./data/KPSS.csv
Fetching data for KULT
Data for KULT saved to ./data/KULT.csv
Fetching data for KVAS
Data for KVAS saved to ./data/KVAS.csv
Fetching data for LAJO
Data for LAJO saved to ./data/LAJO.csv
Fetching data for LHND
Data for LHND saved to ./data/LHND.csv
Fetching data for LOTO
Data for LOTO saved to ./data/LOTO.csv
Fetching data for LOZP
Data for LOZP saved to ./data/LOZP.csv
Fetching data for MAGP
Failed to fetch data for MAGP from 11/15/2023 to 11/14/2024
Failed to fetch data for MAGP from 11/15/2022 to 11/15/2023
Failed to fetch data for MAGP from 11/15/2021 to 11/15/2022
Failed to fetch data for MAGP from 11/15/2020 to 11/15/2021
Failed to fetch data for MAGP from 11/16/2019 to 11/15/2020
Data for MAGP saved to .

Data for TNB saved to ./data/TNB.csv
Fetching data for TRDB
Data for TRDB saved to ./data/TRDB.csv
Fetching data for TRPS
Data for TRPS saved to ./data/TRPS.csv
Fetching data for TRUB
Data for TRUB saved to ./data/TRUB.csv
Fetching data for TSMP
Data for TSMP saved to ./data/TSMP.csv
Fetching data for TSZS
Data for TSZS saved to ./data/TSZS.csv
Fetching data for TTK
Data for TTK saved to ./data/TTK.csv
Fetching data for TTKO
Data for TTKO saved to ./data/TTKO.csv
Fetching data for UNI
Data for UNI saved to ./data/UNI.csv
Fetching data for USJE
Data for USJE saved to ./data/USJE.csv
Fetching data for VARG
Data for VARG saved to ./data/VARG.csv
Fetching data for VFPM
Data for VFPM saved to ./data/VFPM.csv
Fetching data for VITA
Data for VITA saved to ./data/VITA.csv
Fetching data for VROS
Data for VROS saved to ./data/VROS.csv
Fetching data for VSC
Data for VSC saved to ./data/VSC.csv
Fetching data for VTKS
Data for VTKS saved to ./data/VTKS.csv
Fetching data for ZAS
Data for ZAS saved t