## Import libraries

In [4]:
import math
import pandas as pd
import numpy as np
import random
import yfinance as yf
import os

import requests
from bs4 import BeautifulSoup
import re
import pathlib

## Time period

In [5]:
start_date = '2020-01-01'
end_date = '2023-12-31'

## Functions for downloading

In [9]:
def load_historical_data(tickers: list, start_date: str, end_date: str, name_folder: str) -> None:
    if not os.path.exists('../' + name_folder):
        os.makedirs('../' + name_folder)

    for ticker in tickers:
        print(f"Download historical data for {ticker}")
        data = yf.download(ticker, start=start_date, end=end_date)
        data = pd.DataFrame(data)
        data.to_csv(f'../{name_folder}/{ticker}.csv')
    print("Data loading is complete!")

def ticker_preprocessing(tickers: list, template_add: str) -> list:

    for ticker_index in range(len(tickers)):
        tickers[ticker_index] = tickers[ticker_index] + template_add

    return tickers

def get_stock_tickers(url: str, template_parsing: str) -> list:
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
    }

    response = requests.get(url, headers=headers)

    # response.raise_for_status()  
    soup = BeautifulSoup(response.text, 'html.parser')
    # print(soup)

    lines = []
    for link in soup.find_all('tr', class_='row-RdUXZpkv listRow'):
        lines.append(link)
    
    tickers = []
    for line in lines:
        html_string=str(line)
        # match = re.search(r'data-rowkey="MOEX:(.*?)"', html_string)
        match = re.search(r'data-rowkey="'+ template_parsing + r':(.*?)"', html_string)
        if match:
            tickers.append(match.group(1))
    return tickers

def get_number_stocks(folder_path: str) -> None:
    file_count = len([f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))])
    print(f'Number of stocks in a folder: {file_count}')

## Downloading stock market data Brazil

In [7]:
tickers = get_stock_tickers(url = "https://ru.tradingview.com/markets/stocks-brazil/market-movers-large-cap/", 
                            template_parsing = 'BMFBOVESPA')

tickers = ticker_preprocessing(tickers = tickers,
                               template_add = '.SA')

load_historical_data(tickers = tickers,
                    start_date = start_date,
                    end_date = end_date,
                    name_folder = 'Brazil')
        
get_number_stocks(folder_path = '../Brazil')

Download historical data for PETR3.SA


[*********************100%***********************]  1 of 1 completed


Download historical data for ITUB3.SA


[*********************100%***********************]  1 of 1 completed


Download historical data for VALE3.SA


[*********************100%***********************]  1 of 1 completed

Download historical data for WEGE3.SA





KeyboardInterrupt: 

## Downloading stock market data Russia

In [41]:
tickers = get_stock_tickers(url = "https://ru.tradingview.com/markets/stocks-russia/market-movers-large-cap/", 
                            template_parsing = 'MOEX')

tickers = ticker_preprocessing(tickers = tickers,
                               template_add = '.ME')
load_historical_data(tickers = tickers,
                    start_date = start_date_,
                    end_date = end_date_,
                    name_folder = 'Russia')
        
get_number_stocks(folder_path = '../Russia')

## Downloading stock market data India

In [None]:

tickers = get_stock_tickers(url = "https://ru.tradingview.com/markets/stocks-india/market-movers-large-cap/", 
                            template_parsing = 'NSE')

tickers = ticker_preprocessing(tickers = tickers,
                               template_add = '.NS')

load_historical_data(tickers = tickers,
                    start_date = start_date,
                    end_date = end_date,
                    name_folder = 'India')
        
get_number_stocks(folder_path = '../India')

## Downloading stock market data China

In [None]:
tickers = get_stock_tickers(url = "https://ru.tradingview.com/markets/stocks-china/market-movers-large-cap/", 
                            template_parsing = 'SZSE')

tickers = ticker_preprocessing(tickers = tickers,
                               template_add = '.SZ')

load_historical_data(tickers = tickers,
                    start_date = start_date,
                    end_date = end_date,
                    name_folder = 'China')
        
get_number_stocks(folder_path = '../China')

## Downloading stock market data South Africa

In [None]:
tickers = get_stock_tickers(url = "https://ru.tradingview.com/markets/stocks-south-africa/market-movers-large-cap/", 
                            template_parsing = 'JSE')
                    
tickers = ticker_preprocessing(tickers = tickers,
                               template_add = '.JO')                            

load_historical_data(tickers = tickers,
                    start_date = start_date,
                    end_date = end_date,
                    name_folder = 'South Africa')
        
get_number_stocks(folder_path = '../South Africa')

## Downloading stock market data SP100


In [18]:
start_date = '2016-01-01'
end_date = '2019-12-31'
tickers = get_stock_tickers(url = "https://ru.tradingview.com/symbols/SP-OEX/components/", 
                            template_parsing = 'NYSE')
                    
                        
load_historical_data(tickers = tickers,
                    start_date = start_date,
                    end_date = end_date,
                    name_folder = 'SP100_1619')
        
get_number_stocks(folder_path = '../SP100_1619')


tickers = get_stock_tickers(url = "https://ru.tradingview.com/symbols/SP-OEX/components/", 
                            template_parsing = 'NASDAQ')
                    
                        
load_historical_data(tickers = tickers,
                    start_date = start_date,
                    end_date = end_date,
                    name_folder = 'SP100_1619')
        
get_number_stocks(folder_path = '../SP100_1619')


<!DOCTYPE html>

<html class="is-not-authenticated is-not-pro is-not-trial" dir="ltr" lang="ru">
<head><meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" name="viewport"/> <script nonce="qKfGNuXo5h8+yZG7HKbGlg==">window.initData = {};</script>
<link href="https://notifications.tradingview.com/news/channel" rel="news-streaming-url"/><title>Индекс S&amp;P 100 – Список акций – SP:OEX — TradingView</title>
<link crossorigin="use-credentials" href="https://pricealerts.tradingview.com" rel="preconnect"/><!-- render_css_bundle('big_footer') -->
<link crossorigin="anonymous" href="https://static.tradingview.com/static/bundles/65857.1476614bdd184136d7fd.css" rel="stylesheet" type="text/css">
<link crossorigin="anonymous" href="https://static.tradingview.com/static/bundles/36010.a1a91e4e3b8d4d56af6a.css" rel="stylesheet" type="text/css">
<link crossorigin="anonymous" href="https://static.tradingview.com/static/bu

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Download historical data for LLY


[*********************100%***********************]  1 of 1 completed


Download historical data for WMT


[*********************100%***********************]  1 of 1 completed


Download historical data for JPM


[*********************100%***********************]  1 of 1 completed


Download historical data for V


[*********************100%***********************]  1 of 1 completed


Download historical data for XOM


[*********************100%***********************]  1 of 1 completed


Download historical data for UNH


[*********************100%***********************]  1 of 1 completed


Download historical data for ORCL


[*********************100%***********************]  1 of 1 completed


Download historical data for MA


[*********************100%***********************]  1 of 1 completed


Download historical data for PG


[*********************100%***********************]  1 of 1 completed


Download historical data for HD


[*********************100%***********************]  1 of 1 completed


Download historical data for JNJ


[*********************100%***********************]  1 of 1 completed


Download historical data for ABBV


[*********************100%***********************]  1 of 1 completed


Download historical data for BAC


[*********************100%***********************]  1 of 1 completed


Download historical data for CRM


[*********************100%***********************]  1 of 1 completed


Download historical data for KO


[*********************100%***********************]  1 of 1 completed


Download historical data for CVX


[*********************100%***********************]  1 of 1 completed


Download historical data for MRK


[*********************100%***********************]  1 of 1 completed


Download historical data for ACN


[*********************100%***********************]  1 of 1 completed


Download historical data for WFC


[*********************100%***********************]  1 of 1 completed


Download historical data for MCD


[*********************100%***********************]  1 of 1 completed


Download historical data for TMO


[*********************100%***********************]  1 of 1 completed


Download historical data for PM


[*********************100%***********************]  1 of 1 completed


Download historical data for ABT


[*********************100%***********************]  1 of 1 completed


Download historical data for IBM


[*********************100%***********************]  1 of 1 completed


Download historical data for MS


[*********************100%***********************]  1 of 1 completed


Download historical data for AXP


[*********************100%***********************]  1 of 1 completed


Download historical data for GE


[*********************100%***********************]  1 of 1 completed


Download historical data for CAT


[*********************100%***********************]  1 of 1 completed


Download historical data for DHR


[*********************100%***********************]  1 of 1 completed


Download historical data for DIS


[*********************100%***********************]  1 of 1 completed


Download historical data for VZ


[*********************100%***********************]  1 of 1 completed


Download historical data for GS


[*********************100%***********************]  1 of 1 completed


Download historical data for NEE


[*********************100%***********************]  1 of 1 completed


Download historical data for RTX


[*********************100%***********************]  1 of 1 completed


Download historical data for PFE


[*********************100%***********************]  1 of 1 completed


Download historical data for T


[*********************100%***********************]  1 of 1 completed


Download historical data for LOW


[*********************100%***********************]  1 of 1 completed


Download historical data for BLK


[*********************100%***********************]  1 of 1 completed


Download historical data for UNP


[*********************100%***********************]  1 of 1 completed


Download historical data for SCHW


[*********************100%***********************]  1 of 1 completed


Download historical data for LMT


[*********************100%***********************]  1 of 1 completed


Download historical data for C


[*********************100%***********************]  1 of 1 completed


Download historical data for COP


[*********************100%***********************]  1 of 1 completed


Download historical data for NKE


[*********************100%***********************]  1 of 1 completed


Download historical data for MDT


[*********************100%***********************]  1 of 1 completed


Download historical data for UPS


[*********************100%***********************]  1 of 1 completed


Download historical data for DE


[*********************100%***********************]  1 of 1 completed


Download historical data for BMY


[*********************100%***********************]  1 of 1 completed


Download historical data for AMT


[*********************100%***********************]  1 of 1 completed


Download historical data for SO


[*********************100%***********************]  1 of 1 completed


Download historical data for BA


[*********************100%***********************]  1 of 1 completed


Download historical data for DUK


[*********************100%***********************]  1 of 1 completed


Download historical data for MO


[*********************100%***********************]  1 of 1 completed


Download historical data for GD


[*********************100%***********************]  1 of 1 completed


Download historical data for CL


[*********************100%***********************]  1 of 1 completed


Download historical data for USB


[*********************100%***********************]  1 of 1 completed


Download historical data for CVS


[*********************100%***********************]  1 of 1 completed


Download historical data for MMM


[*********************100%***********************]  1 of 1 completed


Download historical data for TGT


[*********************100%***********************]  1 of 1 completed


Download historical data for FDX


[*********************100%***********************]  1 of 1 completed


Download historical data for EMR


[*********************100%***********************]  1 of 1 completed


Download historical data for COF


[*********************100%***********************]  1 of 1 completed


Download historical data for MET


[*********************100%***********************]  1 of 1 completed


Download historical data for GM


[*********************100%***********************]  1 of 1 completed


Download historical data for SPG


[*********************100%***********************]  1 of 1 completed


Download historical data for BK


[*********************100%***********************]  1 of 1 completed


Download historical data for AIG


[*********************100%***********************]  1 of 1 completed


Download historical data for F


[*********************100%***********************]  1 of 1 completed

Data loading is complete!
Number of stocks in a folder: 100





## Convert Russia data stocks


In [7]:
def rename_columns_in_files(rename_dict: dict, path: pathlib.Path) -> None:
    """
    Iterates over files in the directory and renames columns based on the provided dictionary.
    
    Parameters:
    ----------
    - rename_dict (dict): A dictionary where the keys are current column names and 
                          the values are the new column names.
    - path (Path): Path object representing the directory containing the files.
    """
    for file in path.iterdir():
        if file.is_file():
            try:
                data = pd.read_csv(file)
                data.rename(columns=rename_dict, inplace=True)
                data.to_csv(file, index=False)
            except Exception as e:
                print(f"Error processing file {file.name}: {e}")
                continue


In [56]:
def clean_numeric_string(num_str: str) -> float:
    """
    Cleans a numeric string and converts it to a float.
    
    Parameters:
    ----------
    num_str : str
        The numeric string to clean and convert.
    
    Returns:
    -------
    float
        The cleaned float value.
    """
    num_str = str(num_str)
    num_str = num_str.replace(' ', '')
    num_str = num_str.replace(',', '.')
    num_str = re.sub(r'(?<=\d)\.(?=\d{3})', '', num_str)

    return float(num_str)

def rename_and_reformat_files(path: pathlib.Path) -> None:
    """
    Iterates over files in the directory, renames columns, and reformats the data.
    
    Parameters:
    ----------
    path : pathlib.Path
        The path to the directory containing the stock data files.
    """
    for file_path in path.iterdir():  # Используем iterdir для обхода по файлам
        if file_path.is_file():  # Проверяем, является ли это файлом
            try:
                data = pd.read_csv(file_path)
                data['Close'] = data['Close'].apply(clean_numeric_string)
                data['Date'] = pd.to_datetime(data['Date'], format="%d.%m.%Y").dt.strftime('%Y-%m-%d')

                data.to_csv(file_path, index=False)  # Используем file_path для сохранения

            except Exception as e:
                print(f"Error processing file {file_path.name}: {e}")
                continue

In [None]:
rename_dict = {
               'Дата': 'Date',
               'Цена': 'Close', 
               'Откр.': 'Open', 
               'Макс.': 'High',
               'Мин.': 'Low',
               'Объём': 'Volume',
               'Изм. %':'Adj Close'
}

rename_columns_in_files(rename_dict, path = pathlib.Path('../data/DataStocks/Russia'))
rename_and_reformat_files(path = pathlib.Path('../data/DataStocks/Russia'))