In [1]:
import os
import re  # noqa
from calendar import monthrange
from datetime import datetime
from zipfile import BadZipFile

import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas.errors import EmptyDataError

# –ö–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—è 
base_url = "https://www.nationalbank.kz"
listing_urls = [
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/2319",
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/2204",
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/1985",
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/1907",
]

save_folder = "downloads"
os.makedirs(save_folder, exist_ok=True)
output_csv_path = os.path.join(save_folder, "changed_kredits_data.csv")
previous_version_path = os.path.join(save_folder, "previous_kredits.csv")


# –í—Å–ø–æ–º–æ–≥–∞—Ç–µ–ª—å–Ω—ã–µ —Ñ—É–Ω–∫—Ü–∏–∏
def find_row_contains(df, keyword):
    keyword = keyword.lower().strip()
    for i, row in df.iterrows():
        if pd.notna(row.iloc[0]):
            cell = str(row.iloc[0]).lower().strip()
            if keyword in cell:
                return i
    return None


def get_value_by_keyword(df, row_keyword, col_index):
    row_idx = find_row_contains(df, row_keyword)
    if row_idx is not None:
        try:
            return df.iloc[row_idx, col_index]
        except IndexError:
            return None
    return None


def get_filename_from_cd(cd):
    if not cd: return None
    fname = re.findall('filename="(.+)"', cd)
    return fname[0] if fname else None


# 1: –°–±–æ—Ä —Å—Å—ã–ª–æ–∫ –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü—ã –æ—Ç—á–µ—Ç–æ–≤
print("üîç –®–∞–≥ 1: –°–±–æ—Ä —Å—Å—ã–ª–æ–∫...")
report_links = []
try:
    for listing_url in listing_urls:
        resp = requests.get(listing_url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        for tag in soup.find_all("a", string=lambda t: t and "–ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ" in t):
            href = tag.get("href")
            if href and href.startswith("/"):
                report_links.append((tag.text.strip(), base_url + href))
except requests.exceptions.RequestException as e:
    print(f"–ö—Ä–∏—Ç–∏—á–µ—Å–∫–∞—è –æ—à–∏–±–∫–∞ –ø—Ä–∏ —Å–±–æ—Ä–µ —Å—Å—ã–ª–æ–∫: {e}")
    exit()

if not report_links:
    raise Exception("–ù–µ—Ç –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö —Å—Å—ã–ª–æ–∫.")
print(f"üîó –ù–∞–π–¥–µ–Ω–æ —Å—Å—ã–ª–æ–∫ –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü—ã/—Ñ–∞–π–ª—ã: {len(report_links)}")

# –®–∞–≥ 2: –û–±—Ä–∞–±–æ—Ç–∫–∞ –∫–∞–∂–¥–æ–π —Å—Å—ã–ª–∫–∏ –∏ –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö
print("\nüîÑ –®–∞–≥ 2: –û–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤ –∏ –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...")
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
all_rows = []

# –ë–ª–æ–∫ –æ–±—Ä–∞–±–æ—Ç–∫–∏ —Ñ–∞–π–ª–æ–≤ (–±–µ–∑ –∏–∑–º–µ–Ω–µ–Ω–∏–π)
for title, report_url in report_links:
    print(f"--- –û–±—Ä–∞–±–æ—Ç–∫–∞: {title} ---")
    try:
        resp = requests.get(report_url, timeout=20)
        resp.raise_for_status()
        content_type = resp.headers.get('content-type', '').lower()

        file_content, file_name = None, None

        if 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' in content_type:
            print("   -> –û–±–Ω–∞—Ä—É–∂–µ–Ω–∞ –ø—Ä—è–º–∞—è —Å—Å—ã–ª–∫–∞ –Ω–∞ —Ñ–∞–π–ª.")
            file_content = resp.content
            file_name = get_filename_from_cd(
                resp.headers.get('content-disposition')) or f"file_{report_url.split('/')[-1]}.xlsx"
        elif 'text/html' in content_type:
            print("   -> –û–±–Ω–∞—Ä—É–∂–µ–Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–∞, –∏—â–µ–º —Å—Å—ã–ª–∫—É .xlsx...")
            details_soup = BeautifulSoup(resp.text, "html.parser")
            download_tag = details_soup.find("a", href=lambda h: h and ".xlsx" in h.lower())
            if download_tag and download_tag.get("href"):
                actual_file_url = base_url + download_tag["href"]
                file_name = actual_file_url.split("/")[-1]
                print(f"   -> –ù–∞–π–¥–µ–Ω–∞ —Å—Å—ã–ª–∫–∞ –Ω–∞ —Ñ–∞–π–ª: {actual_file_url}")
                file_resp = requests.get(actual_file_url, timeout=30)
                file_resp.raise_for_status()
                file_content = file_resp.content
            else:
                print(f"–ü—Ä–æ–ø—É—â–µ–Ω: –ù–µ –Ω–∞–π–¥–µ–Ω–∞ —Å—Å—ã–ª–∫–∞ .xlsx –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü–µ: {report_url}")
                continue
        else:
            print(f"–ü—Ä–æ–ø—É—â–µ–Ω: –ù–µ–∏–∑–≤–µ—Å—Ç–Ω—ã–π —Ç–∏–ø –∫–æ–Ω—Ç–µ–Ω—Ç–∞ '{content_type}'")
            continue

        if file_content and file_name:
            file_path = os.path.join(save_folder, file_name)
            if not os.path.exists(file_path):
                with open(file_path, "wb") as f:
                    f.write(file_content)
                print(f"–°–∫–∞—á–∞–Ω: {file_name}")
            else:
                print(f"–£–∂–µ –µ—Å—Ç—å: {file_name}")

            try:
                xls = pd.ExcelFile(file_path, engine="openpyxl")
            except BadZipFile:
                print(f"–û—à–∏–±–∫–∞: {file_name} –Ω–µ —è–≤–ª—è–µ—Ç—Å—è –∫–æ—Ä—Ä–µ–∫—Ç–Ω—ã–º Excel —Ñ–∞–π–ª–æ–º.")
                continue

            sheet_issued = next((s for s in xls.sheet_names if "–≤—ã–¥–∞–Ω–æ" in s.lower()), None)
            sheet_rates = next((s for s in xls.sheet_names if "—Å—Ç–∞–≤–∫" in s.lower()), None)
            if not sheet_issued or not sheet_rates: continue

            df_issued = xls.parse(sheet_issued)
            df_rates = xls.parse(sheet_rates)

            headers_row = df_issued.iloc[2, 1:]
            periods = []
            for idx, val in enumerate(headers_row):
                if isinstance(val, str) and "." in val:
                    try:
                        m, y = val.split(".");
                        m, y = int(m), int("20" + y)
                        last_day = monthrange(y, m)[1]
                        full_date = f"{y}-{m:02d}-{last_day}"
                        col_nat = df_issued.columns[idx + 2]
                        col_for = df_issued.columns[idx + 3]
                        periods.append((val, full_date, col_nat, col_for))
                    except (ValueError, IndexError):
                        continue

            rate_nat_col = df_rates.columns.get_loc("Unnamed: 7")
            rate_for_col = df_rates.columns.get_loc("Unnamed: 8")
            rate_nat = get_value_by_keyword(df_rates, "–ø–æ –≤—Å–µ–º –∫—Ä–µ–¥–∏—Ç–∞–º", rate_nat_col)
            rate_for = get_value_by_keyword(df_rates, "–ø–æ –≤—Å–µ–º –∫—Ä–µ–¥–∏—Ç–∞–º", rate_for_col)

            for _, period_date, col_nat, col_for in periods:
                col_nat_idx = df_issued.columns.get_loc(col_nat)
                col_for_idx = df_issued.columns.get_loc(col_for)
                val_nat_total = get_value_by_keyword(df_issued, "–≤—Å–µ–≥–æ –∫—Ä–µ–¥–∏—Ç—ã –≤—ã–¥–∞–Ω–Ω—ã–µ", col_nat_idx) or 0
                val_for_total = get_value_by_keyword(df_issued, "–≤—Å–µ–≥–æ –∫—Ä–µ–¥–∏—Ç—ã –≤—ã–¥–∞–Ω–Ω—ã–µ", col_for_idx) or 0
                mapping = {
                    1: ("–í—Å–µ–≥–æ", val_nat_total + val_for_total),
                    2: ("–í—Å–µ–≥–æ –≤ –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π –≤–∞–ª—é—Ç–µ", val_nat_total),
                    3: ("–í—Å–µ–≥–æ –≤ –∏–Ω–æ—Å—Ç—Ä–∞–Ω–Ω–æ–π –≤–∞–ª—é—Ç–µ", val_for_total),
                    4: ("–í –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π –≤–∞–ª—é—Ç–µ, –º–∞–ª–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                        get_value_by_keyword(df_issued, "—Å—É–±—ä–µ–∫—Ç–∞–º –º–∞–ª–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_nat_idx)),
                    5: ("–í –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π –≤–∞–ª—é—Ç–µ, —Å—Ä–µ–¥–Ω–µ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                        get_value_by_keyword(df_issued, "—Å—É–±—ä–µ–∫—Ç–∞–º —Å—Ä–µ–¥–Ω–µ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_nat_idx)),
                    6: ("–í –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π –≤–∞–ª—é—Ç–µ, –∫—Ä—É–ø–Ω–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                        get_value_by_keyword(df_issued, "—Å—É–±—ä–µ–∫—Ç–∞–º –∫—Ä—É–ø–Ω–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_nat_idx)),
                    7: ("–í –∏–Ω–æ—Å—Ç—Ä–∞–Ω–Ω–æ–π –≤–∞–ª—é—Ç–µ, –º–∞–ª–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                        get_value_by_keyword(df_issued, "—Å—É–±—ä–µ–∫—Ç–∞–º –º–∞–ª–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_for_idx)),
                    8: ("–í –∏–Ω–æ—Å—Ç—Ä–∞–Ω–Ω–æ–π –≤–∞–ª—é—Ç–µ, —Å—Ä–µ–¥–Ω–µ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                        get_value_by_keyword(df_issued, "—Å—É–±—ä–µ–∫—Ç–∞–º —Å—Ä–µ–¥–Ω–µ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_for_idx)),
                    9: ("–í –∏–Ω–æ—Å—Ç—Ä–∞–Ω–Ω–æ–π –≤–∞–ª—é—Ç–µ, –∫—Ä—É–ø–Ω–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                        get_value_by_keyword(df_issued, "—Å—É–±—ä–µ–∫—Ç–∞–º –∫—Ä—É–ø–Ω–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_for_idx)),
                }
                for type_id, (desc, value) in mapping.items():
                    if value is None or not pd.notna(value): continue
                    rate = None
                    if type_id in [2, 4, 5, 6]:
                        rate = rate_nat
                    elif type_id in [3, 7, 8, 9]:
                        rate = rate_for
                    all_rows.append({
                        "LOAD_DATE": timestamp,
                        "PACKAGE_ID": 1,
                        "TYPE": type_id,
                        "TYPE_DESCRIPTION": desc,
                        "ISSUED_MONTH_KZT": float(value),
                        "RATE_PERCENTAGE": float(rate) if rate is not None and pd.notna(rate) else None,
                        "PERIOD": period_date,
                    })

    except requests.exceptions.RequestException as e:
        print(f"–û—à–∏–±–∫–∞ —Å–µ—Ç–∏ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ {report_url}: {e}")
    except Exception as e:
        import traceback

        print(f"–ù–µ–ø—Ä–µ–¥–≤–∏–¥–µ–Ω–Ω–∞—è –æ—à–∏–±–∫–∞ –¥–ª—è '{title}': {e}")
        traceback.print_exc()

# –®–∞–≥ 3: –°—Ä–∞–≤–Ω–µ–Ω–∏–µ –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ
print("\n–®–∞–≥ 3: –°—Ä–∞–≤–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤...")

if not all_rows:
    print("–ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –¥–ª—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è. –ó–∞–≤–µ—Ä—à–µ–Ω–∏–µ —Ä–∞–±–æ—Ç—ã.")
    exit()


current_df = pd.DataFrame(all_rows).drop_duplicates(subset=['PERIOD', 'TYPE'], keep='last')

perform_comparison = False
prev_df = pd.DataFrame()  # –°–æ–∑–¥–∞–µ–º –ø—É—Å—Ç–æ–π DataFrame –Ω–∞ —Å–ª—É—á–∞–π

if os.path.exists(previous_version_path):
    try:
        # –ü—ã—Ç–∞–µ–º—Å—è –ø—Ä–æ—á–∏—Ç–∞—Ç—å —Ñ–∞–π–ª
        prev_df = pd.read_csv(previous_version_path)
        if not prev_df.empty:
            # –ï—Å–ª–∏ —Ñ–∞–π–ª –ø—Ä–æ—á–∏—Ç–∞–Ω –∏ –æ–Ω –Ω–µ –ø—É—Å—Ç–æ–π, –±—É–¥–µ–º —Å—Ä–∞–≤–Ω–∏–≤–∞—Ç—å
            perform_comparison = True
            print("–ù–∞–π–¥–µ–Ω —Ñ–∞–π–ª –ø—Ä–µ–¥—ã–¥—É—â–µ–π –≤–µ—Ä—Å–∏–∏. –°—Ä–∞–≤–Ω–∏–≤–∞–µ–º...")
        else:
            # –§–∞–π–ª —Å—É—â–µ—Å—Ç–≤—É–µ—Ç, –Ω–æ –ø—É—Å—Ç–æ–π
            print("–§–∞–π–ª –ø—Ä–µ–¥—ã–¥—É—â–µ–π –≤–µ—Ä—Å–∏–∏ –ø—É—Å—Ç. –í—Å–µ —Ç–µ–∫—É—â–∏–µ —Å—Ç—Ä–æ–∫–∏ —Å—á–∏—Ç–∞—é—Ç—Å—è –Ω–æ–≤—ã–º–∏.")

    except EmptyDataError:
        # –§–∞–π–ª —Å—É—â–µ—Å—Ç–≤—É–µ—Ç, –Ω–æ –µ–≥–æ –Ω–µ —É–¥–∞–ª–æ—Å—å –ø—Ä–æ—á–∏—Ç–∞—Ç—å (–æ–Ω –ø—É—Å—Ç–æ–π)
        print("–§–∞–π–ª –ø—Ä–µ–¥—ã–¥—É—â–µ–π –≤–µ—Ä—Å–∏–∏ –ø—É—Å—Ç (–æ—à–∏–±–∫–∞ —á—Ç–µ–Ω–∏—è). –í—Å–µ —Ç–µ–∫—É—â–∏–µ —Å—Ç—Ä–æ–∫–∏ —Å—á–∏—Ç–∞—é—Ç—Å—è –Ω–æ–≤—ã–º–∏.")
else:
    # –§–∞–π–ª–∞ –Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç
    print("–§–∞–π–ª –ø—Ä–µ–¥—ã–¥—É—â–µ–π –≤–µ—Ä—Å–∏–∏ –Ω–µ –Ω–∞–π–¥–µ–Ω. –í—Å–µ —Ç–µ–∫—É—â–∏–µ —Å—Ç—Ä–æ–∫–∏ —Å—á–∏—Ç–∞—é—Ç—Å—è –Ω–æ–≤—ã–º–∏.")

if perform_comparison:
    # --- –õ–æ–≥–∏–∫–∞ –¥–µ—Ç–∞–ª—å–Ω–æ–≥–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—è, –µ—Å–ª–∏ –ø—Ä–µ–¥—ã–¥—É—â–∏–π —Ñ–∞–π–ª —É—Å–ø–µ—à–Ω–æ –∑–∞–≥—Ä—É–∂–µ–Ω ---
    current_df['PERIOD'] = pd.to_datetime(current_df['PERIOD'])
    prev_df['PERIOD'] = pd.to_datetime(prev_df['PERIOD'])

    current_df.set_index(['PERIOD', 'TYPE'], inplace=True)
    prev_df.set_index(['PERIOD', 'TYPE'], inplace=True)

    new_indices = current_df.index.difference(prev_df.index)
    new_rows = current_df.loc[new_indices].reset_index()
    print(f"–ù–∞–π–¥–µ–Ω–æ –Ω–æ–≤—ã—Ö —Å—Ç—Ä–æ–∫: {len(new_rows)}")

    common_indices = current_df.index.intersection(prev_df.index)
    current_common = current_df.loc[common_indices]
    prev_common = prev_df.loc[common_indices]

    compare_cols = ['ISSUED_MONTH_KZT', 'RATE_PERCENTAGE']
    is_changed = (current_common[compare_cols].round(2)).ne(prev_common[compare_cols].round(2)).any(axis=1)

    changed_indices = is_changed[is_changed].index
    updated_rows = current_common.loc[changed_indices].reset_index()
    print(f"–ù–∞–π–¥–µ–Ω–æ –∏–∑–º–µ–Ω–µ–Ω–Ω—ã—Ö —Å—Ç—Ä–æ–∫: {len(updated_rows)}")

    final_changed_df = pd.concat([new_rows, updated_rows], ignore_index=True)
else:
    # –ï—Å–ª–∏ —Å—Ä–∞–≤–Ω–µ–Ω–∏–µ –Ω–µ —Ç—Ä–µ–±—É–µ—Ç—Å—è, –≤—Å–µ —Ç–µ–∫—É—â–∏–µ —Å—Ç—Ä–æ–∫–∏ - –Ω–æ–≤—ã–µ
    final_changed_df = current_df.copy()

# === –®–∞–≥ 4: –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ ===
print(f"\n–ò—Ç–æ–≥: {len(final_changed_df)} –Ω–æ–≤—ã—Ö –∏–ª–∏ –∏–∑–º–µ–Ω–µ–Ω–Ω—ã—Ö —Å—Ç—Ä–æ–∫ –¥–ª—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è.")

final_changed_df.to_csv(output_csv_path, index=False, encoding="utf-8-sig")
print(f"–ù–æ–≤—ã–µ/–∏–∑–º–µ–Ω–µ–Ω–Ω—ã–µ –¥–∞–Ω–Ω—ã–µ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤: {output_csv_path}")

current_df.to_csv(previous_version_path, index=False, encoding="utf-8-sig")
print(f"–ü–æ–ª–Ω–∞—è —Ç–µ–∫—É—â–∞—è –≤–µ—Ä—Å–∏—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞ –¥–ª—è –±—É–¥—É—â–∏—Ö —Å—Ä–∞–≤–Ω–µ–Ω–∏–π –≤: {previous_version_path}")


üîç –®–∞–≥ 1: –°–±–æ—Ä —Å—Å—ã–ª–æ–∫...
üîó –ù–∞–π–¥–µ–Ω–æ —Å—Å—ã–ª–æ–∫ –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü—ã/—Ñ–∞–π–ª—ã: 16

üîÑ –®–∞–≥ 2: –û–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤ –∏ –∏–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ ---
   -> –û–±–Ω–∞—Ä—É–∂–µ–Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–∞, –∏—â–µ–º —Å—Å—ã–ª–∫—É .xlsx...
–ü—Ä–æ–ø—É—â–µ–Ω: –ù–µ –Ω–∞–π–¥–µ–Ω–∞ —Å—Å—ã–ª–∫–∞ .xlsx –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü–µ: https://www.nationalbank.kz/ru/news/kredity-bankovskogo-sektora-ekonomike
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
   -> –û–±–Ω–∞—Ä—É–∂–µ–Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–∞, –∏—â–µ–º —Å—Å—ã–ª–∫—É .xlsx...
–ü—Ä–æ–ø—É—â–µ–Ω: –ù–µ –Ω–∞–π–¥–µ–Ω–∞ —Å—Å—ã–ª–∫–∞ .xlsx –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü–µ: https://www.nationalbank.kz/ru/news/banking-sector-loans-to-economy-analytics
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ

KeyError: 'PERIOD'

In [2]:
import os
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
from calendar import monthrange
from pandas.errors import EmptyDataError
import vertica_python

# –ö–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—è
base_url = "https://www.nationalbank.kz"
listing_urls = [
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/2319",
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/2204",
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/1985",
    f"{base_url}/ru/news/banking-sector-loans-to-economy-analytics/rubrics/1907",
]
save_folder = "downloads"
os.makedirs(save_folder, exist_ok=True)

# --- –ü–æ–¥–∫–ª—é—á–µ–Ω–∏–µ –∫ Vertica ---
VERTICA_CONN_INFO = {
    'host': '10.7.7.231',
    'port': 5433,
    'user': '',
    'password': '',
    'database': 'baiterek',
    'tlsmode': 'disable',
    'autocommit': True
}
TABLE_NAME = "SANDBOX.D_LENDING_TOTAL_BVU_RK"


# --- –í—Å–ø–æ–º–æ–≥–∞—Ç–µ–ª—å–Ω—ã–µ —Ñ—É–Ω–∫—Ü–∏–∏ ---
def find_row_contains(df, keyword):
    keyword = keyword.lower().strip()
    for i, row in df.iterrows():
        if pd.notna(row.iloc[0]):
            cell = str(row.iloc[0]).lower().strip()
            if keyword in cell:
                return i
    return None


def get_value_by_keyword(df, row_keyword, col_index):
    row_idx = find_row_contains(df, row_keyword)
    if row_idx is not None:
        try:
            return df.iloc[row_idx, col_index]
        except IndexError:
            return None
    return None


def get_filename_from_cd(cd):
    if not cd: return None
    fname = re.findall('filename="(.+)"', cd)
    return fname[0] if fname else None


# --- –ü–æ–ª—É—á–µ–Ω–∏–µ –Ω–æ–≤–æ–≥–æ PACKAGE_ID ---
with vertica_python.connect(**VERTICA_CONN_INFO) as conn:
    cursor = conn.cursor()
    cursor.execute(f"SELECT COALESCE(MAX(PACKAGE_ID), 0) FROM {TABLE_NAME}")
    max_package_id = cursor.fetchone()[0]
    PACKAGE_ID = max_package_id + 1
    print(f"–ù–æ–≤—ã–π PACKAGE_ID: {PACKAGE_ID}")

# –°–±–æ—Ä —Å—Å—ã–ª–æ–∫ 
print("–®–∞–≥ 1: –°–±–æ—Ä —Å—Å—ã–ª–æ–∫...")
report_links = []
for listing_url in listing_urls:
    try:
        resp = requests.get(listing_url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        for tag in soup.find_all("a", string=lambda t: t and "–ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ" in t):
            href = tag.get("href")
            if href and href.startswith("/"):
                report_links.append((tag.text.strip(), base_url + href))
    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –∑–∞–≥—Ä—É–∑–∫–µ {listing_url}: {e}")

if not report_links:
    raise Exception("–ù–µ—Ç –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö —Å—Å—ã–ª–æ–∫.")
print(f"–ù–∞–π–¥–µ–Ω–æ —Å—Å—ã–ª–æ–∫: {len(report_links)}")

# –ò–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö 
print("\n–®–∞–≥ 2: –ò–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...")
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
all_rows = []

for title, report_url in report_links:
    print(f"--- –û–±—Ä–∞–±–æ—Ç–∫–∞: {title} ---")
    try:
        resp = requests.get(report_url, timeout=20)
        resp.raise_for_status()
        content_type = resp.headers.get('content-type', '').lower()

        file_content = None
        if 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' in content_type:
            file_content = resp.content
        elif 'text/html' in content_type:
            soup = BeautifulSoup(resp.text, "html.parser")
            tag = soup.find("a", href=lambda h: h and ".xlsx" in h.lower())
            if tag:
                actual_file_url = base_url + tag['href']
                file_resp = requests.get(actual_file_url, timeout=30)
                file_resp.raise_for_status()
                file_content = file_resp.content
            else:
                print("XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.")
                continue
        else:
            print(f"–ù–µ–∏–∑–≤–µ—Å—Ç–Ω—ã–π —Ñ–æ—Ä–º–∞—Ç –∫–æ–Ω—Ç–µ–Ω—Ç–∞: {content_type}")
            continue

        xls = pd.ExcelFile(file_content, engine="openpyxl")
        sheet_issued = next((s for s in xls.sheet_names if "–≤—ã–¥–∞–Ω–æ" in s.lower()), None)
        sheet_rates = next((s for s in xls.sheet_names if "—Å—Ç–∞–≤–∫" in s.lower()), None)
        if not sheet_issued or not sheet_rates:
            continue

        df_issued = xls.parse(sheet_issued)
        df_rates = xls.parse(sheet_rates)

        headers_row = df_issued.iloc[2, 1:]
        periods = []
        for idx, val in enumerate(headers_row):
            if isinstance(val, str) and "." in val:
                try:
                    m, y = val.split(".");
                    m, y = int(m), int("20" + y)
                    last_day = monthrange(y, m)[1]
                    full_date = f"{y}-{m:02d}-{last_day}"
                    col_nat = df_issued.columns[idx + 2]
                    col_for = df_issued.columns[idx + 3]
                    periods.append((val, full_date, col_nat, col_for))
                except:
                    continue

        rate_nat_col = df_rates.columns.get_loc("Unnamed: 7")
        rate_for_col = df_rates.columns.get_loc("Unnamed: 8")
        rate_nat = get_value_by_keyword(df_rates, "–ø–æ –≤—Å–µ–º –∫—Ä–µ–¥–∏—Ç–∞–º", rate_nat_col)
        rate_for = get_value_by_keyword(df_rates, "–ø–æ –≤—Å–µ–º –∫—Ä–µ–¥–∏—Ç–∞–º", rate_for_col)

        for _, period_date, col_nat, col_for in periods:
            col_nat_idx = df_issued.columns.get_loc(col_nat)
            col_for_idx = df_issued.columns.get_loc(col_for)
            val_nat_total = get_value_by_keyword(df_issued, "–≤—Å–µ–≥–æ –∫—Ä–µ–¥–∏—Ç—ã –≤—ã–¥–∞–Ω–Ω—ã–µ", col_nat_idx) or 0
            val_for_total = get_value_by_keyword(df_issued, "–≤—Å–µ–≥–æ –∫—Ä–µ–¥–∏—Ç—ã –≤—ã–¥–∞–Ω–Ω—ã–µ", col_for_idx) or 0
            mapping = {
                1: ("–í—Å–µ–≥–æ", val_nat_total + val_for_total),
                2: ("–í—Å–µ–≥–æ –≤ –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π –≤–∞–ª—é—Ç–µ", val_nat_total),
                3: ("–í—Å–µ–≥–æ –≤ –∏–Ω–æ—Å—Ç—Ä–∞–Ω–Ω–æ–π –≤–∞–ª—é—Ç–µ", val_for_total),
                4: ("–í –Ω–∞—Ü. –≤–∞–ª—é—Ç–µ, –º–∞–ª–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                    get_value_by_keyword(df_issued, "–º–∞–ª–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_nat_idx)),
                5: ("–í –Ω–∞—Ü. –≤–∞–ª—é—Ç–µ, —Å—Ä–µ–¥–Ω–µ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                    get_value_by_keyword(df_issued, "—Å—Ä–µ–¥–Ω–µ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_nat_idx)),
                6: ("–í –Ω–∞—Ü. –≤–∞–ª—é—Ç–µ, –∫—Ä—É–ø–Ω–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                    get_value_by_keyword(df_issued, "–∫—Ä—É–ø–Ω–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_nat_idx)),
                7: ("–í –∏–Ω. –≤–∞–ª—é—Ç–µ, –º–∞–ª–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                    get_value_by_keyword(df_issued, "–º–∞–ª–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_for_idx)),
                8: ("–í –∏–Ω. –≤–∞–ª—é—Ç–µ, —Å—Ä–µ–¥–Ω–µ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                    get_value_by_keyword(df_issued, "—Å—Ä–µ–¥–Ω–µ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_for_idx)),
                9: ("–í –∏–Ω. –≤–∞–ª—é—Ç–µ, –∫—Ä—É–ø–Ω–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–æ",
                    get_value_by_keyword(df_issued, "–∫—Ä—É–ø–Ω–æ–≥–æ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—Ç–≤–∞", col_for_idx)),
            }
            for type_id, (desc, value) in mapping.items():
                if value is None or not pd.notna(value): continue
                rate = None
                if type_id in [2, 4, 5, 6]:
                    rate = rate_nat
                elif type_id in [3, 7, 8, 9]:
                    rate = rate_for
                all_rows.append({
                    "LOAD_DATE": timestamp,
                    "PACKAGE_ID": PACKAGE_ID,
                    "TYPE": type_id,
                    "TYPE_DESCRIPTION": desc,
                    "ISSUED_MONTH_KZT": float(value),
                    "RATE_PERCENTAGE": float(rate) if rate is not None and pd.notna(rate) else None,
                    "PERIOD": period_date
                })

    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ '{title}': {e}")

#–®–∞–≥ 3: –í—ã–≥—Ä—É–∑–∫–∞ –≤ –≤–∏—Ç—Ä–∏–Ω—É
print("\n –®–∞–≥ 3: –ó–∞–≥—Ä—É–∑–∫–∞ –≤ Vertica...")
df = pd.DataFrame(all_rows)
df.drop_duplicates(subset=["PERIOD", "TYPE"], inplace=True)

insert_query = f"""
INSERT INTO {TABLE_NAME} (
    LOAD_DATE,
    PACKAGE_ID,
    TYPE,
    TYPE_DESCRIPTION,
    ISSUED_MONTH_KZT,
    RATE_PERCENTAGE,
    PERIOD
) VALUES (:LOAD_DATE, :PACKAGE_ID, :TYPE, :TYPE_DESCRIPTION, :ISSUED_MONTH_KZT, :RATE_PERCENTAGE, :PERIOD)
"""

# –ü—Ä–µ–æ–±—Ä–∞–∑—É–µ–º PERIOD –≤ —Å—Ç—Ä–æ–∫—É
df["PERIOD"] = df["PERIOD"].astype(str)

# –ó–∞–º–µ–Ω—è–µ–º NaN –Ω–∞ None
df = df.where(pd.notnull(df), None)
print(df)

with vertica_python.connect(**VERTICA_CONN_INFO) as conn:
    cursor = conn.cursor()
    for record in df.to_dict(orient="records"):
        cursor.execute(insert_query, record)
    conn.commit()
    print(f"–£—Å–ø–µ—à–Ω–æ –∑–∞–≥—Ä—É–∂–µ–Ω–æ {len(df)} —Å—Ç—Ä–æ–∫.")

–®–∞–≥ 1: –°–±–æ—Ä —Å—Å—ã–ª–æ–∫...
–ù–∞–π–¥–µ–Ω–æ —Å—Å—ã–ª–æ–∫: 16

–®–∞–≥ 2: –ò–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ, —è–Ω–≤–∞—Ä—å-–∞–ø—Ä–µ–ª—å 2025 –≥. (–æ—Å—Ç–∞—Ç–æ–∫ –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç–∏, –æ–±—ä–µ–º –≤—ã–¥–∞—á–∏, —Å—Ç–∞–≤–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –ø–æ –≤—ã–¥–∞–Ω–Ω—ã–º –∫—Ä–µ–¥–∏—Ç–∞–º, –ø—Ä–æ—Å—Ä

  xls = pd.ExcelFile(file_content, engine="openpyxl")


--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ, 2024 –≥. (–æ—Å—Ç–∞—Ç–æ–∫ –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç–∏, –æ–±—ä–µ–º –≤—ã–¥–∞—á–∏, —Å—Ç–∞–≤–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –ø–æ –≤—ã–¥–∞–Ω–Ω—ã–º –∫—Ä–µ–¥–∏—Ç–∞–º, –ø—Ä–æ—Å—Ä–æ—á–µ–Ω–Ω–∞—è –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç—å) ---


  xls = pd.ExcelFile(file_content, engine="openpyxl")


--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ, 2023–≥. (–æ—Å—Ç–∞—Ç–æ–∫ –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç–∏, –æ–±—ä–µ–º –≤—ã–¥–∞—á–∏, —Å—Ç–∞–≤–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –ø–æ –≤—ã–¥–∞–Ω–Ω—ã–º –∫—Ä–µ–¥–∏—Ç–∞–º, –ø—Ä–æ—Å—Ä–æ—á–µ–Ω–Ω–∞—è –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç—å) ---


  xls = pd.ExcelFile(file_content, engine="openpyxl")


--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ (–∞–Ω–∞–ª–∏—Ç–∏—á–µ—Å–∫–æ–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω–∏–µ) ---
XLSX-—Ñ–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –Ω–∞ HTML-—Å—Ç—Ä–∞–Ω–∏—Ü–µ.
--- –û–±—Ä–∞–±–æ—Ç–∫–∞: –ö—Ä–µ–¥–∏—Ç—ã –±–∞–Ω–∫–æ–≤—Å–∫–æ–≥–æ —Å–µ–∫—Ç–æ—Ä–∞ —ç–∫–æ–Ω–æ–º–∏–∫–µ, 2022 –≥. (–æ—Å—Ç–∞—Ç–æ–∫ –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç–∏, –æ–±—ä–µ–º –≤—ã–¥–∞—á–∏, —Å—Ç–∞–≤–∫–∏ –≤–æ–∑–Ω–∞–≥—Ä–∞–∂–¥–µ–Ω–∏—è –ø–æ –≤—ã–¥–∞–Ω–Ω—ã–º –∫—Ä–µ–¥–∏—Ç–∞–º, –ø—Ä–æ—Å—Ä–æ—á–µ–Ω–Ω–∞—è –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç—å) ---


  xls = pd.ExcelFile(file_content, engine="openpyxl")



 –®–∞–≥ 3: –ó–∞–≥—Ä—É–∑–∫–∞ –≤ Vertica...
               LOAD_DATE  PACKAGE_ID  TYPE  \
0    2025-06-21 15:51:23           1     1   
1    2025-06-21 15:51:23           1     2   
2    2025-06-21 15:51:23           1     3   
3    2025-06-21 15:51:23           1     4   
4    2025-06-21 15:51:23           1     5   
..                   ...         ...   ...   
328  2025-06-21 15:51:23           1     5   
329  2025-06-21 15:51:23           1     6   
330  2025-06-21 15:51:23           1     7   
331  2025-06-21 15:51:23           1     8   
332  2025-06-21 15:51:23           1     9   

                               TYPE_DESCRIPTION  ISSUED_MONTH_KZT  \
0                                         –í—Å–µ–≥–æ      2.374352e+06   
1                   –í—Å–µ–≥–æ –≤ –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω–æ–π –≤–∞–ª—é—Ç–µ      2.217012e+06   
2                    –í—Å–µ–≥–æ –≤ –∏–Ω–æ—Å—Ç—Ä–∞–Ω–Ω–æ–π –≤–∞–ª—é—Ç–µ      1.573400e+05   
3      –í –Ω–∞—Ü. –≤–∞–ª—é—Ç–µ, –º–∞–ª–æ–µ –ø—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å—

KeyboardInterrupt: 